.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
searx.result_types._base Namespace Reference

Classes

class  LegacyResult
class  MainResult
class  Result

Functions

 _normalize_url_fields ("Result | LegacyResult" result)
 _normalize_text_fields ("MainResult | LegacyResult" result)
 _filter_urls ("Result | LegacyResult" result, "Callable[[Result | LegacyResult, str, str], str | bool]" filter_func)
 _normalize_date_fields ("MainResult | LegacyResult" result)

Variables

list __all__ = ["Result"]
 WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
 UNKNOWN = object()

Detailed Description

Basic types used to give search results a well-defined type.

- :py:obj:`Result` base class
- :py:obj:`LegacyResult` for internal use only

----

.. autoclass:: Result
   :members:

.. _LegacyResult:

.. autoclass:: LegacyResult
   :members:

Function Documentation

◆ _filter_urls()

searx.result_types._base._filter_urls ( "Result | LegacyResult" result,
"Callable[[Result | LegacyResult, str, str], str | bool]" filter_func )
protected

Definition at line 112 of file _base.py.

114):
115 # pylint: disable=too-many-branches, too-many-statements
116
117 # As soon as LegacyResult is no longer needed, we can move this function to
118 # a method of Result.
119
120 url_fields = ["url", "iframe_src", "audio_src", "img_src", "thumbnail_src", "thumbnail"]
121
122 url_src: str
123
124 for field_name in url_fields:
125 url_src = getattr(result, field_name, "")
126 if not url_src:
127 continue
128
129 new_url = filter_func(result, field_name, url_src)
130 # log.debug("filter_urls: filter_func(result, %s) '%s' -> '%s'", field_name, field_value, new_url)
131 if isinstance(new_url, bool):
132 if new_url:
133 # log.debug("filter_urls: unchanged field %s URL %s", field_name, field_value)
134 continue
135 log.debug("filter_urls: drop field %s URL %s", field_name, url_src)
136 new_url = None
137 else:
138 log.debug("filter_urls: modify field %s URL %s -> %s", field_name, url_src, new_url)
139
140 setattr(result, field_name, new_url)
141 if field_name == "url":
142 # sync parsed_url with new_url
143 if not new_url:
144 result.parsed_url = None
145 elif isinstance(new_url, str):
146 result.parsed_url = urllib.parse.urlparse(new_url)
147
148 # "urls": are from infobox
149 #
150 # As soon as we have InfoboxResult, we can move this function to method
151 # InfoboxResult.normalize_result_fields
152
153 infobox_urls: list[dict[str, str]] = getattr(result, "urls", [])
154
155 if infobox_urls:
156 # log.debug("filter_urls: infobox_urls .. %s", infobox_urls)
157 new_infobox_urls: list[dict[str, str]] = []
158
159 for item in infobox_urls:
160 url_src = item.get("url", "")
161 if not url_src:
162 new_infobox_urls.append(item)
163 continue
164
165 new_url = filter_func(result, "infobox_urls", url_src)
166 if isinstance(new_url, bool):
167 if new_url:
168 new_infobox_urls.append(item)
169 # log.debug("filter_urls: leave URL in field 'urls' ('infobox_urls') unchanged -> %s", _url)
170 continue
171 log.debug("filter_urls: remove URL from field 'urls' ('infobox_urls') URL %s", url_src)
172 new_url = None
173 if new_url:
174 log.debug("filter_urls: modify URL from field 'urls' ('infobox_urls') URL %s -> %s", url_src, new_url)
175 item["url"] = new_url
176 new_infobox_urls.append(item)
177
178 setattr(result, "urls", new_infobox_urls)
179
180 # "attributes": are from infobox
181 #
182 # The infobox has additional subsections for attributes, urls and relatedTopics:
183
184 infobox_attributes: list[dict[str, t.Any]] = getattr(result, "attributes", [])
185
186 if infobox_attributes:
187 # log.debug("filter_urls: infobox_attributes .. %s", infobox_attributes)
188 new_infobox_attributes: list[dict[str, str | list[dict[str, str]]]] = []
189
190 for item in infobox_attributes:
191 image: dict[str, str] = item.get("image", {})
192 url_src = image.get("src", "")
193 if not url_src:
194 new_infobox_attributes.append(item)
195 continue
196
197 new_url = filter_func(result, "infobox_attributes", url_src)
198 if isinstance(new_url, bool):
199 if new_url:
200 new_infobox_attributes.append(item)
201 # log.debug("filter_urls: leave URL in field 'image.src' unchanged -> %s", url_src)
202 continue
203 log.debug("filter_urls: drop field 'image.src' ('infobox_attributes') URL %s", url_src)
204 new_url = None
205
206 if new_url:
207 log.debug(
208 "filter_urls: modify 'image.src' ('infobox_attributes') URL %s -> %s",
209 url_src,
210 new_url,
211 )
212 item["image"]["src"] = new_url
213 new_infobox_attributes.append(item)
214
215 setattr(result, "attributes", new_infobox_attributes)
216
217 result.normalize_result_fields()
218
219

Referenced by searx.result_types._base.LegacyResult.filter_urls().

Here is the caller graph for this function:

◆ _normalize_date_fields()

searx.result_types._base._normalize_date_fields ( "MainResult | LegacyResult" result)
protected

Definition at line 220 of file _base.py.

def _normalize_date_fields(result: "MainResult | LegacyResult"):
    """Derive ``result.pubdate`` from ``result.publishedDate`` (in place).

    When ``publishedDate`` is truthy it is formatted with
    ``strftime('%Y-%m-%d %H:%M:%S%z')`` and the string is stored in
    ``result.pubdate``.  If formatting raises :py:obj:`ValueError`,
    ``publishedDate`` is reset to ``None`` and ``pubdate`` is left untouched.
    """
    published = result.publishedDate
    if not published:
        # do not try to get a date from an empty string or a None type
        return

    try:
        # strftime doubles as a validity test: dates the datetime module
        # cannot format (historically years < 1900) raise ValueError
        result.pubdate = published.strftime('%Y-%m-%d %H:%M:%S%z')
    except ValueError:
        result.publishedDate = None
227
228

Referenced by searx.result_types._base.LegacyResult.normalize_result_fields(), and searx.result_types._base.MainResult.normalize_result_fields().

Here is the caller graph for this function:

◆ _normalize_text_fields()

searx.result_types._base._normalize_text_fields ( "MainResult | LegacyResult" result)
protected

Definition at line 86 of file _base.py.

def _normalize_text_fields(result: "MainResult | LegacyResult"):
    """Coerce and normalize the ``title`` and ``content`` fields of *result*.

    The result object is modified in place:

    - a truthy ``title`` / ``content`` that is not a :py:obj:`str` is logged
      and converted with ``str()``,
    - every run matched by ``WHITESPACE_REGEX`` is collapsed into one space
      and the value is stripped,
    - ``content`` is set to ``""`` when it equals ``title``.
    """

    # As soon as LegacyResult is no longer needed, this function can be moved
    # into the method MainResult.normalize_result_fields

    # Actually, a type check should not be necessary if the engine is
    # implemented correctly. Historically, however, we have always had a type
    # check here.

    if result.title and not isinstance(result.title, str):
        log.debug("result: invalid type of field 'title': %s", str(result))
        # convert the field itself; str(result) would put the string form of
        # the whole result object into the title
        result.title = str(result.title)
    if result.content and not isinstance(result.content, str):
        log.debug("result: invalid type of field 'content': %s", str(result))
        # same as for the title: convert the field, not the whole result
        result.content = str(result.content)

    # normalize title and content
    if result.title:
        result.title = WHITESPACE_REGEX.sub(" ", result.title).strip()
    if result.content:
        result.content = WHITESPACE_REGEX.sub(" ", result.content).strip()
    if result.content == result.title:
        # avoid duplicate content between the content and title fields
        result.content = ""
110
111

Referenced by searx.result_types._base.LegacyResult.normalize_result_fields(), and searx.result_types._base.MainResult.normalize_result_fields().

Here is the caller graph for this function:

◆ _normalize_url_fields()

searx.result_types._base._normalize_url_fields ( "Result | LegacyResult" result)
protected

Definition at line 39 of file _base.py.

def _normalize_url_fields(result: "Result | LegacyResult"):
    """Normalize the URL fields of *result* in place.

    - If ``result.url`` is set but ``result.parsed_url`` is not, the URL is
      parsed with :py:obj:`urllib.parse.urlparse`.  A ``url`` that is not a
      :py:obj:`str` is logged and replaced by ``""`` (``parsed_url`` becomes
      ``None``).
    - A truthy ``parsed_url`` gets ``http`` as scheme when it has none, and
      ``result.url`` is rebuilt from it.
    - For a :py:obj:`LegacyResult` with a truthy ``infobox`` attribute, the
      same scheme default is applied to each ``url`` entry of ``result.urls``
      and to ``result.id``.
    """

    # As soon as LegacyResult is no longer needed, this function can be moved
    # into the method Result.normalize_result_fields

    def _with_default_scheme(raw_url):
        # re-assemble raw_url, using http when it carries no scheme
        parts = urllib.parse.urlparse(raw_url)
        return parts._replace(
            scheme=parts.scheme or "http",
            # netloc=parts.netloc.replace("www.", ""),
            path=parts.path,
        ).geturl()

    if result.url and not result.parsed_url:
        if isinstance(result.url, str):
            result.parsed_url = urllib.parse.urlparse(result.url)
        else:
            log.debug('result: invalid URL: %s', str(result))
            result.url = ""
            result.parsed_url = None

    parsed = result.parsed_url
    if parsed:
        # if the result has no scheme, use http as default
        result.parsed_url = parsed._replace(scheme=parsed.scheme or "http", path=parsed.path)
        result.url = result.parsed_url.geturl()

    is_infobox = isinstance(result, LegacyResult) and getattr(result, "infobox", None)
    if not is_infobox:
        return

    # As soon as we have InfoboxResult, the part below can be moved into the
    # method InfoboxResult.normalize_result_fields

    for entry in getattr(result, "urls", []):
        entry_url = entry.get("url")
        if entry_url:
            entry["url"] = _with_default_scheme(entry_url)

    infobox_id: str | None = getattr(result, "id", None)
    if infobox_id:
        result.id = _with_default_scheme(infobox_id)
84
85

Referenced by searx.result_types._base.LegacyResult.normalize_result_fields(), and searx.result_types._base.Result.normalize_result_fields().

Here is the caller graph for this function:

Variable Documentation

◆ __all__

list searx.result_types._base.__all__ = ["Result"]
private

Definition at line 19 of file _base.py.

◆ UNKNOWN

searx.result_types._base.UNKNOWN = object()

Definition at line 36 of file _base.py.

◆ WHITESPACE_REGEX

searx.result_types._base.WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)

Definition at line 35 of file _base.py.