.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
searx.result_types._base Namespace Reference

Classes

class  LegacyResult
 
class  MainResult
 
class  Result
 

Functions

 _normalize_url_fields (Result|LegacyResult result)
 
 _normalize_text_fields (MainResult|LegacyResult result)
 
 _filter_urls (Result|LegacyResult result, Callable[[Result|LegacyResult, str, str], str|bool] filter_func)
 
 _normalize_date_fields (MainResult|LegacyResult result)
 

Variables

list __all__ = ["Result"]
 
 WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
 
 UNKNOWN = object()
 

Detailed Description

Basic types for the typification of results.

- :py:obj:`Result` base class
- :py:obj:`LegacyResult` for internal use only

----

.. autoclass:: Result
   :members:

.. _LegacyResult:

.. autoclass:: LegacyResult
   :members:

Function Documentation

◆ _filter_urls()

searx.result_types._base._filter_urls ( Result | LegacyResult result,
Callable[[Result | LegacyResult, str, str], str | bool] filter_func )
protected

Definition at line 114 of file _base.py.

114def _filter_urls(result: Result | LegacyResult, filter_func: Callable[[Result | LegacyResult, str, str], str | bool]):
115 # pylint: disable=too-many-branches, too-many-statements
116
117 # As soon we need LegacyResult not any longer, we can move this function to
118 # method Result.
119
120 url_fields = ["url", "iframe_src", "audio_src", "img_src", "thumbnail_src", "thumbnail"]
121
122 for field_name in url_fields:
123 url_src = getattr(result, field_name, "")
124 if not url_src:
125 continue
126
127 new_url = filter_func(result, field_name, url_src)
128 # log.debug("filter_urls: filter_func(result, %s) '%s' -> '%s'", field_name, field_value, new_url)
129 if isinstance(new_url, bool):
130 if new_url:
131 # log.debug("filter_urls: unchanged field %s URL %s", field_name, field_value)
132 continue
133 log.debug("filter_urls: drop field %s URL %s", field_name, url_src)
134 new_url = None
135 else:
136 log.debug("filter_urls: modify field %s URL %s -> %s", field_name, url_src, new_url)
137
138 setattr(result, field_name, new_url)
139 if field_name == "url":
140 # sync parsed_url with new_url
141 if not new_url:
142 result.parsed_url = None
143 elif isinstance(new_url, str):
144 result.parsed_url = urllib.parse.urlparse(new_url)
145
146 # "urls": are from infobox
147 #
148 # As soon we have InfoboxResult, we can move this function to method
149 # InfoboxResult.normalize_result_fields
150
151 infobox_urls: list[dict[str, str]] = getattr(result, "urls", [])
152
153 if infobox_urls:
154 # log.debug("filter_urls: infobox_urls .. %s", infobox_urls)
155 new_infobox_urls: list[dict[str, str]] = []
156
157 for item in infobox_urls:
158 url_src = item.get("url")
159 if not url_src:
160 new_infobox_urls.append(item)
161 continue
162
163 new_url = filter_func(result, "infobox_urls", url_src)
164 if isinstance(new_url, bool):
165 if new_url:
166 new_infobox_urls.append(item)
167 # log.debug("filter_urls: leave URL in field 'urls' ('infobox_urls') unchanged -> %s", _url)
168 continue
169 log.debug("filter_urls: remove URL from field 'urls' ('infobox_urls') URL %s", url_src)
170 new_url = None
171 if new_url:
172 log.debug("filter_urls: modify URL from field 'urls' ('infobox_urls') URL %s -> %s", url_src, new_url)
173 item["url"] = new_url
174 new_infobox_urls.append(item)
175
176 setattr(result, "urls", new_infobox_urls)
177
178 # "attributes": are from infobox
179 #
180 # The infobox has additional subsections for attributes, urls and relatedTopics:
181
182 infobox_attributes: list[dict[str, dict]] = getattr(result, "attributes", [])
183
184 if infobox_attributes:
185 # log.debug("filter_urls: infobox_attributes .. %s", infobox_attributes)
186 new_infobox_attributes: list[dict[str, dict]] = []
187
188 for item in infobox_attributes:
189 image = item.get("image", {})
190 url_src = image.get("src", "")
191 if not url_src:
192 new_infobox_attributes.append(item)
193 continue
194
195 new_url = filter_func(result, "infobox_attributes", url_src)
196 if isinstance(new_url, bool):
197 if new_url:
198 new_infobox_attributes.append(item)
199 # log.debug("filter_urls: leave URL in field 'image.src' unchanged -> %s", url_src)
200 continue
201 log.debug("filter_urls: drop field 'image.src' ('infobox_attributes') URL %s", url_src)
202 new_url = None
203
204 if new_url:
205 log.debug(
206 "filter_urls: modify 'image.src' ('infobox_attributes') URL %s -> %s",
207 url_src,
208 new_url,
209 )
210 item["image"]["src"] = new_url
211 new_infobox_attributes.append(item)
212
213 setattr(result, "attributes", new_infobox_attributes)
214
215 result.normalize_result_fields()
216
217

Referenced by searx.result_types._base.LegacyResult.filter_urls().

+ Here is the caller graph for this function:

◆ _normalize_date_fields()

searx.result_types._base._normalize_date_fields ( MainResult | LegacyResult result)
protected

Definition at line 218 of file _base.py.

218def _normalize_date_fields(result: MainResult | LegacyResult):
219
220 if result.publishedDate: # do not try to get a date from an empty string or a None type
221 try: # test if publishedDate >= 1900 (datetime module bug)
222 result.pubdate = result.publishedDate.strftime('%Y-%m-%d %H:%M:%S%z')
223 except ValueError:
224 result.publishedDate = None
225
226

Referenced by searx.result_types._base.LegacyResult.normalize_result_fields(), and searx.result_types._base.MainResult.normalize_result_fields().

+ Here is the caller graph for this function:

◆ _normalize_text_fields()

searx.result_types._base._normalize_text_fields ( MainResult | LegacyResult result)
protected

Definition at line 88 of file _base.py.

def _normalize_text_fields(result: MainResult | LegacyResult):
    """Normalize the ``title`` and ``content`` fields of ``result`` in place.

    - A non-empty value that is not a ``str`` is logged and converted to a
      string with ``str()``.
    - Runs of spaces, tabs and newlines (``WHITESPACE_REGEX``) are collapsed
      into a single space and leading / trailing whitespace is stripped.
    - ``content`` is set to an empty string when, after normalization, it is
      identical to ``title``.
    """

    # Once LegacyResult is no longer needed, this function can move into
    # method MainResult.normalize_result_fields

    # Strictly speaking, a type check should not be necessary if the engine is
    # implemented correctly.  Historically, however, there has always been a
    # type check here.

    if result.title and not isinstance(result.title, str):
        log.debug("result: invalid type of field 'title': %s", str(result))
        # convert the field itself; str(result) would store the string form of
        # the whole result object in the title
        result.title = str(result.title)
    if result.content and not isinstance(result.content, str):
        log.debug("result: invalid type of field 'content': %s", str(result))
        # same as above: convert the field, not the whole result
        result.content = str(result.content)

    # normalize title and content
    if result.title:
        result.title = WHITESPACE_REGEX.sub(" ", result.title).strip()
    if result.content:
        result.content = WHITESPACE_REGEX.sub(" ", result.content).strip()
        if result.content == result.title:
            # avoid duplicate content between the content and title fields
            result.content = ""

Referenced by searx.result_types._base.LegacyResult.normalize_result_fields(), and searx.result_types._base.MainResult.normalize_result_fields().

+ Here is the caller graph for this function:

◆ _normalize_url_fields()

searx.result_types._base._normalize_url_fields ( Result | LegacyResult result)
protected

Definition at line 41 of file _base.py.

def _normalize_url_fields(result: Result | LegacyResult):
    """Normalize the URL fields of ``result`` in place.

    - If ``url`` is set but ``parsed_url`` is not, ``parsed_url`` is built from
      ``url``.  A ``url`` that is not a ``str`` is logged, ``url`` is reset to
      an empty string and ``parsed_url`` to ``None``.
    - If ``parsed_url`` is set, a missing scheme is replaced by ``http`` and
      ``url`` is regenerated from ``parsed_url``.
    - For a :py:obj:`LegacyResult` with a non-empty ``infobox`` field, the same
      scheme default is applied to the ``url`` of every item in ``urls`` and to
      the ``id`` of the result.
    """

    # Once LegacyResult is no longer needed, this function can move into
    # method Result.normalize_result_fields

    def _default_to_http(parsed):
        # if the URL has no scheme, use http as default
        return parsed._replace(scheme=parsed.scheme or "http", path=parsed.path)

    if result.url and not result.parsed_url:
        if isinstance(result.url, str):
            result.parsed_url = urllib.parse.urlparse(result.url)
        else:
            log.debug('result: invalid URL: %s', str(result))
            result.url = ""
            result.parsed_url = None

    if result.parsed_url:
        result.parsed_url = _default_to_http(result.parsed_url)
        result.url = result.parsed_url.geturl()

    is_legacy_infobox = isinstance(result, LegacyResult) and getattr(result, "infobox", None)
    if not is_legacy_infobox:
        return

    # Once InfoboxResult exists, the remainder can move into method
    # InfoboxResult.normalize_result_fields

    for link in getattr(result, "urls", []):
        raw_url = link.get("url")
        if raw_url:
            link["url"] = _default_to_http(urllib.parse.urlparse(raw_url)).geturl()

    infobox_id = getattr(result, "id", None)
    if infobox_id:
        result.id = _default_to_http(urllib.parse.urlparse(infobox_id)).geturl()

Referenced by searx.result_types._base.LegacyResult.normalize_result_fields(), and searx.result_types._base.Result.normalize_result_fields().

+ Here is the caller graph for this function:

Variable Documentation

◆ __all__

list searx.result_types._base.__all__ = ["Result"]
private

Definition at line 22 of file _base.py.

◆ UNKNOWN

searx.result_types._base.UNKNOWN = object()

Definition at line 38 of file _base.py.

◆ WHITESPACE_REGEX

searx.result_types._base.WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)

Definition at line 37 of file _base.py.