.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
searx.result_types._base Namespace Reference

Classes

class  LegacyResult
 
class  MainResult
 
class  Result
 

Functions

 _normalize_url_fields (Result|LegacyResult result)
 
 _normalize_text_fields (MainResult|LegacyResult result)
 
 _filter_urls (Result|LegacyResult result, Callable[[Result|LegacyResult, str, str], str|bool] filter_func)
 
 _normalize_date_fields (MainResult|LegacyResult result)
 

Variables

list __all__ = ["Result"]
 
 WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
 
 UNKNOWN = object()
 

Detailed Description

Basic types used to give search results a well-defined type.

- :py:obj:`Result` base class
- :py:obj:`LegacyResult` for internal use only

----

.. autoclass:: Result
   :members:

.. _LegacyResult:

.. autoclass:: LegacyResult
   :members:

Function Documentation

◆ _filter_urls()

searx.result_types._base._filter_urls ( Result | LegacyResult result,
Callable[[Result | LegacyResult, str, str], str | bool] filter_func )
protected

Definition at line 115 of file _base.py.

115def _filter_urls(result: Result | LegacyResult, filter_func: Callable[[Result | LegacyResult, str, str], str | bool]):
116 # pylint: disable=too-many-branches, too-many-statements
117
118 # As soon we need LegacyResult not any longer, we can move this function to
119 # method Result.
120
121 url_fields = ["url", "iframe_src", "audio_src", "img_src", "thumbnail_src", "thumbnail"]
122
123 for field_name in url_fields:
124 url_src = getattr(result, field_name, "")
125 if not url_src:
126 continue
127
128 new_url = filter_func(result, field_name, url_src)
129 # log.debug("filter_urls: filter_func(result, %s) '%s' -> '%s'", field_name, field_value, new_url)
130 if isinstance(new_url, bool):
131 if new_url:
132 # log.debug("filter_urls: unchanged field %s URL %s", field_name, field_value)
133 continue
134 log.debug("filter_urls: drop field %s URL %s", field_name, url_src)
135 new_url = None
136 else:
137 log.debug("filter_urls: modify field %s URL %s -> %s", field_name, url_src, new_url)
138
139 setattr(result, field_name, new_url)
140 if field_name == "url":
141 # sync parsed_url with new_url
142 if not new_url:
143 result.parsed_url = None
144 elif isinstance(new_url, str):
145 result.parsed_url = urllib.parse.urlparse(new_url)
146
147 # "urls": are from infobox
148 #
149 # As soon we have InfoboxResult, we can move this function to method
150 # InfoboxResult.normalize_result_fields
151
152 infobox_urls: list[dict[str, str]] = getattr(result, "urls", [])
153
154 if infobox_urls:
155 # log.debug("filter_urls: infobox_urls .. %s", infobox_urls)
156 new_infobox_urls: list[dict[str, str]] = []
157
158 for item in infobox_urls:
159 url_src = item.get("url")
160 if not url_src:
161 new_infobox_urls.append(item)
162 continue
163
164 new_url = filter_func(result, "infobox_urls", url_src)
165 if isinstance(new_url, bool):
166 if new_url:
167 new_infobox_urls.append(item)
168 # log.debug("filter_urls: leave URL in field 'urls' ('infobox_urls') unchanged -> %s", _url)
169 continue
170 log.debug("filter_urls: remove URL from field 'urls' ('infobox_urls') URL %s", url_src)
171 new_url = None
172 if new_url:
173 log.debug("filter_urls: modify URL from field 'urls' ('infobox_urls') URL %s -> %s", url_src, new_url)
174 item["url"] = new_url
175 new_infobox_urls.append(item)
176
177 setattr(result, "urls", new_infobox_urls)
178
179 # "attributes": are from infobox
180 #
181 # The infobox has additional subsections for attributes, urls and relatedTopics:
182
183 infobox_attributes: list[dict[str, dict]] = getattr(result, "attributes", [])
184
185 if infobox_attributes:
186 # log.debug("filter_urls: infobox_attributes .. %s", infobox_attributes)
187 new_infobox_attributes: list[dict[str, dict]] = []
188
189 for item in infobox_attributes:
190 image = item.get("image", {})
191 url_src = image.get("src", "")
192 if not url_src:
193 new_infobox_attributes.append(item)
194 continue
195
196 new_url = filter_func(result, "infobox_attributes", url_src)
197 if isinstance(new_url, bool):
198 if new_url:
199 new_infobox_attributes.append(item)
200 # log.debug("filter_urls: leave URL in field 'image.src' unchanged -> %s", url_src)
201 continue
202 log.debug("filter_urls: drop field 'image.src' ('infobox_attributes') URL %s", url_src)
203 new_url = None
204
205 if new_url:
206 log.debug(
207 "filter_urls: modify 'image.src' ('infobox_attributes') URL %s -> %s",
208 url_src,
209 new_url,
210 )
211 item["image"]["src"] = new_url
212 new_infobox_attributes.append(item)
213
214 setattr(result, "attributes", new_infobox_attributes)
215
216 result.normalize_result_fields()
217
218

Referenced by searx.result_types._base.LegacyResult.filter_urls().

+ Here is the caller graph for this function:

◆ _normalize_date_fields()

searx.result_types._base._normalize_date_fields ( MainResult | LegacyResult result)
protected

Definition at line 219 of file _base.py.

219def _normalize_date_fields(result: MainResult | LegacyResult):
220
221 if result.publishedDate: # do not try to get a date from an empty string or a None type
222 try: # test if publishedDate >= 1900 (datetime module bug)
223 result.pubdate = result.publishedDate.strftime('%Y-%m-%d %H:%M:%S%z')
224 except ValueError:
225 result.publishedDate = None
226
227

Referenced by searx.result_types._base.LegacyResult.normalize_result_fields(), and searx.result_types._base.MainResult.normalize_result_fields().

+ Here is the caller graph for this function:

◆ _normalize_text_fields()

searx.result_types._base._normalize_text_fields ( MainResult | LegacyResult result)
protected

Definition at line 89 of file _base.py.

def _normalize_text_fields(result: MainResult | LegacyResult):
    """Clean up the ``title`` and ``content`` fields of ``result`` in place.

    - A non-empty ``title`` / ``content`` that is not a string is converted
      with ``str()`` (a debug message is logged).
    - Runs of whitespace matched by ``WHITESPACE_REGEX`` are collapsed to a
      single space, and leading / trailing whitespace is stripped.
    - If ``content`` ends up equal to ``title``, ``content`` is set to an empty
      string so the same text is not carried twice.
    """

    # As soon as LegacyResult is no longer needed, this function can be moved
    # to the method MainResult.normalize_result_fields

    # Actually, a type check should not be necessary if the engine is
    # implemented correctly. Historically, however, we have always had a type
    # check here.

    if result.title and not isinstance(result.title, str):
        log.debug("result: invalid type of field 'title': %s", str(result))
        # convert the field itself, not the whole result object
        result.title = str(result.title)
    if result.content and not isinstance(result.content, str):
        log.debug("result: invalid type of field 'content': %s", str(result))
        # convert the field itself, not the whole result object
        result.content = str(result.content)

    # normalize title and content
    if result.title:
        result.title = WHITESPACE_REGEX.sub(" ", result.title).strip()
    if result.content:
        result.content = WHITESPACE_REGEX.sub(" ", result.content).strip()
    if result.content == result.title:
        # avoid duplicate content between the content and title fields
        result.content = ""
113
114

Referenced by searx.result_types._base.LegacyResult.normalize_result_fields(), and searx.result_types._base.MainResult.normalize_result_fields().

+ Here is the caller graph for this function:

◆ _normalize_url_fields()

searx.result_types._base._normalize_url_fields ( Result | LegacyResult result)
protected

Definition at line 41 of file _base.py.

def _normalize_url_fields(result: Result | LegacyResult):
    """Bring the URL fields of ``result`` into a normalized form, in place.

    - If ``url`` is set but ``parsed_url`` is not, ``parsed_url`` is built from
      ``url``.  A ``url`` that is not a string is reset to ``""`` and
      ``parsed_url`` to ``None``.
    - ``parsed_url`` gets ``http`` as the scheme when it has none, and trailing
      slashes are stripped from its path; ``url`` is then rebuilt from it.
    - For a :py:obj:`LegacyResult` with a truthy ``infobox`` attribute, the same
      normalization is applied to the ``url`` of every item in ``urls`` and to
      ``id``.
    """

    # As soon as LegacyResult is no longer needed, this function can be moved
    # to the method Result.normalize_result_fields

    def _with_defaults(parts):
        # no scheme: use http as default; ``example.com/path/`` becomes
        # ``example.com/path``
        return parts._replace(scheme=parts.scheme or "http", path=parts.path.rstrip("/"))

    if result.url and not result.parsed_url:
        if isinstance(result.url, str):
            result.parsed_url = urllib.parse.urlparse(result.url)
        else:
            log.debug('result: invalid URL: %s', str(result))
            result.url = ""
            result.parsed_url = None

    if result.parsed_url:
        result.parsed_url = _with_defaults(result.parsed_url)
        result.url = result.parsed_url.geturl()

    if not (isinstance(result, LegacyResult) and getattr(result, "infobox", None)):
        return

    # As soon as there is an InfoboxResult, the rest of this function can be
    # moved to the method InfoboxResult.normalize_result_fields

    links: list[dict[str, str]] = getattr(result, "urls", [])
    for link in links:
        raw = link.get("url")
        if raw:
            link["url"] = _with_defaults(urllib.parse.urlparse(raw)).geturl()

    infobox_id = getattr(result, "id", None)
    if infobox_id:
        result.id = _with_defaults(urllib.parse.urlparse(infobox_id)).geturl()
87
88

Referenced by searx.result_types._base.LegacyResult.normalize_result_fields(), and searx.result_types._base.Result.normalize_result_fields().

+ Here is the caller graph for this function:

Variable Documentation

◆ __all__

list searx.result_types._base.__all__ = ["Result"]
private

Definition at line 22 of file _base.py.

◆ UNKNOWN

searx.result_types._base.UNKNOWN = object()

Definition at line 38 of file _base.py.

◆ WHITESPACE_REGEX

searx.result_types._base.WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)

Definition at line 37 of file _base.py.