46 if result.url
and not result.parsed_url:
47 if not isinstance(result.url, str):
48 log.debug(
'result: invalid URL: %s', str(result))
50 result.parsed_url =
None
52 result.parsed_url = urllib.parse.urlparse(result.url)
55 result.parsed_url = result.parsed_url._replace(
57 scheme=result.parsed_url.scheme
or "http",
59 path=result.parsed_url.path.rstrip(
"/"),
61 result.url = result.parsed_url.geturl()
63 if isinstance(result, LegacyResult)
and getattr(result,
"infobox",
None):
67 infobox_urls: list[dict[str, str]] = getattr(result,
"urls", [])
68 for item
in infobox_urls:
69 _url = item.get(
"url")
72 _url = urllib.parse.urlparse(_url)
73 item[
"url"] = _url._replace(
74 scheme=_url.scheme
or "http",
76 path=_url.path.rstrip(
"/"),
79 infobox_id = getattr(result,
"id",
None)
81 _url = urllib.parse.urlparse(infobox_id)
82 result.id = _url._replace(
83 scheme=_url.scheme
or "http",
85 path=_url.path.rstrip(
"/"),
98 if result.title
and not isinstance(result.title, str):
99 log.debug(
"result: invalid type of field 'title': %s", str(result))
100 result.title = str(result)
101 if result.content
and not isinstance(result.content, str):
102 log.debug(
"result: invalid type of field 'content': %s", str(result))
103 result.content = str(result)
107 result.title = WHITESPACE_REGEX.sub(
" ", result.title).strip()
109 result.content = WHITESPACE_REGEX.sub(
" ", result.content).strip()
110 if result.content == result.title:
def _filter_urls(result: Result | LegacyResult, filter_func: Callable[[Result | LegacyResult, str, str], str | bool]):
    """Run ``filter_func`` over every URL stored in ``result`` and apply its verdict.

    ``filter_func(result, field_name, url)`` is called once for each non-empty
    URL.  Its return value decides what happens to that URL:

    - ``True``: the URL is left untouched,
    - ``False``: the URL is dropped,
    - ``str``: the URL is replaced by the returned string.

    Three places are visited, in this order:

    1. the plain URL fields (``url``, ``iframe_src``, ``audio_src``,
       ``img_src``, ``thumbnail_src``, ``thumbnail``); ``field_name`` is the
       name of the field,
    2. the ``"url"`` entry of each item in ``result.urls``; ``field_name`` is
       ``"infobox_urls"``,
    3. the ``"image" / "src"`` entry of each item in ``result.attributes``;
       ``field_name`` is ``"infobox_attributes"``.

    A dropped plain field is set to ``None``.  In the two lists, an item whose
    URL is dropped (or replaced by an empty value) is removed from the list,
    while items that carry no URL at all are kept unchanged.

    When everything has been filtered, ``result.normalize_result_fields()`` is
    called.
    """

    url_fields = ["url", "iframe_src", "audio_src", "img_src", "thumbnail_src", "thumbnail"]

    for field_name in url_fields:
        url_src = getattr(result, field_name, "")
        if not url_src:
            # nothing to filter in this field
            continue

        new_url = filter_func(result, field_name, url_src)
        if isinstance(new_url, bool):
            if new_url:
                # True: keep the field as it is
                continue
            log.debug("filter_urls: drop field %s URL %s", field_name, url_src)
            new_url = None
        else:
            log.debug("filter_urls: modify field %s URL %s -> %s", field_name, url_src, new_url)

        setattr(result, field_name, new_url)
        if field_name == "url":
            # keep parsed_url in sync with the new value of field url
            if not new_url:
                result.parsed_url = None
            elif isinstance(new_url, str):
                result.parsed_url = urllib.parse.urlparse(new_url)

    # field "urls" (infobox): list of dicts with an optional "url" entry

    infobox_urls: list[dict[str, str]] = getattr(result, "urls", [])

    # only write the field back when there is something to filter, results
    # without a "urls" field do not get one by accident
    if infobox_urls:
        new_infobox_urls: list[dict[str, str]] = []

        for item in infobox_urls:
            url_src = item.get("url")
            if not url_src:
                # items without a URL pass through unfiltered
                new_infobox_urls.append(item)
                continue

            new_url = filter_func(result, "infobox_urls", url_src)
            if isinstance(new_url, bool):
                if new_url:
                    new_infobox_urls.append(item)
                else:
                    log.debug("filter_urls: remove URL from field 'urls' ('infobox_urls') URL %s", url_src)
                continue

            # an empty replacement removes the item from the list
            if new_url:
                log.debug("filter_urls: modify URL from field 'urls' ('infobox_urls') URL %s -> %s", url_src, new_url)
                item["url"] = new_url
                new_infobox_urls.append(item)

        setattr(result, "urls", new_infobox_urls)

    # field "attributes" (infobox): list of dicts with an optional image.src

    infobox_attributes: list[dict[str, dict]] = getattr(result, "attributes", [])

    if infobox_attributes:
        new_infobox_attributes: list[dict[str, dict]] = []

        for item in infobox_attributes:
            image = item.get("image", {})
            url_src = image.get("src", "")
            if not url_src:
                # attributes without an image URL pass through unfiltered
                new_infobox_attributes.append(item)
                continue

            new_url = filter_func(result, "infobox_attributes", url_src)
            if isinstance(new_url, bool):
                if new_url:
                    new_infobox_attributes.append(item)
                else:
                    log.debug("filter_urls: drop field 'image.src' ('infobox_attributes') URL %s", url_src)
                continue

            # an empty replacement removes the attribute from the list
            if new_url:
                log.debug(
                    "filter_urls: modify 'image.src' ('infobox_attributes') URL %s -> %s",
                    url_src,
                    new_url,
                )
                item["image"]["src"] = new_url
                new_infobox_attributes.append(item)

        setattr(result, "attributes", new_infobox_attributes)

    result.normalize_result_fields()
420 """A wrapper around a legacy result item. The SearXNG core uses this class
421 for untyped dictionaries / to be downward compatible.
423 This class is needed until we have implemented a :py:obj:`Result` class for
424 each result type and the old usages in the codebase have been fully migrated.
427 There is only one place where this class is used, in the
428 :py:obj:`searx.results.ResultContainer`.
432 Do not use this class in your own implementations!
441 parsed_url: urllib.parse.ParseResult |
None
448 priority: typing.Literal[
"",
"high",
"low"]
453 publishedDate: datetime.datetime |
None =
None
457 urls: list[dict[str, str]]
458 attributes: list[dict[str, str]]
468 self[
"url"] = self.get(
"url")
469 self[
"template"] = self.get(
"template",
"default.html")
470 self[
"engine"] = self.get(
"engine",
"")
471 self[
"parsed_url"] = self.get(
"parsed_url")
474 self[
"title"] = self.get(
"title",
"")
475 self[
"content"] = self.get(
"content",
"")
476 self[
"img_src"] = self.get(
"img_src",
"")
477 self[
"thumbnail"] = self.get(
"thumbnail",
"")
478 self[
"priority"] = self.get(
"priority",
"")
479 self[
"engines"] = self.get(
"engines", set())
480 self[
"positions"] = self.get(
"positions",
"")
481 self[
"score"] = self.get(
"score", 0)
482 self[
"category"] = self.get(
"category",
"")
484 if "infobox" in self:
485 self[
"urls"] = self.get(
"urls", [])
486 self[
"attributes"] = self.get(
"attributes", [])
492 f
"engine {self.engine} is using deprecated `dict` for answers"
493 f
" / use a class from searx.result_types.answer",
498 if self.
template ==
"keyvalue.html":
500 f
"engine {self.engine} is using deprecated `dict` for key/value results"
501 f
" / use a class from searx.result_types",
506 if default == self.
UNSET and name
not in self:
507 raise AttributeError(f
"LegacyResult object has no field named: {name}")
517 return hash(self[
"answer"])
522 return hash(f
"{self.template}|{self.url}|{self.img_src}")
524 if not any(cls
in self
for cls
in [
"suggestion",
"correction",
"infobox",
"number_of_results",
"engine_data"]):
530 raise ValueError(f
"missing a value in field 'parsed_url': {self}")
535 + f
"|{url.netloc}|{url.path}|{url.params}|{url.query}|{url.fragment}"
543 return hash(self) == hash(other)
547 return f
"LegacyResult: {super().__repr__()}"
557 for k, v
in other.items():
561 def filter_urls(self, filter_func: Callable[[Result | LegacyResult, str, str], str | bool]):
562 """See :py:obj:`Result.filter_urls`"""
normalize_result_fields(self)