.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
_base.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2# pylint: disable=too-few-public-methods, missing-module-docstring
3"""Basic types for the typification of results.
4
5- :py:obj:`Result` base class
6- :py:obj:`LegacyResult` for internal use only
7
8----
9
10.. autoclass:: Result
11 :members:
12
13.. autoclass:: LegacyResult
14 :members:
15"""
16
17
18from __future__ import annotations
19
20__all__ = ["Result"]
21
22import re
23import urllib.parse
24import warnings
25
26import msgspec
27
28
class Result(msgspec.Struct, kw_only=True):
    """Base class of all result types :ref:`result types`."""

    url: str | None = None
    """A link related to this *result*"""

    template: str = "default.html"
    """Name of the template used to render the result.

    By default :origin:`result_templates/default.html
    <searx/templates/simple/result_templates/default.html>` is used.
    """

    engine: str | None = ""
    """Name of the engine *this* result comes from.  In case of *plugins* a
    prefix ``plugin:`` is set, in case of *answerer* prefix ``answerer:`` is
    set.

    The field is optional and is initialized from the context if necessary.
    """

    parsed_url: urllib.parse.ParseResult | None = None
    """:py:obj:`urllib.parse.ParseResult` of :py:obj:`Result.url`.

    The field is optional and is initialized from the context if necessary.
    """

    # NOTE(review): the ``def`` line of this method was absent from the
    # listing this block was recovered from (only the docstring and the body
    # were present).  The header is restored here as
    # ``normalize_result_fields`` -- confirm the name against the repository.
    def normalize_result_fields(self):
        """Normalize a result ..

        - if field ``url`` is set and field ``parsed_url`` is unset, init
          ``parsed_url`` from field ``url``.  If the parsed URL has no scheme,
          ``http`` is used as default and ``url`` is rebuilt from it.

        This method can be extended in the inheritance.
        """

        if not self.parsed_url and self.url:
            self.parsed_url = urllib.parse.urlparse(self.url)

            # if the result has no scheme, use http as default
            if not self.parsed_url.scheme:
                self.parsed_url = self.parsed_url._replace(scheme="http")
                self.url = self.parsed_url.geturl()

    def __post_init__(self):
        pass

    def __hash__(self) -> int:
        """Generates a hash value that uniquely identifies the content of *this*
        result.  The method can be adapted in the inheritance to compare results
        from different sources.

        If two result objects are not identical but have the same content, their
        hash values should also be identical.

        The hash value is used in contexts, e.g. when checking for equality to
        identify identical results from different sources (engines).

        The base implementation returns ``id(self)``, i.e. every instance is
        unique unless a subclass overrides this method.
        """

        return id(self)

    def __eq__(self, other) -> bool:
        """:py:obj:`Result` objects are equal if the hash values of the two
        objects are equal.  If needed, it is recommended to overwrite
        :py:obj:`Result.__hash__`.

        An unhashable ``other`` (e.g. a plain ``dict``) has no hash value to
        compare with and is therefore never equal.
        """

        try:
            other_hash = hash(other)
        except TypeError:
            # hash() raises TypeError for unhashable objects; a comparison
            # must not blow up, such an object is simply "not equal".
            return False
        return hash(self) == other_hash

    # for legacy code where a result is treated as a Python dict

    def __setitem__(self, field_name, value):
        """Set field ``field_name`` to ``value`` (``result[field_name] = value``)."""

        setattr(self, field_name, value)

    def __getitem__(self, field_name):
        """Return the value of field ``field_name``.

        :raises KeyError: if ``field_name`` is not a field of this struct.
        """

        if field_name not in self.__struct_fields__:
            raise KeyError(f"{field_name}")
        return getattr(self, field_name)

    def __iter__(self):
        """Iterate over the field names, like iterating over the keys of a dict."""

        return iter(self.__struct_fields__)

    def as_dict(self):
        """Return a new ``dict`` mapping each field name to its current value."""
        return {f: getattr(self, f) for f in self.__struct_fields__}
115
116
class MainResult(Result):  # pylint: disable=missing-class-docstring

    # open_group and close_group should not be managed in the Result class
    # (we should drop them from here!)
    open_group: bool = False
    close_group: bool = False

    title: str = ""
    """Title of the result item, rendered as the text of the link."""

    content: str = ""
    """Extract or description of the result item."""

    img_src: str = ""
    """URL of an image that is displayed in the result item."""

    thumbnail: str = ""
    """URL of a thumbnail that is displayed in the result item."""
134
135
class LegacyResult(dict):
    """A wrapper around a legacy result item.  The SearXNG core uses this class
    for untyped dictionaries / to be downward compatible.

    This class is needed until we have implemented an :py:obj:`Result` class for
    each result type and the old usages in the codebase have been fully
    migrated.

    There is only one place where this class is used, in the
    :py:obj:`searx.results.ResultContainer`.

    Keys of the dictionary are also available as attributes: reading an
    attribute that is not found by the normal lookup returns the item of the
    same name, and *every* attribute assignment is stored as a dictionary item.

    .. attention::

        Do not use this class in your own implementations!
    """

    # sentinel for "no default given" (``None`` is a legitimate default value)
    UNSET = object()
    WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)

    def as_dict(self):
        """Return the result as a ``dict`` -- the object itself, not a copy."""
        return self

    def __init__(self, *args, **kwargs):
        """Build the dictionary from ``args`` / ``kwargs`` (same signature as
        :py:obj:`dict`) and make sure the basic fields exist."""

        super().__init__(*args, **kwargs)

        # Init fields with defaults / compare with defaults of the fields in class Result
        self.engine = self.get("engine", "")
        self.template = self.get("template", "default.html")
        self.url = self.get("url", None)
        self.parsed_url = self.get("parsed_url", None)

        self.content = self.get("content", "")
        self.title = self.get("title", "")

        # Legacy types that have already been ported to a type ..

        if "answer" in self:
            warnings.warn(
                f"engine {self.engine} is using deprecated `dict` for answers"
                f" / use a class from searx.result_types.answer",
                DeprecationWarning,
            )
            self.template = "answer/legacy.html"

    def __hash__(self) -> int:  # type: ignore
        """Hash of the *content* of the result.

        - answers are identified by the value of ``answer``
        - common URL results are identified by their ``url``
        - all other kinds (suggestion, correction, infobox, ..) are unique
          per object (``id(self)``)
        """

        if "answer" in self:
            return hash(self["answer"])
        if not any(cls in self for cls in ["suggestion", "correction", "infobox", "number_of_results", "engine_data"]):
            # it is a common url-result ..
            return hash(self.url)
        return id(self)

    def __eq__(self, other) -> bool:
        """Two results are equal if their hash values are equal.  An
        unhashable ``other`` (e.g. a plain ``dict``) is never equal."""

        try:
            other_hash = hash(other)
        except TypeError:
            # hash() raises TypeError for unhashable objects; a comparison
            # must not blow up, such an object is simply "not equal".
            return False
        return hash(self) == other_hash

    def __repr__(self) -> str:

        return f"LegacyResult: {super().__repr__()}"

    def __getattr__(self, name: str, default=UNSET):
        """Return the item ``name``.  Python calls this only when the normal
        attribute lookup fails.

        :raises AttributeError: if there is no item ``name`` and no
            ``default`` has been passed.
        """

        if name in self:
            return self[name]
        # the sentinel has to be compared by identity, ``==`` would call the
        # ``__eq__`` of whatever object has been passed as default
        if default is self.UNSET:
            raise AttributeError(f"LegacyResult object has no field named: {name}")
        return default

    def __setattr__(self, name: str, val):
        """Store every attribute assignment as a dictionary item."""

        self[name] = val

    # NOTE(review): the ``def`` line of this method was absent from the
    # listing this block was recovered from (only the body was present).  The
    # header is restored here as ``normalize_result_fields`` -- confirm the
    # name against the repository.
    def normalize_result_fields(self):
        """Normalize the fields ``title``, ``url`` / ``parsed_url`` and
        ``content`` in place.

        - runs of spaces, tabs and newlines in ``title`` and ``content`` are
          collapsed to a single space
        - if ``url`` is set and ``parsed_url`` is unset, ``parsed_url`` is
          initialized from ``url``; ``http`` is the default scheme
        - ``content`` is emptied if it only repeats the ``title``
        """

        self.title = self.WHITESPACE_REGEX.sub(" ", self.title)

        if not self.parsed_url and self.url:
            self.parsed_url = urllib.parse.urlparse(self.url)

            # if the result has no scheme, use http as default
            if not self.parsed_url.scheme:
                self.parsed_url = self.parsed_url._replace(scheme="http")
                self.url = self.parsed_url.geturl()

        if self.content:
            self.content = self.WHITESPACE_REGEX.sub(" ", self.content)
            if self.content == self.title:
                # avoid duplicate content between the content and title fields
                self.content = ""
__getattr__(self, str name, default=UNSET)
Definition _base.py:198
__setattr__(self, str name, val)
Definition _base.py:204
__init__(self, *args, **kwargs)
Definition _base.py:158
__setitem__(self, field_name, value)
Definition _base.py:99
__getitem__(self, field_name)
Definition _base.py:103