.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
abstract.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""Abstract base classes for all engine processors."""
3
4import typing as t
5
6import logging
7import threading
8from abc import abstractmethod, ABC
9from timeit import default_timer
10
11from searx import get_setting
12from searx import logger
13from searx.engines import engines
14from searx.network import get_time_for_thread, get_network
15from searx.metrics import histogram_observe, counter_inc, count_exception, count_error
16from searx.exceptions import SearxEngineAccessDeniedException
17from searx.utils import get_engine_from_settings
18
19if t.TYPE_CHECKING:
20 import types
21 from searx.enginelib import Engine
22 from searx.search.models import SearchQuery
23 from searx.results import ResultContainer
24 from searx.result_types import Result, LegacyResult # pyright: ignore[reportPrivateLocalImportUsage]
25
26
27logger = logger.getChild("searx.search.processor")
28SUSPENDED_STATUS: dict[int | str, "SuspendedStatus"] = {}
29
30
31class RequestParams(t.TypedDict):
32 """Basic quantity of the Request parameters of all engine types."""
33
34 query: str
35 """Search term, stripped of search syntax arguments."""
36
37 category: str
38 """Current category, like ``general``.
39
40 .. hint::
41
42 This field is deprecated, don't use it in further implementations.
43
44 This field is currently *arbitrarily* filled with the name of "one""
45 category (the name of the first category of the engine). In practice,
46 however, it is not clear what this "one" category should be; in principle,
47 multiple categories can also be activated in a search.
48 """
49
50 pageno: int
51 """Current page number, where the first page is ``1``."""
52
53 safesearch: t.Literal[0, 1, 2]
54 """Safe-Search filter (0:normal, 1:moderate, 2:strict)."""
55
56 time_range: t.Literal["day", "week", "month", "year"] | None
57 """Time-range filter."""
58
59 engine_data: dict[str, str]
60 """Allows the transfer of (engine specific) data to the next request of the
61 client. In the case of the ``online`` engines, this data is delivered to
62 the client via the HTML ``<form>`` in response.
63
64 If the client then sends this form back to the server with the next request,
65 this data will be available.
66
67 This makes it possible to carry data from one request to the next without a
68 session context, but this feature (is fragile) and should only be used in
69 exceptional cases. See also :ref:`engine_data`."""
70
71 searxng_locale: str
72 """Language / locale filter from the search request, a string like 'all',
73 'en', 'en-US', 'zh-HK' .. and others, for more details see
74 :py:obj:`searx.locales`."""
75
76
78 """Class to handle suspend state."""
79
def __init__(self):
    """Start in the *not suspended* state.

    No error has been counted yet, there is no suspension end time and no
    suspend reason.
    """
    # State that describes the suspension itself ...
    self.suspend_reason: str = ""
    self.suspend_end_time: float = 0
    self.continuous_errors: int = 0
    # ... and the lock that ``suspend`` / ``resume`` hold while changing it.
    self.lock: threading.Lock = threading.Lock()
85
@property
def is_suspended(self):
    """``True`` as long as ``suspend_end_time`` has not been passed yet.

    The end time is compared against the current reading of
    ``timeit.default_timer``; an end time equal to *now* still counts as
    suspended.
    """
    now = default_timer()
    return now <= self.suspend_end_time
89
def suspend(self, suspended_time: int | None, suspend_reason: str):
    """Suspend for a period of time and remember the reason.

    Each call counts one more continuous error.  ``suspended_time`` is the
    duration of the suspension in seconds.  If it is ``None``, the duration
    is derived from the settings: ``search.ban_time_on_fail`` multiplied by
    the number of continuous errors, capped by
    ``search.max_ban_time_on_fail``.  ``suspend_reason`` is stored in
    ``self.suspend_reason``.
    """
    with self.lock:
        # update continuous_errors / suspend_end_time
        self.continuous_errors += 1
        if suspended_time is None:
            max_ban: int = get_setting("search.max_ban_time_on_fail")
            ban_fail: int = get_setting("search.ban_time_on_fail")
            # Linear back-off: every further error in a row extends the ban
            # by one more ``ban_time_on_fail``, but never beyond the maximum.
            # Without the multiplier the error counter would have no effect
            # and the maximum could never act as a cap.
            suspended_time = min(max_ban, self.continuous_errors * ban_fail)

        self.suspend_end_time = default_timer() + suspended_time
        self.suspend_reason = suspend_reason
        logger.debug("Suspend for %i seconds", suspended_time)
102
def resume(self):
    """Leave the suspended state.

    While holding ``self.lock``, the error counter, the suspension end time
    and the suspend reason are set back to their initial values.
    """
    with self.lock:
        self.continuous_errors, self.suspend_end_time, self.suspend_reason = 0, 0, ""
109
110
112 """Base class used for all types of request processors."""
113
114 engine_type: str
115
def __init__(self, engine: "Engine|types.ModuleType"):
    """Bind the processor to ``engine`` and attach its suspended status.

    The logger is taken from the entry of ``engine.name`` in ``engines``.
    The suspended status is looked up in (or added to) the module level
    ``SUSPENDED_STATUS`` mapping.  The key is the ``id()`` of the object
    returned by ``get_network`` for the engine or, if that object is falsy,
    the engine's name.  Processors that resolve to the same key therefore
    share one status object.
    """
    self.engine: "Engine" = engine  # pyright: ignore[reportAttributeAccessIssue]
    self.logger: logging.Logger = engines[engine.name].logger

    network = get_network(self.engine.name)
    if network:
        status_key = id(network)
    else:
        status_key = self.engine.name
    self.suspended_status: SuspendedStatus = SUSPENDED_STATUS.setdefault(status_key, SuspendedStatus())
122
def initialize(self, callback: t.Callable[["EngineProcessor", bool], bool]):
    """Initialization of *this* :py:obj:`EngineProcessor`.

    Three cases are distinguished:

    - The engine has no ``init`` attribute: ``callback(self, True)`` is called
      right away.
    - The engine has an ``init`` attribute that is not callable: an error is
      logged and ``callback(self, False)`` is called right away.
    - Otherwise a daemon thread is started that runs :py:obj:`init_engine` and
      passes its result to ``callback``.  In this case the ``callback`` (the
      *registration*) may be called only after this method has returned.
    """

    def __init_processor_thread():
        callback(self, self.init_engine())

    if hasattr(self.engine, "init"):
        if callable(self.engine.init):
            # engine's init may take a while: run it in a thread of its own
            worker = threading.Thread(target=__init_processor_thread, daemon=True)
            worker.start()
        else:
            logger.error("Engine's init method isn't a callable (is of type: %s).", type(self.engine.init))
            callback(self, False)
    else:
        callback(self, True)
150
def init_engine(self) -> bool:
    """Call the engine's ``init`` with the engine's settings.

    The settings are fetched by ``get_engine_from_settings`` using the
    engine's name.  If ``init`` raises an exception, the exception is logged
    and ``False`` is returned.  A return value of ``None`` from ``init`` is
    treated as success; any other value is returned as it is.
    """
    eng_setting = get_engine_from_settings(self.engine.name)
    try:
        outcome: bool | None = self.engine.init(eng_setting)
    except Exception:  # pylint: disable=broad-except
        logger.exception("Init method of engine %s failed due to an exception.", self.engine.name)
        return False
    # In older engines, None is returned from the init method, which is
    # equivalent to indicating that the initialization was successful.
    return True if outcome is None else outcome
164
166 self,
167 result_container: "ResultContainer",
168 exception_or_message: BaseException | str,
169 suspend: bool = False,
170 ):
171 # update result_container
172 if isinstance(exception_or_message, BaseException):
173 exception_class = exception_or_message.__class__
174 module_name = getattr(exception_class, '__module__', 'builtins')
175 module_name = '' if module_name == 'builtins' else module_name + '.'
176 error_message = module_name + exception_class.__qualname__
177 else:
178 error_message = exception_or_message
179 result_container.add_unresponsive_engine(self.engine.name, error_message)
180 # metrics
181 counter_inc('engine', self.engine.name, 'search', 'count', 'error')
182 if isinstance(exception_or_message, BaseException):
183 count_exception(self.engine.name, exception_or_message)
184 else:
185 count_error(self.engine.name, exception_or_message)
186 # suspend the engine ?
187 if suspend:
188 suspended_time = None
189 if isinstance(exception_or_message, SearxEngineAccessDeniedException):
190 suspended_time = exception_or_message.suspended_time
191 self.suspended_status.suspend(suspended_time, error_message) # pylint: disable=no-member
192
194 self,
195 result_container: "ResultContainer",
196 start_time: float,
197 search_results: "list[Result | LegacyResult]",
198 ):
199 # update result_container
200 result_container.extend(self.engine.name, search_results)
201 engine_time = default_timer() - start_time
202 page_load_time = get_time_for_thread()
203 result_container.add_timing(self.engine.name, engine_time, page_load_time)
204 # metrics
205 counter_inc('engine', self.engine.name, 'search', 'count', 'successful')
206 histogram_observe(engine_time, 'engine', self.engine.name, 'time', 'total')
207 if page_load_time is not None:
208 histogram_observe(page_load_time, 'engine', self.engine.name, 'time', 'http')
209
211 self,
212 result_container: "ResultContainer",
213 start_time: float,
214 search_results: "list[Result | LegacyResult]|None",
215 ):
216 if getattr(threading.current_thread(), '_timeout', False):
217 # the main thread is not waiting anymore
218 self.handle_exception(result_container, 'timeout', False)
219 else:
220 # check if the engine accepted the request
221 if search_results is not None:
222 self._extend_container_basic(result_container, start_time, search_results)
223 self.suspended_status.resume()
224
def extend_container_if_suspended(self, result_container: "ResultContainer") -> bool:
    """Report the engine as unresponsive if it is currently suspended.

    If the suspended status says the engine is suspended, the engine is added
    to the unresponsive engines of ``result_container`` (with the stored
    suspend reason and ``suspended=True``) and ``True`` is returned.
    Otherwise the container is left untouched and ``False`` is returned.
    """
    status = self.suspended_status
    if not status.is_suspended:
        return False
    result_container.add_unresponsive_engine(self.engine.name, status.suspend_reason, suspended=True)
    return True
232
def get_params(self, search_query: "SearchQuery", engine_category: str) -> RequestParams | None:
    """Build the :ref:`request parameters <engine request arguments>`
    (:py:obj:`RequestParams`) for a request to this engine.

    ``None`` is returned if the engine does not support the search condition:

    - page number > 1 while the engine does not support paging
    - page number > ``max_page`` (the engine's ``max_page`` or, if that is not
      set, the ``search.max_page`` setting)
    - a *time range* filter is part of the search condition, but the engine
      has no corresponding filter

    """
    engine = self.engine
    page = search_query.pageno

    # no paging support --> only the first page can be served
    if page > 1 and not engine.paging:
        return None

    # requested page is beyond the page limit
    page_limit = engine.max_page or get_setting("search.max_page")
    if page_limit and page_limit < page:
        return None

    # time range requested, but the engine can't filter by time range
    if search_query.time_range and not engine.time_range_support:
        return None

    params: RequestParams = dict(  # pyright: ignore[reportAssignmentType]
        query=search_query.query,
        category=engine_category,
        pageno=page,
        safesearch=search_query.safesearch,
        time_range=search_query.time_range,
        engine_data=search_query.engine_data.get(engine.name, {}),
        searxng_locale=search_query.lang,
    )

    # deprecated / vintage --> use params["searxng_locale"]
    #
    # Conditions related to engine's traits are implemented in engine.traits
    # module. Don't do "locale" decisions here in the abstract layer of the
    # search processor, just pass the value from user's choice unchanged to
    # the engine request.
    #
    # A (non empty) ``language`` of the engine takes precedence over the
    # language of the search query.
    engine_language = getattr(engine, "language", None)
    params["language"] = engine_language or search_query.lang  # pyright: ignore[reportGeneralTypeIssues]

    return params
280
@abstractmethod
def search(
    self,
    query: str,
    params: RequestParams,
    result_container: "ResultContainer",
    start_time: float,
    timeout_limit: float,
):
    """Abstract method, to be implemented by the concrete processors.

    The implementation in this base class is intentionally empty.
    """
291
def get_tests(self):
    """Deprecated!  Always returns a new, empty dictionary."""
    no_tests = {}
    return no_tests
295
297 # deprecated!
298 return {}
RequestParams|None get_params(self, "SearchQuery" search_query, str engine_category)
Definition abstract.py:233
bool extend_container_if_suspended(self, "ResultContainer" result_container)
Definition abstract.py:225
_extend_container_basic(self, "ResultContainer" result_container, float start_time, "list[Result | LegacyResult]" search_results)
Definition abstract.py:198
extend_container(self, "ResultContainer" result_container, float start_time, "list[Result | LegacyResult]|None" search_results)
Definition abstract.py:215
handle_exception(self, "ResultContainer" result_container, BaseException|str exception_or_message, bool suspend=False)
Definition abstract.py:170
__init__(self, "Engine|types.ModuleType" engine)
Definition abstract.py:116
initialize(self, t.Callable[["EngineProcessor", bool], bool] callback)
Definition abstract.py:123
suspend(self, int|None suspended_time, str suspend_reason)
Definition abstract.py:90
::1337x
Definition 1337x.py:1
t.Any get_setting(str name, t.Any default=_unset)
Definition __init__.py:74