2"""Abstract base classes for all engine processors."""
8from abc
import abstractmethod, ABC
9from timeit
import default_timer
11from searx
import get_setting
12from searx
import logger
15from searx.metrics import histogram_observe, counter_inc, count_exception, count_error
27logger = logger.getChild(
"searx.search.processor")
28SUSPENDED_STATUS: dict[int | str,
"SuspendedStatus"] = {}
32 """Basic set of request parameters shared by all engine types."""
35 """Search term, stripped of search syntax arguments."""
38 """Current category, like ``general``.
42 This field is deprecated, don't use it in further implementations.
44 This field is currently *arbitrarily* filled with the name of "one"
45 category (the name of the first category of the engine). In practice,
46 however, it is not clear what this "one" category should be; in principle,
47 multiple categories can also be activated in a search.
51 """Current page number, where the first page is ``1``."""
53 safesearch: t.Literal[0, 1, 2]
54 """Safe-Search filter (0:normal, 1:moderate, 2:strict)."""
56 time_range: t.Literal[
"day",
"week",
"month",
"year"] |
None
57 """Time-range filter."""
59 engine_data: dict[str, str]
60 """Allows the transfer of (engine specific) data to the next request of the
61 client. In the case of the ``online`` engines, this data is delivered to
62 the client via the HTML ``<form>`` in response.
64 If the client then sends this form back to the server with the next request,
65 this data will be available.
67 This makes it possible to carry data from one request to the next without a
68 session context, but this feature is fragile and should only be used in
69 exceptional cases. See also :ref:`engine_data`."""
72 """Language / locale filter from the search request, a string like 'all',
73 'en', 'en-US', 'zh-HK' .. and others, for more details see
74 :py:obj:`searx.locales`."""
78 """Class to handle suspend state."""
81 self.
lock: threading.Lock = threading.Lock()
90 def suspend(self, suspended_time: int |
None, suspend_reason: str):
94 if suspended_time
is None:
95 max_ban: int =
get_setting(
"search.max_ban_time_on_fail")
96 ban_fail: int =
get_setting(
"search.ban_time_on_fail")
97 suspended_time = min(max_ban, ban_fail)
101 logger.debug(
"Suspend for %i seconds", suspended_time)
112 """Base class used for all types of request processors."""
116 def __init__(self, engine:
"Engine|types.ModuleType"):
118 self.
logger: logging.Logger = engines[engine.name].logger
119 key = get_network(self.
engine.name)
120 key = id(key)
if key
else self.
engine.name
123 def initialize(self, callback: t.Callable[[
"EngineProcessor", bool], bool]):
124 """Initialization of *this* :py:obj:`EngineProcessor`.
126 If processor's engine has an ``init`` method, it is called first.
127 Engine's ``init`` method is executed in a thread, meaning that the
128 *registration* (the ``callback``) may occur later and is not already
129 established by the return from this registration method.
131 Registration only takes place if the ``init`` method is not available or
132 is successfully run through.
135 if not hasattr(self.
engine,
"init"):
139 if not callable(self.
engine.init):
140 logger.error(
"Engine's init method isn't a callable (is of type: %s).", type(self.
engine.init))
141 callback(self,
False)
144 def __init_processor_thread():
146 callback(self, eng_ok)
149 threading.Thread(target=__init_processor_thread, daemon=
True).start()
152 eng_setting = get_engine_from_settings(self.
engine.name)
153 init_ok: bool |
None =
False
155 init_ok = self.
engine.init(eng_setting)
157 logger.exception(
"Init method of engine %s failed due to an exception.", self.
engine.name)
167 result_container:
"ResultContainer",
168 exception_or_message: BaseException | str,
169 suspend: bool =
False,
172 if isinstance(exception_or_message, BaseException):
173 exception_class = exception_or_message.__class__
174 module_name = getattr(exception_class,
'__module__',
'builtins')
175 module_name =
'' if module_name ==
'builtins' else module_name +
'.'
176 error_message = module_name + exception_class.__qualname__
178 error_message = exception_or_message
179 result_container.add_unresponsive_engine(self.
engine.name, error_message)
181 counter_inc(
'engine', self.
engine.name,
'search',
'count',
'error')
182 if isinstance(exception_or_message, BaseException):
183 count_exception(self.
engine.name, exception_or_message)
185 count_error(self.
engine.name, exception_or_message)
188 suspended_time =
None
189 if isinstance(exception_or_message, SearxEngineAccessDeniedException):
190 suspended_time = exception_or_message.suspended_time
195 result_container:
"ResultContainer",
197 search_results:
"list[Result | LegacyResult]",
200 result_container.extend(self.
engine.name, search_results)
201 engine_time = default_timer() - start_time
202 page_load_time = get_time_for_thread()
203 result_container.add_timing(self.
engine.name, engine_time, page_load_time)
205 counter_inc(
'engine', self.
engine.name,
'search',
'count',
'successful')
206 histogram_observe(engine_time,
'engine', self.
engine.name,
'time',
'total')
207 if page_load_time
is not None:
208 histogram_observe(page_load_time,
'engine', self.
engine.name,
'time',
'http')
212 result_container:
"ResultContainer",
214 search_results:
"list[Result | LegacyResult]|None",
216 if getattr(threading.current_thread(),
'_timeout',
False):
221 if search_results
is not None:
227 result_container.add_unresponsive_engine(
233 def get_params(self, search_query:
"SearchQuery", engine_category: str) -> RequestParams |
None:
234 """Returns a dictionary with the :ref:`request parameters <engine
235 request arguments>` (:py:obj:`RequestParams`).  If the search condition
236 is not supported by the engine, ``None`` is returned instead:
238 - *time range* filter in search conditions, but the engine does not have
239 a corresponding filter
240 - page number > 1 when engine does not support paging
241 - page number > ``max_page``
245 if search_query.pageno > 1
and not self.
engine.paging:
250 if max_page
and max_page < search_query.pageno:
254 if search_query.time_range
and not self.
engine.time_range_support:
257 params: RequestParams = {
258 "query": search_query.query,
259 "category": engine_category,
260 "pageno": search_query.pageno,
261 "safesearch": search_query.safesearch,
262 "time_range": search_query.time_range,
263 "engine_data": search_query.engine_data.get(self.
engine.name, {}),
264 "searxng_locale": search_query.lang,
274 if hasattr(self.
engine,
"language")
and self.
engine.language:
275 params[
"language"] = self.
engine.language
277 params[
"language"] = search_query.lang
285 params: RequestParams,
286 result_container:
"ResultContainer",
288 timeout_limit: float,
RequestParams|None get_params(self, "SearchQuery" search_query, str engine_category)
bool extend_container_if_suspended(self, "ResultContainer" result_container)
_extend_container_basic(self, "ResultContainer" result_container, float start_time, "list[Result | LegacyResult]" search_results)
extend_container(self, "ResultContainer" result_container, float start_time, "list[Result | LegacyResult]|None" search_results)
handle_exception(self, "ResultContainer" result_container, BaseException|str exception_or_message, bool suspend=False)
__init__(self, "Engine|types.ModuleType" engine)
SuspendedStatus suspended_status
initialize(self, t.Callable[["EngineProcessor", bool], bool] callback)
suspend(self, int|None suspended_time, str suspend_reason)
t.Any get_setting(str name, t.Any default=_unset)