.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
__init__.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""Implementations of the framework for the SearXNG engines.
3
4- :py:obj:`searx.enginelib.EngineCache`
5- :py:obj:`searx.enginelib.Engine`
6- :py:obj:`searx.enginelib.traits`
7
8There is a command line for developer purposes and for deeper analysis. Here is
9an example in which the command line is called in the development environment::
10
11 $ ./manage pyenv.cmd bash --norc --noprofile
12 (py3) python -m searx.enginelib --help
13
14.. hint::
15
16 The long term goal is to modularize all implementations of the engine
17 framework here in this Python package. ToDo:
18
19 - move implementations of the :ref:`searx.engines loader` to a new module in
20 the :py:obj:`searx.enginelib` namespace.
21
22-----
23
24"""
25
# Public names of this package (what ``from searx.enginelib import *`` exports).
__all__ = [
    "EngineCache",
    "Engine",
    "ENGINES_CACHE",
]
27
28import typing as t
29import abc
30from collections.abc import Callable
31import logging
32import string
33import typer
34
35from ..cache import ExpireCacheSQLite, ExpireCacheCfg
36
37if t.TYPE_CHECKING:
38 from searx.enginelib import traits
39 from searx.enginelib.traits import EngineTraits
40 from searx.extended_types import SXNG_Response
41 from searx.result_types import EngineResults
42 from searx.search.processors import OfflineParamTypes, OnlineParamTypes
43
ENGINES_CACHE: ExpireCacheSQLite = ExpireCacheSQLite.build_cache(
    ExpireCacheCfg(
        name="ENGINES_CACHE",
        MAXHOLD_TIME=60 * 60 * 24 * 7,  # 7 days
        MAINTENANCE_PERIOD=60 * 60,  # 1h
    )
)
"""Global :py:obj:`searx.cache.ExpireCacheSQLite` instance where the cached
values from all engines are stored.  The `MAXHOLD_TIME` is 7 days and the
`MAINTENANCE_PERIOD` is set to one hour."""

# Typer application of this package; functions in this module decorated with
# ``@app.command()`` are registered on it as sub-commands.
app = typer.Typer()
56
57
@app.command()
def state():
    """Show state for the caches of the engines."""

    def _print_heading(text: str) -> None:
        # Heading followed by an "=" underline of the same length.
        print(text)
        print("=" * len(text))

    # Section 1: report of the cache state (tables and their key/value pairs).
    _print_heading("cache tables and key/values")
    print(ENGINES_CACHE.state().report())
    print()

    # Section 2: the properties of the cache, titled with the configured name.
    _print_heading(f"properties of {ENGINES_CACHE.cfg.name}")
    print(str(ENGINES_CACHE.properties))
71
72
@app.command()
def maintenance(force: bool = True):
    """Carry out maintenance on cache of the engines."""
    # The ``force`` flag is handed through unchanged to the global cache; it
    # defaults to ``True`` when the command is called without an explicit value.
    cache = ENGINES_CACHE
    cache.maintenance(force=force)
77
78
80 """Persistent (SQLite) key/value cache that deletes its values again after
81 ``expire`` seconds (default/max: :py:obj:`MAXHOLD_TIME
82 <searx.cache.ExpireCacheCfg.MAXHOLD_TIME>`). This class is a wrapper around
83 :py:obj:`ENGINES_CACHE` (:py:obj:`ExpireCacheSQLite
84 <searx.cache.ExpireCacheSQLite>`).
85
86 In the :origin:`searx/engines/demo_offline.py` engine you can find an
87 exemplary implementation of such a cache; other examples are implemented
88 in:
89
90 - :origin:`searx/engines/radio_browser.py`
91 - :origin:`searx/engines/soundcloud.py`
92 - :origin:`searx/engines/startpage.py`
93
94 .. code:: python
95
96 from searx.enginelib import EngineCache
97 CACHE: EngineCache
98
99 def init(engine_settings):
100 global CACHE
101 CACHE = EngineCache(engine_settings["name"])
102
103 def request(query, params):
104 token = CACHE.get(key="token")
105 if token is None:
106 token = get_token()
107 # cache token of this engine for 1h
108 CACHE.set(key="token", value=token, expire=3600)
109 ...
110
111 For introspection of the DB, jump into developer environment and run command to
112 show cache state::
113
114 $ ./manage pyenv.cmd bash --norc --noprofile
115 (py3) python -m searx.enginelib cache state
116
117 cache tables and key/values
118 ===========================
119 [demo_offline ] 2025-04-22 11:32:50 count --> (int) 4
120 [startpage ] 2025-04-22 12:32:30 SC_CODE --> (str) fSOBnhEMlDfE20
121 [duckduckgo ] 2025-04-22 12:32:31 4dff493e.... --> (str) 4-128634958369380006627592672385352473325
122 [duckduckgo ] 2025-04-22 12:40:06 3e2583e2.... --> (str) 4-263126175288871260472289814259666848451
123 [radio_browser ] 2025-04-23 11:33:08 servers --> (list) ['https://de2.api.radio-browser.info', ...]
124 [soundcloud ] 2025-04-29 11:40:06 guest_client_id --> (str) EjkRJG0BLNEZquRiPZYdNtJdyGtTuHdp
125 [wolframalpha ] 2025-04-22 12:40:06 code --> (str) 5aa79f86205ad26188e0e26e28fb7ae7
126 number of tables: 6
127 number of key/value pairs: 7
128
129 In the "cache tables and key/values" section, the table name (engine name) is in
130 the first position, the calculated expire date is in the second, and the key and
131 value are shown in the third and fourth positions.
132
133 About duckduckgo: The *vqd code* of ddg depends on the query term and therefore
134 the key is a hash value of the query term (so that the raw query term is not stored).
135
136 In the "properties of ENGINES_CACHE" section all properties of the SQLiteAppl /
137 ExpireCache and their last modification date are shown::
138
139 properties of ENGINES_CACHE
140 ===========================
141 [last modified: 2025-04-22 11:32:27] DB_SCHEMA : 1
142 [last modified: 2025-04-22 11:32:27] LAST_MAINTENANCE :
143 [last modified: 2025-04-22 11:32:27] crypt_hash : ca612e3566fdfd7cf7efe...
144 [last modified: 2025-04-22 11:32:30] CACHE-TABLE--demo_offline: demo_offline
145 [last modified: 2025-04-22 11:32:30] CACHE-TABLE--startpage: startpage
146 [last modified: 2025-04-22 11:32:31] CACHE-TABLE--duckduckgo: duckduckgo
147 [last modified: 2025-04-22 11:33:08] CACHE-TABLE--radio_browser: radio_browser
148 [last modified: 2025-04-22 11:40:06] CACHE-TABLE--soundcloud: soundcloud
149 [last modified: 2025-04-22 11:40:06] CACHE-TABLE--wolframalpha: wolframalpha
150
151 These properties provide information about the state of the ExpireCache and
152 control the behavior. For example, the maintenance intervals are controlled by
153 the last modification date of the LAST_MAINTENANCE property and the hash value
154 of the password can be used to detect whether the password has been changed (in
155 this case the DB entries can no longer be decrypted and the entire cache must be
156 discarded).
157 """
158
def __init__(self, engine_name: str, expire: int | None = None):
    """Bind this cache to the table of one engine.

    :param engine_name: Name of the engine; the table name is derived from
        it by replacing every character that is not an ASCII letter, a
        digit or one of ``-``, ``_``, ``.`` with ``_``.
    :param expire: Default expire time of this instance.  A falsy value
        (``None`` or ``0``) falls back to ``ENGINES_CACHE.cfg.MAXHOLD_TIME``.
    """
    self.expire: int = expire or ENGINES_CACHE.cfg.MAXHOLD_TIME

    allowed_chars = "-_." + string.ascii_letters + string.digits
    sanitized = (ch if ch in allowed_chars else "_" for ch in engine_name)
    self.table_name: str = "".join(sanitized)
163
def set(self, key: str, value: t.Any, expire: int | None = None) -> bool:
    """Store ``value`` under ``key`` in the table of this engine.

    :param key: Key of the entry.
    :param value: Value to store.
    :param expire: Expire time for this entry; a falsy value (``None`` or
        ``0``) selects the default of this instance (``self.expire``).
    :return: The result of ``ENGINES_CACHE.set``.
    """
    effective_expire = expire if expire else self.expire
    return ENGINES_CACHE.set(key=key, value=value, expire=effective_expire, ctx=self.table_name)
171
def get(self, key: str, default: t.Any = None) -> t.Any:
    """Look up ``key`` in the table of this engine.

    The lookup is delegated to ``ENGINES_CACHE.get`` with this instance's
    table name as context; ``default`` is handed through unchanged.
    """
    table = self.table_name
    return ENGINES_CACHE.get(key, default=default, ctx=table)
174
def secret_hash(self, name: str | bytes) -> str:
    """Return ``ENGINES_CACHE.secret_hash`` of ``name``.

    The call is not bound to this engine's table: only ``name`` is passed on
    to the global cache.
    """
    hashed: str = ENGINES_CACHE.secret_hash(name=name)
    return hashed
177
178
179class Engine(abc.ABC): # pylint: disable=too-few-public-methods
180 """Class of engine instances build from YAML settings.
181
182 Further documentation see :ref:`general engine configuration`.
183
184 .. hint::
185
186 This class is currently never initialized and only used for type hinting.
187 """
188
189 logger: logging.Logger
190
191 # Common options in the engine module
192
193 engine_type: str
194 """Type of the engine (:ref:`searx.search.processors`)"""
195
196 paging: bool
197 """Engine supports multiple pages."""
198
199 max_page: int = 0
200 """If the engine supports paging, then this is the value for the last page
201 that is still supported. ``0`` means unlimited numbers of pages."""
202
203 time_range_support: bool
204 """Engine supports search time range."""
205
206 safesearch: bool
207 """Engine supports SafeSearch"""
208
209 language_support: bool
210 """Engine supports languages (locales) search."""
211
212 language: str
213 """For an engine, when there is ``language: ...`` in the YAML settings the engine
214 does support only this one language:
215
216 .. code:: yaml
217
218 - name: google french
219 engine: google
220 language: fr
221 """
222
223 region: str
224 """For an engine, when there is ``region: ...`` in the YAML settings the engine
225 does support only this one region::
226
227 .. code:: yaml
228
229 - name: google belgium
230 engine: google
231 region: fr-BE
232 """
233
234 fetch_traits: "Callable[[EngineTraits, bool], None]"
235 """Function to to fetch engine's traits from origin."""
236
237 traits: "traits.EngineTraits"
238 """Traits of the engine."""
239
240 # settings.yml
241
242 categories: list[str]
243 """Specifies to which :ref:`engine categories` the engine should be added."""
244
245 name: str
246 """Name that will be used across SearXNG to define this engine. In settings, on
247 the result page .."""
248
249 engine: str
250 """Name of the python file used to handle requests and responses to and from
251 this search engine (file name from :origin:`searx/engines` without
252 ``.py``)."""
253
254 enable_http: bool
255 """Enable HTTP (by default only HTTPS is enabled)."""
256
257 shortcut: str
258 """Code used to execute bang requests (``!foo``)"""
259
260 timeout: float
261 """Specific timeout for search-engine."""
262
263 display_error_messages: bool
264 """Display error messages on the web UI."""
265
266 proxies: dict[str, dict[str, str]]
267 """Set proxies for a specific engine (YAML):
268
269 .. code:: yaml
270
271 proxies :
272 http: socks5://proxy:port
273 https: socks5://proxy:port
274 """
275
276 disabled: bool
277 """To disable by default the engine, but not deleting it. It will allow the
278 user to manually activate it in the settings."""
279
280 inactive: bool
281 """Remove the engine from the settings (*disabled & removed*)."""
282
283 about: dict[str, dict[str, str]]
284 """Additional fields describing the engine.
285
286 .. code:: yaml
287
288 about:
289 website: https://example.com
290 wikidata_id: Q306656
291 official_api_documentation: https://example.com/api-doc
292 use_official_api: true
293 require_api_key: true
294 results: HTML
295 """
296
297 using_tor_proxy: bool
298 """Using tor proxy (``true``) or not (``false``) for this engine."""
299
300 send_accept_language_header: bool
301 """When this option is activated, the language (locale) that is selected by
302 the user is used to build and send a ``Accept-Language`` header in the
303 request to the origin search engine."""
304
305 tokens: list[str]
306 """A list of secret tokens to make this engine *private*, more details see
307 :ref:`private engines`."""
308
309 weight: int
310 """Weighting of the results of this engine (:ref:`weight <settings engines>`)."""
311
312 def setup(self, engine_settings: dict[str, t.Any]) -> bool: # pylint: disable=unused-argument
313 """Dynamic setup of the engine settings.
314
315 With this method, the engine's setup is carried out. For example, to
316 check or dynamically adapt the values handed over in the parameter
317 ``engine_settings``. The return value (True/False) indicates whether
318 the setup was successful and the engine can be built or rejected.
319
320 The method is optional and is called synchronously as part of the
321 initialization of the service and is therefore only suitable for simple
322 (local) exams/changes at the engine setting. The :py:obj:`Engine.init`
323 method must be used for longer tasks in which values of a remote must be
324 determined, for example.
325 """
326 return True
327
328 def init(self, engine_settings: dict[str, t.Any]) -> bool | None: # pylint: disable=unused-argument
329 """Initialization of the engine.
330
331 The method is optional and asynchronous (in a thread). It is suitable,
332 for example, for setting up a cache (for the engine) or for querying
333 values (required by the engine) from a remote.
334
335 Whether the initialization was successful can be indicated by the return
336 value ``True`` or even ``False``.
337
338 - If no return value is given from this init method (``None``), this is
339 equivalent to ``True``.
340
341 - If an exception is thrown as part of the initialization, this is
342 equivalent to ``False``.
343 """
344 return True
345
346 @abc.abstractmethod
347 def search(self, query: str, params: "OfflineParamTypes") -> "EngineResults":
348 """Search method of the ``offline`` engines"""
349
350 @abc.abstractmethod
351 def request(self, query: str, params: "OnlineParamTypes") -> None:
352 """Method to build the parameters for the request of an ``online``
353 engine."""
354
355 @abc.abstractmethod
356 def response(self, resp: "SXNG_Response") -> "EngineResults":
357 """Method to parse the response of an ``online`` engine."""
str secret_hash(self, str|bytes name)
Definition __init__.py:175
__init__(self, str engine_name, int|None expire=None)
Definition __init__.py:159
bool set(self, str key, t.Any value, int|None expire=None)
Definition __init__.py:164
t.Any get(self, str key, t.Any default=None)
Definition __init__.py:172
bool|None init(self, dict[str, t.Any] engine_settings)
Definition __init__.py:328
bool setup(self, dict[str, t.Any] engine_settings)
Definition __init__.py:312
"EngineResults" response(self, "SXNG_Response" resp)
Definition __init__.py:356
None request(self, str query, "OnlineParamTypes" params)
Definition __init__.py:351
maintenance(bool force=True)
Definition __init__.py:74