.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
__init__.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""Implementations of the framework for the SearXNG engines.
3
4- :py:obj:`searx.enginelib.EngineCache`
5- :py:obj:`searx.enginelib.Engine`
6- :py:obj:`searx.enginelib.traits`
7
8There is a command line for developer purposes and for deeper analysis. Here is
9an example in which the command line is called in the development environment::
10
11 $ ./manage pyenv.cmd bash --norc --noprofile
12 (py3) python -m searx.enginelib --help
13
14.. hint::
15
16 The long term goal is to modularize all implementations of the engine
17 framework here in this Python package. ToDo:
18
19 - move implementations of the :ref:`searx.engines loader` to a new module in
20 the :py:obj:`searx.enginelib` namespace.
21
22-----
23
24"""
25
26__all__ = ["EngineCache", "Engine", "ENGINES_CACHE"]
27
28import typing as t
29import abc
30from collections.abc import Callable
31import logging
32import string
33import typer
34
35from ..cache import ExpireCacheSQLite, ExpireCacheCfg
36
37if t.TYPE_CHECKING:
38 from searx.enginelib import traits
39 from searx.enginelib.traits import EngineTraits
40 from searx.extended_types import SXNG_Response
41 from searx.result_types import EngineResults
42
ENGINES_CACHE: ExpireCacheSQLite = ExpireCacheSQLite.build_cache(
    ExpireCacheCfg(
        name="ENGINES_CACHE",
        MAXHOLD_TIME=60 * 60 * 24 * 7,  # 7 days
        MAINTENANCE_PERIOD=60 * 60,  # 1h
    )
)
"""Global :py:obj:`searx.cache.ExpireCacheSQLite` instance where the cached
values from all engines are stored.  The `MAXHOLD_TIME` is 7 days and the
`MAINTENANCE_PERIOD` is set to one hour."""
53
54app = typer.Typer()
55
56
@app.command()
def state():
    """Show state for the caches of the engines."""
    # The docstring above is kept verbatim: Typer displays it as the help
    # text of this command.

    # First section: report of the cache tables and their key/values.
    heading = "cache tables and key/values"
    print(heading, "=" * len(heading), sep="\n")
    print(ENGINES_CACHE.state().report())

    # Empty line separating the two sections.
    print()

    # Second section: the properties of the cache; the heading carries the
    # configured cache name.
    heading = f"properties of {ENGINES_CACHE.cfg.name}"
    print(heading, "=" * len(heading), sep="\n")
    print(ENGINES_CACHE.properties)
70
71
@app.command()
def maintenance(force: bool = True):
    """Carry out maintenance on cache of the engines."""
    # The docstring above is kept verbatim: Typer displays it as the help
    # text of this command.  The ``force`` flag is handed through unchanged
    # to the global engine cache.
    cache = ENGINES_CACHE
    cache.maintenance(force=force)
76
77
79 """Persistent (SQLite) key/value cache that deletes its values again after
80 ``expire`` seconds (default/max: :py:obj:`MAXHOLD_TIME
81 <searx.cache.ExpireCacheCfg.MAXHOLD_TIME>`). This class is a wrapper around
82 :py:obj:`ENGINES_CACHE` (:py:obj:`ExpireCacheSQLite
83 <searx.cache.ExpireCacheSQLite>`).
84
85 In the :origin:`searx/engines/demo_offline.py` engine you can find an
86 exemplary implementation of such a cache; other examples are implemented
87 in:
88
89 - :origin:`searx/engines/radio_browser.py`
90 - :origin:`searx/engines/soundcloud.py`
91 - :origin:`searx/engines/startpage.py`
92
93 .. code:: python
94
95 from searx.enginelib import EngineCache
96 CACHE: EngineCache
97
98 def init(engine_settings):
99 global CACHE
100 CACHE = EngineCache(engine_settings["name"])
101
102 def request(query, params):
103 token = CACHE.get(key="token")
104 if token is None:
105 token = get_token()
106 # cache token of this engine for 1h
107 CACHE.set(key="token", value=token, expire=3600)
108 ...
109
110 For introspection of the DB, jump into developer environment and run command to
111 show cache state::
112
113 $ ./manage pyenv.cmd bash --norc --noprofile
114 (py3) python -m searx.enginelib cache state
115
116 cache tables and key/values
117 ===========================
118 [demo_offline ] 2025-04-22 11:32:50 count --> (int) 4
119 [startpage ] 2025-04-22 12:32:30 SC_CODE --> (str) fSOBnhEMlDfE20
120 [duckduckgo ] 2025-04-22 12:32:31 4dff493e.... --> (str) 4-128634958369380006627592672385352473325
121 [duckduckgo ] 2025-04-22 12:40:06 3e2583e2.... --> (str) 4-263126175288871260472289814259666848451
122 [radio_browser ] 2025-04-23 11:33:08 servers --> (list) ['https://de2.api.radio-browser.info', ...]
123 [soundcloud ] 2025-04-29 11:40:06 guest_client_id --> (str) EjkRJG0BLNEZquRiPZYdNtJdyGtTuHdp
124 [wolframalpha ] 2025-04-22 12:40:06 code --> (str) 5aa79f86205ad26188e0e26e28fb7ae7
125 number of tables: 6
126 number of key/value pairs: 7
127
128 In the "cache tables and key/values" section, the table name (engine name) is in
129 the first position, the calculated expire date is in the second, and the
130 key/value pair is shown in the third and fourth positions.
131
132 About duckduckgo: The *vqd code* of ddg depends on the query term and therefore
133 the key is a hash value of the query term (so that the raw query term is not stored).
134
135 In the "properties of ENGINES_CACHE" section all properties of the SQLiteAppl /
136 ExpireCache and their last modification date are shown::
137
138 properties of ENGINES_CACHE
139 ===========================
140 [last modified: 2025-04-22 11:32:27] DB_SCHEMA : 1
141 [last modified: 2025-04-22 11:32:27] LAST_MAINTENANCE :
142 [last modified: 2025-04-22 11:32:27] crypt_hash : ca612e3566fdfd7cf7efe...
143 [last modified: 2025-04-22 11:32:30] CACHE-TABLE--demo_offline: demo_offline
144 [last modified: 2025-04-22 11:32:30] CACHE-TABLE--startpage: startpage
145 [last modified: 2025-04-22 11:32:31] CACHE-TABLE--duckduckgo: duckduckgo
146 [last modified: 2025-04-22 11:33:08] CACHE-TABLE--radio_browser: radio_browser
147 [last modified: 2025-04-22 11:40:06] CACHE-TABLE--soundcloud: soundcloud
148 [last modified: 2025-04-22 11:40:06] CACHE-TABLE--wolframalpha: wolframalpha
149
150 These properties provide information about the state of the ExpireCache and
151 control the behavior. For example, the maintenance intervals are controlled by
152 the last modification date of the LAST_MAINTENANCE property and the hash value
153 of the password can be used to detect whether the password has been changed (in
154 this case the DB entries can no longer be decrypted and the entire cache must be
155 discarded).
156 """
157
def __init__(self, engine_name: str, expire: int | None = None):
    """Bind this cache wrapper to one engine.

    :param engine_name: Name of the engine; it is sanitized into
        ``self.table_name`` by replacing every character that is not an
        ASCII letter, a digit, ``-``, ``_`` or ``.`` with ``_``.
    :param expire: Default lifetime stored in ``self.expire``.  A falsy
        value (``None`` or ``0``) falls back to
        ``ENGINES_CACHE.cfg.MAXHOLD_TIME``.
    """
    if expire:
        self.expire: int = expire
    else:
        self.expire: int = ENGINES_CACHE.cfg.MAXHOLD_TIME

    allowed_chars = "-_." + string.ascii_letters + string.digits
    sanitized = (ch if ch in allowed_chars else "_" for ch in engine_name)
    self.table_name: str = "".join(sanitized)
162
def set(self, key: str, value: t.Any, expire: int | None = None) -> bool:
    """Store ``value`` under ``key`` via :py:obj:`ENGINES_CACHE`.

    The call is delegated to ``ENGINES_CACHE.set`` with ``self.table_name``
    as context.  A falsy ``expire`` (``None`` or ``0``) is replaced by the
    instance default ``self.expire``.

    :return: Whatever ``ENGINES_CACHE.set`` returns.
    """
    lifetime = expire or self.expire
    return ENGINES_CACHE.set(key=key, value=value, expire=lifetime, ctx=self.table_name)
170
def get(self, key: str, default: t.Any = None) -> t.Any:
    """Look up ``key`` via :py:obj:`ENGINES_CACHE`.

    The call is delegated to ``ENGINES_CACHE.get`` with ``self.table_name``
    as context; ``default`` is passed through unchanged.
    """
    cached = ENGINES_CACHE.get(key, default=default, ctx=self.table_name)
    return cached
173
def secret_hash(self, name: str | bytes) -> str:
    """Return ``ENGINES_CACHE.secret_hash(name=name)``.

    Pure pass-through to the global cache; no state of this instance is
    involved.
    """
    digest = ENGINES_CACHE.secret_hash(name=name)
    return digest
176
177
class Engine(abc.ABC):  # pylint: disable=too-few-public-methods
    """Class of engine instances built from YAML settings.

    Further documentation see :ref:`general engine configuration`.

    .. hint::

       This class is currently never initialized and only used for type hinting.
    """

    logger: logging.Logger

    # Common options in the engine module

    engine_type: str
    """Type of the engine (:ref:`searx.search.processors`)"""

    paging: bool
    """Engine supports multiple pages."""

    time_range_support: bool
    """Engine supports search time range."""

    safesearch: bool
    """Engine supports SafeSearch"""

    language_support: bool
    """Engine supports languages (locales) search."""

    language: str
    """For an engine, when there is ``language: ...`` in the YAML settings the engine
    does support only this one language:

    .. code:: yaml

      - name: google french
        engine: google
        language: fr
    """

    region: str
    """For an engine, when there is ``region: ...`` in the YAML settings the engine
    does support only this one region:

    .. code:: yaml

      - name: google belgium
        engine: google
        region: fr-BE
    """

    fetch_traits: "Callable[[EngineTraits, bool], None]"
    """Function to fetch engine's traits from origin."""

    traits: "traits.EngineTraits"
    """Traits of the engine."""

    # settings.yml

    categories: list[str]
    """Specifies to which :ref:`engine categories` the engine should be added."""

    name: str
    """Name that will be used across SearXNG to define this engine.  In settings, on
    the result page .."""

    engine: str
    """Name of the python file used to handle requests and responses to and from
    this search engine (file name from :origin:`searx/engines` without
    ``.py``)."""

    enable_http: bool
    """Enable HTTP (by default only HTTPS is enabled)."""

    shortcut: str
    """Code used to execute bang requests (``!foo``)"""

    timeout: float
    """Specific timeout for search-engine."""

    display_error_messages: bool
    """Display error messages on the web UI."""

    # NOTE(review): the YAML example below maps each scheme to a plain string,
    # while the annotation declares nested dicts -- confirm which is intended.
    proxies: dict[str, dict[str, str]]
    """Set proxies for a specific engine (YAML):

    .. code:: yaml

      proxies :
        http:  socks5://proxy:port
        https: socks5://proxy:port
    """

    disabled: bool
    """Disable the engine by default without deleting it.  The user can still
    activate it manually in the settings."""

    inactive: bool
    """Remove the engine from the settings (*disabled & removed*)."""

    # NOTE(review): the YAML example below shows flat scalar values (strings and
    # booleans), while the annotation declares nested dicts -- confirm which is
    # intended.
    about: dict[str, dict[str, str]]
    """Additional fields describing the engine.

    .. code:: yaml

      about:
         website: https://example.com
         wikidata_id: Q306656
         official_api_documentation: https://example.com/api-doc
         use_official_api: true
         require_api_key: true
         results: HTML
    """

    using_tor_proxy: bool
    """Using tor proxy (``true``) or not (``false``) for this engine."""

    send_accept_language_header: bool
    """When this option is activated, the language (locale) that is selected by
    the user is used to build and send a ``Accept-Language`` header in the
    request to the origin search engine."""

    tokens: list[str]
    """A list of secret tokens to make this engine *private*, more details see
    :ref:`private engines`."""

    weight: int
    """Weighting of the results of this engine (:ref:`weight <settings engines>`)."""

    def init(self, engine_settings: dict[str, t.Any]) -> None:  # pyright: ignore[reportUnusedParameter]
        """Initialization of the engine.  If no initialization is needed, drop
        this init function."""

    @abc.abstractmethod
    def request(self, query: str, params: dict[str, t.Any]) -> None:
        """Build up the params for the online request."""

    @abc.abstractmethod
    def response(self, resp: "SXNG_Response") -> "EngineResults":
        """Parse out the result items from the response."""
str secret_hash(self, str|bytes name)
Definition __init__.py:174
__init__(self, str engine_name, int|None expire=None)
Definition __init__.py:158
bool set(self, str key, t.Any value, int|None expire=None)
Definition __init__.py:163
t.Any get(self, str key, t.Any default=None)
Definition __init__.py:171
"EngineResults" response(self, "SXNG_Response" resp)
Definition __init__.py:316
None init(self, dict[str, t.Any] engine_settings)
Definition __init__.py:307
None request(self, str query, dict[str, t.Any] params)
Definition __init__.py:312
maintenance(bool force=True)
Definition __init__.py:73