.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
__init__.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""Load and initialize the ``engines``, see :py:func:`load_engines` and register
3:py:obj:`engine_shortcuts`.
4
5usage::
6
7 load_engines( settings['engines'] )
8
9"""
10
11import typing as t
12
13import sys
14import copy
15from os.path import realpath, dirname
16
17import types
18import inspect
19
20from searx import logger, settings
21from searx.utils import load_module
22
23if t.TYPE_CHECKING:
24 from searx.enginelib import Engine
25
# Child logger for this package; it shadows the ``logger`` imported from searx.
logger = logger.getChild("engines")
# Directory holding this file; engine modules are loaded from here.
ENGINE_DIR = dirname(realpath(__file__))

# Fallback values for the namespace of an engine module: an entry is applied
# only when the engine does not already define that attribute, see
# update_engine_attributes() and load_engine().
ENGINE_DEFAULT_ARGS: dict[str, int | str | list[t.Any] | dict[str, t.Any] | bool] = {
    # Common options in the engine module
    "engine_type": "online",
    "paging": False,
    "time_range_support": False,
    "safesearch": False,
    # Options usually set in settings.yml
    "categories": ["general"],
    "enable_http": False,
    "shortcut": "-",
    "timeout": settings["outgoing"]["request_timeout"],
    "display_error_messages": True,
    "disabled": False,
    "inactive": False,
    "about": {},
    "using_tor_proxy": False,
    "send_accept_language_header": False,
    "tokens": [],
    "max_page": 0,
}

# Category appended by load_engine() when none of an engine's categories is
# listed in settings['categories_as_tabs'].
DEFAULT_CATEGORY = "other"

# Map of category name to the engines registered in that category, filled by
# register_engine().
categories: "dict[str, list[Engine|types.ModuleType]]" = {"general": []}

engines: "dict[str, Engine | types.ModuleType]" = {}
"""Global registered engine instances."""

engine_shortcuts: dict[str, str] = {}
"""Simple map of registered *shortcuts* to name of the engine (or ``None``).

::

    engine_shortcuts[engine.shortcut] = engine.name

:meta hide-value:
"""
67
68
def check_engine_module(module: types.ModuleType):
    """Probe an engine module for unintentional name collisions.

    Such collisions are typically caused by import statements inside the
    engine module.  Currently only the ``network`` attribute is checked.

    :param module: the loaded engine module
    :raises TypeError: if ``module.network`` is itself a module
    """
    # network: https://github.com/searxng/searxng/issues/762#issuecomment-1605323861
    network = getattr(module, 'network', None)
    if not network or not inspect.ismodule(network):
        return

    raise TypeError(f'type of {module.__name__}.network is a module ({network.__name__}), expected a string')
79
80
def load_engine(engine_data: dict[str, t.Any]) -> "Engine | types.ModuleType | None":
    """Load engine from ``engine_data``.

    :param dict engine_data: Attributes from YAML ``settings:engines/<engine>``.
        If the name is not lowercase, ``engine_data['name']`` is rewritten in
        place to its lowercase form.
    :return: initialized namespace of the ``<engine>`` or ``None``.

    1. create a namespace and load module of the ``<engine>``
    2. update namespace with values from ``engine_data``
    3. fill in what is still missing with the defaults from
       :py:obj:`ENGINE_DEFAULT_ARGS`

    If engine *is active*, return namespace of the engine, otherwise return
    ``None``.

    This function also returns ``None`` if initialization of the namespace fails
    for one of the following reasons:

    - the ``name`` or the ``engine`` field is missing
    - engine name contains underscore
    - the engine module can't be loaded
    - required attribute is not set :py:func:`is_missing_required_attributes`
    - the ``setup`` function of the engine was not successful

    A name that is not lowercase is *not* an error: a warning is logged and the
    lowercase name is used.

    Exceptions that indicate a broken installation (``SyntaxError``,
    ``ImportError``, ...) while loading the module terminate the process with
    ``sys.exit(1)``.
    """
    # pylint: disable=too-many-return-statements

    engine_name = engine_data.get('name')
    if engine_name is None:
        logger.error('An engine does not have a "name" field')
        return None
    if '_' in engine_name:
        logger.error('Engine name contains underscore: "%s"', engine_name)
        return None

    if engine_name.lower() != engine_name:
        logger.warning('Engine name is not lowercase: "%s", converting to lowercase', engine_name)
        engine_name = engine_name.lower()
        engine_data['name'] = engine_name

    # load_module
    module_name = engine_data.get('engine')
    if module_name is None:
        logger.error('The "engine" field is missing for the engine named "%s"', engine_name)
        return None
    try:
        engine = load_module(module_name + '.py', ENGINE_DIR)
    except (SyntaxError, KeyboardInterrupt, SystemExit, SystemError, ImportError, RuntimeError):
        logger.exception('Fatal exception in engine "%s"', module_name)
        sys.exit(1)
    except BaseException:
        # anything else only disables this one engine
        logger.exception('Cannot load engine "%s"', module_name)
        return None

    check_engine_module(engine)
    update_engine_attributes(engine, engine_data)
    # must run after the attributes are set: it reads using_tor_proxy,
    # onion_url and timeout from the engine namespace
    update_attributes_for_tor(engine)

    # avoid cyclic imports
    # pylint: disable=import-outside-toplevel
    from searx.enginelib.traits import EngineTraitsMap

    trait_map = EngineTraitsMap.from_data()
    trait_map.set_traits(engine)

    if not is_engine_active(engine):
        return None

    # without this guard the ``return None`` would be unconditional and no
    # engine could ever be loaded
    if is_missing_required_attributes(engine):
        return None

    set_loggers(engine, engine_name)

    if not call_engine_setup(engine, engine_data):
        return None

    # an engine without any tab category is shown in the DEFAULT_CATEGORY tab
    if not any(cat in settings['categories_as_tabs'] for cat in engine.categories):
        engine.categories.append(DEFAULT_CATEGORY)

    return engine
157
158
def set_loggers(engine: "Engine|types.ModuleType", engine_name: str):
    """Attach a child logger to ``engine`` and to engine modules lacking one.

    :param engine: engine namespace that gets ``logger.getChild(engine_name)``
    :param engine_name: name used for the child logger of ``engine``
    """
    engine.logger = logger.getChild(engine_name)

    # The engine may have loaded some other engines: make sure their loggers
    # are initialized too.  Iterate over a copy of sys.modules to avoid
    # "RuntimeError: dictionary changed size during iteration", see
    # https://github.com/python/cpython/issues/89516
    # and https://docs.python.org/3.10/library/sys.html#sys.modules
    for mod_name, mod in sys.modules.copy().items():
        if not mod_name.startswith("searx.engines"):
            continue
        if mod_name == "searx.engines.__init__" or hasattr(mod, "logger"):
            continue
        # the logger is named after the last component of the module path
        short_name = mod_name.split(".")[-1]
        mod.logger = logger.getChild(short_name)  # type: ignore
176
177
def update_engine_attributes(engine: "Engine | types.ModuleType", engine_data: dict[str, t.Any]):
    """Copy the settings from ``engine_data`` into the ``engine`` namespace.

    - ``categories``: a comma separated string is split into a list of
      stripped names, any other value is assigned as it is
    - ``about``: merged into an already existing ``engine.about`` (values from
      ``engine_data`` win)
    - everything else is set as attribute of the same name

    Afterwards each attribute from :py:obj:`ENGINE_DEFAULT_ARGS` that the
    engine still lacks is set to a deep copy of its default value.
    """
    for key, value in engine_data.items():
        if key == 'categories':
            if isinstance(value, str):
                value = [name.strip() for name in value.split(',')]
            engine.categories = value  # type: ignore
            continue
        if key == 'about' and hasattr(engine, 'about'):
            engine.about = {**engine.about, **engine_data['about']}  # type: ignore
            continue
        setattr(engine, key, value)

    # deep copy, so that engines never share a mutable default (list / dict)
    for default_name, default_value in ENGINE_DEFAULT_ARGS.items():
        if hasattr(engine, default_name):
            continue
        setattr(engine, default_name, copy.deepcopy(default_value))
194
195
def update_attributes_for_tor(engine: "Engine | types.ModuleType"):
    """Point the engine to its onion service when Tor is in use.

    Nothing is changed unless :py:func:`using_tor_proxy` is true for the engine
    and the engine defines an ``onion_url``.  In that case ``search_url`` is
    built from ``onion_url`` plus the optional ``search_path`` and the timeout
    is raised by ``settings['outgoing']['extra_proxy_timeout']`` (default 0).
    """
    if not (using_tor_proxy(engine) and hasattr(engine, 'onion_url')):
        return

    engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')  # type: ignore
    engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0)  # type: ignore
200
201
def is_missing_required_attributes(engine: "Engine | types.ModuleType"):
    """An attribute is required when its name doesn't start with ``_`` (underline).
    Required attributes must not be ``None``.

    Every required attribute that is ``None`` is logged as an error.

    :return: ``True`` if at least one required attribute is ``None``
    """
    unset = [
        attr_name
        for attr_name in dir(engine)
        if not attr_name.startswith('_') and getattr(engine, attr_name) is None
    ]
    for attr_name in unset:
        logger.error('Missing engine config attribute: "{0}.{1}"'.format(engine.name, attr_name))
    return bool(unset)
213
214
def using_tor_proxy(engine: "Engine | types.ModuleType"):
    """Return True if the engine configuration declares to use Tor.

    The global ``settings['outgoing']['using_tor_proxy']`` is consulted first,
    only if that is unset / false the ``using_tor_proxy`` attribute of the
    engine (default ``False``) decides.
    """
    global_flag = settings['outgoing'].get('using_tor_proxy')
    if global_flag:
        return global_flag
    return getattr(engine, 'using_tor_proxy', False)
218
219
def is_engine_active(engine: "Engine | types.ModuleType"):
    """Return ``False`` for engines that must not be loaded.

    An engine is not active when its ``inactive`` attribute is exactly ``True``
    or when it is in the ``onions`` category while Tor is not in use.
    """
    if engine.inactive is True:
        return False

    # onion engines are only usable through a tor proxy
    is_onion_engine = 'onions' in engine.categories
    return not is_onion_engine or bool(using_tor_proxy(engine))
230
231
def call_engine_setup(engine: "Engine | types.ModuleType", engine_data: dict[str, t.Any]) -> bool:
    """Run the optional ``setup(engine_data)`` hook of the engine.

    :return: ``True`` if the engine has no ``setup`` attribute, otherwise the
        value returned by ``setup``.  A ``setup`` that is not callable or that
        raises an exception counts as not successful; both cases are logged.
    """
    setup_func = getattr(engine, "setup", None)
    if setup_func is None:
        # nothing to set up
        return True

    setup_ok = False
    if callable(setup_func):
        try:
            setup_ok = setup_func(engine_data)
        except Exception as e:  # pylint: disable=broad-except
            logger.exception('exception : {0}'.format(e))
    else:
        logger.error("engine's setup method isn't a callable (is of type: %s)", type(setup_func))

    if not setup_ok:
        logger.error("%s: Engine setup was not successful, engine is set to inactive.", engine.name)
    return setup_ok
249
250
def register_engine(engine: "Engine | types.ModuleType"):
    """Add ``engine`` to the global registries.

    The engine is stored in :py:obj:`engines` by its name, its shortcut is
    stored in :py:obj:`engine_shortcuts` and the engine is appended to each of
    its categories in :py:obj:`categories`.

    A name or a shortcut that is already registered is a configuration error:
    it is logged and the process terminates with ``sys.exit(1)``.
    """
    name = engine.name
    if name in engines:
        logger.error('Engine config error: ambiguous name: {0}'.format(name))
        sys.exit(1)
    engines[name] = engine

    shortcut = engine.shortcut
    if shortcut in engine_shortcuts:
        logger.error('Engine config error: ambiguous shortcut: {0}'.format(shortcut))
        sys.exit(1)
    engine_shortcuts[shortcut] = name

    for category_name in engine.categories:
        if category_name not in categories:
            categories[category_name] = []
        categories[category_name].append(engine)
264
265
def load_engines(engine_list: list[dict[str, t.Any]]):
    """usage: ``engine_list = settings['engines']``

    Reset the global registries, then load and register the engines from
    ``engine_list`` one after the other.  Engines for which
    :py:func:`load_engine` returns nothing are skipped.

    :return: the global :py:obj:`engines` registry
    """
    for registry in (engines, engine_shortcuts, categories):
        registry.clear()
    categories['general'] = []

    # map() and filter() are lazy: each engine is registered before the next
    # one is loaded
    for engine in filter(None, map(load_engine, engine_list)):
        register_engine(engine)
    return engines
using_tor_proxy("Engine | types.ModuleType" engine)
Definition __init__.py:215
is_engine_active("Engine | types.ModuleType" engine)
Definition __init__.py:220
"Engine | types.ModuleType | None" load_engine(dict[str, t.Any] engine_data)
Definition __init__.py:81
update_engine_attributes("Engine | types.ModuleType" engine, dict[str, t.Any] engine_data)
Definition __init__.py:178
is_missing_required_attributes("Engine | types.ModuleType" engine)
Definition __init__.py:202
update_attributes_for_tor("Engine | types.ModuleType" engine)
Definition __init__.py:196
bool call_engine_setup("Engine | types.ModuleType" engine, dict[str, t.Any] engine_data)
Definition __init__.py:232
check_engine_module(types.ModuleType module)
Definition __init__.py:69
load_engines(list[dict[str, t.Any]] engine_list)
Definition __init__.py:266
register_engine("Engine | types.ModuleType" engine)
Definition __init__.py:251
set_loggers("Engine|types.ModuleType" engine, str engine_name)
Definition __init__.py:159