.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
__init__.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""Load and initialize the ``engines``, see :py:func:`load_engines` and register
3:py:obj:`engine_shortcuts`.
4
5usage::
6
7 load_engines( settings['engines'] )
8
9"""
10
11import typing as t
12
13import sys
14import copy
15from os.path import realpath, dirname
16
17import types
18import inspect
19
20from searx import logger, settings
21from searx.utils import load_module
22
23if t.TYPE_CHECKING:
24 from searx.enginelib import Engine
25
# Child logger used for everything logged from this package.
logger = logger.getChild("engines")

# Directory that contains this file; engine modules are loaded from here.
ENGINE_DIR = dirname(realpath(__file__))

# Fallback values for the namespace of an engine module.  They are applied by
# update_engine_attributes() to every attribute the engine does not define.
ENGINE_DEFAULT_ARGS: dict[str, int | str | list[t.Any] | dict[str, t.Any] | bool] = {
    # Common options in the engine module
    "engine_type": "online",
    "paging": False,
    "time_range_support": False,
    "safesearch": False,
    # settings.yml
    "categories": ["general"],
    "enable_http": False,
    "shortcut": "-",
    "timeout": settings["outgoing"]["request_timeout"],
    "display_error_messages": True,
    "disabled": False,
    "inactive": False,
    "about": {},
    "using_tor_proxy": False,
    "send_accept_language_header": False,
    "tokens": [],
    "max_page": 0,
}

# Category appended by load_engine() when none of the engine's categories is
# listed in settings['categories_as_tabs'].
DEFAULT_CATEGORY = "other"

# Registries filled by register_engine() and reset by load_engines().
categories: "dict[str, list[Engine|types.ModuleType]]" = {"general": []}
engines: "dict[str, Engine | types.ModuleType]" = {}
engine_shortcuts = {}
"""Simple map of registered *shortcuts* to the name of the engine.

::

    engine_shortcuts[engine.shortcut] = engine.name

:meta hide-value:
"""
64
65
def check_engine_module(module: types.ModuleType):
    """Probe the engine module for unintentional name collisions.

    Collisions can be caused, for example, by import statements in the engine
    module.  Currently only the ``network`` attribute is checked, see
    https://github.com/searxng/searxng/issues/762#issuecomment-1605323861

    :raises TypeError: if ``module.network`` is itself a module.
    """
    obj = getattr(module, 'network', None)

    # Nothing to complain about: attribute is unset / falsy or not a module.
    if not obj or not inspect.ismodule(obj):
        return

    raise TypeError(f'type of {module.__name__}.network is a module ({obj.__name__}), expected a string')
76
77
def load_engine(engine_data: dict[str, t.Any]) -> "Engine | types.ModuleType | None":
    """Load engine from ``engine_data``.

    :param dict engine_data: Attributes from YAML ``settings:engines/<engine>``
    :return: initialized namespace of the ``<engine>`` or ``None``.
    :raises TypeError: propagated from :py:func:`check_engine_module`.

    1. create a namespace and load module of the ``<engine>``
    2. update namespace with values from ``engine_data``
    3. fill attributes that are still unset with the defaults from
       :py:obj:`ENGINE_DEFAULT_ARGS`

    If engine *is active*, return namespace of the engine, otherwise return
    ``None``.

    This function also returns ``None`` if initialization of the namespace fails
    for one of the following reasons:

    - the ``name`` or the ``engine`` field is missing
    - engine name contains underscore
    - loading the engine module raised a (non fatal) exception
    - required attribute is not set :py:func:`is_missing_required_attributes`

    An engine name that is not lowercase is *not* an error: a warning is logged,
    the name is converted to lowercase and written back to
    ``engine_data['name']``.

    A fatal exception while loading the module (``SyntaxError``,
    ``ImportError``, ...) is logged and terminates the process with exit code 1.

    """
    # pylint: disable=too-many-return-statements

    engine_name = engine_data.get('name')
    if engine_name is None:
        logger.error('An engine does not have a "name" field')
        return None
    if '_' in engine_name:
        logger.error('Engine name contains underscore: "{}"'.format(engine_name))
        return None

    if engine_name.lower() != engine_name:
        logger.warning('Engine name is not lowercase: "{}", converting to lowercase'.format(engine_name))
        engine_name = engine_name.lower()
        engine_data['name'] = engine_name

    # load_module
    module_name = engine_data.get('engine')
    if module_name is None:
        logger.error('The "engine" field is missing for the engine named "{}"'.format(engine_name))
        return None
    try:
        engine = load_module(module_name + '.py', ENGINE_DIR)
    except (SyntaxError, KeyboardInterrupt, SystemExit, SystemError, ImportError, RuntimeError):
        logger.exception('Fatal exception in engine "{}"'.format(module_name))
        sys.exit(1)
    except BaseException:
        # any other failure only disables this one engine
        logger.exception('Cannot load engine "{}"'.format(module_name))
        return None

    check_engine_module(engine)
    update_engine_attributes(engine, engine_data)
    # switch to the onion URL / extended timeout when Tor is in use
    update_attributes_for_tor(engine)

    # avoid cyclic imports
    # pylint: disable=import-outside-toplevel
    from searx.enginelib.traits import EngineTraitsMap

    trait_map = EngineTraitsMap.from_data()
    trait_map.set_traits(engine)

    if not is_engine_active(engine):
        return None

    # Only give up when a required attribute is really missing; an
    # unconditional ``return None`` here would make the rest unreachable.
    if is_missing_required_attributes(engine):
        return None

    set_loggers(engine, engine_name)

    # an engine without any "tab" category is shown in the default category
    if not any(cat in settings['categories_as_tabs'] for cat in engine.categories):
        engine.categories.append(DEFAULT_CATEGORY)

    return engine
151
152
def set_loggers(engine: "Engine|types.ModuleType", engine_name: str):
    """Attach a child logger to ``engine`` and to ``searx.engines*`` modules
    in ``sys.modules`` that do not have a ``logger`` attribute yet.

    :param engine: engine namespace, gets ``logger.getChild(engine_name)``
    :param engine_name: name used for the engine's child logger
    """
    engine.logger = logger.getChild(engine_name)

    # The engine may have loaded some other engine modules; make sure their
    # logger is initialized as well.
    #
    # Iterate over a copy of sys.modules to avoid "RuntimeError: dictionary
    # changed size during iteration", see
    # https://github.com/python/cpython/issues/89516
    # and https://docs.python.org/3.10/library/sys.html#sys.modules
    for mod_path, mod in sys.modules.copy().items():
        if not mod_path.startswith("searx.engines"):
            continue
        if mod_path == "searx.engines.__init__":
            continue
        if hasattr(mod, "logger"):
            continue
        # the last component of the dotted module path names the logger
        mod.logger = logger.getChild(mod_path.rpartition(".")[2])  # type: ignore
170
171
def update_engine_attributes(engine: "Engine | types.ModuleType", engine_data: dict[str, t.Any]):
    """Copy the settings from ``engine_data`` onto ``engine`` and then fill in
    the defaults from ``ENGINE_DEFAULT_ARGS`` for attributes still missing.

    - ``categories``: a comma separated string is split into a list of
      stripped names, any other value is assigned unchanged.
    - ``about``: merged into an already existing ``engine.about`` (values from
      ``engine_data`` win); without an existing ``about`` it is assigned.
    - everything else is assigned with ``setattr``.
    """
    for key, value in engine_data.items():
        if key == 'categories':
            if isinstance(value, str):
                value = [part.strip() for part in value.split(',')]
            engine.categories = value  # type: ignore
            continue

        if hasattr(engine, 'about') and key == 'about':
            merged = dict(engine.about)  # type: ignore
            merged.update(engine_data['about'])
            engine.about = merged  # type: ignore
            continue

        setattr(engine, key, value)

    # Defaults are deep-copied so engines never share mutable default values.
    for key, default in ENGINE_DEFAULT_ARGS.items():
        if hasattr(engine, key):
            continue
        setattr(engine, key, copy.deepcopy(default))
188
189
def update_attributes_for_tor(engine: "Engine | types.ModuleType"):
    """When Tor is in use and the engine defines ``onion_url``: build
    ``search_url`` from ``onion_url`` plus the optional ``search_path`` and add
    ``settings['outgoing']['extra_proxy_timeout']`` (default 0) to ``timeout``.
    """
    if not using_tor_proxy(engine):
        return
    if not hasattr(engine, 'onion_url'):
        return

    path = getattr(engine, 'search_path', '')
    engine.search_url = engine.onion_url + path  # type: ignore
    engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0)  # type: ignore
194
195
def is_missing_required_attributes(engine: "Engine | types.ModuleType"):
    """Return ``True`` if at least one required attribute of ``engine`` is unset.

    An attribute is required when its name doesn't start with ``_``
    (underline).  Required attributes must not be ``None``; every violation is
    logged as an error.
    """
    unset = [
        attr_name
        for attr_name in dir(engine)
        if not attr_name.startswith('_') and getattr(engine, attr_name) is None
    ]
    for attr_name in unset:
        logger.error('Missing engine config attribute: "{0}.{1}"'.format(engine.name, attr_name))
    return bool(unset)
207
208
def using_tor_proxy(engine: "Engine | types.ModuleType"):
    """Tell whether Tor is used for ``engine``.

    Returns the global ``settings['outgoing']['using_tor_proxy']`` value when
    it is truthy, otherwise the engine's own ``using_tor_proxy`` attribute
    (``False`` if the engine does not define it).
    """
    global_flag = settings['outgoing'].get('using_tor_proxy')
    if global_flag:
        return global_flag
    return getattr(engine, 'using_tor_proxy', False)
212
213
def is_engine_active(engine: "Engine | types.ModuleType"):
    """Return ``False`` for engines that must not be loaded, ``True`` otherwise.

    An engine is not active when ``engine.inactive`` is ``True`` or when it is
    in the ``onions`` category while Tor is not in use.
    """
    if engine.inactive is True:
        return False

    # onion engines are only usable through tor
    return not ('onions' in engine.categories and not using_tor_proxy(engine))
224
225
def register_engine(engine: "Engine | types.ModuleType"):
    """Add ``engine`` to the ``engines``, ``engine_shortcuts`` and
    ``categories`` registries.

    A name or a shortcut that is already registered is a configuration error:
    it is logged and the process is terminated with exit code 1.
    """
    name = engine.name
    if name in engines:
        logger.error('Engine config error: ambiguous name: {0}'.format(name))
        sys.exit(1)
    engines[name] = engine

    shortcut = engine.shortcut
    if shortcut in engine_shortcuts:
        logger.error('Engine config error: ambiguous shortcut: {0}'.format(shortcut))
        sys.exit(1)
    engine_shortcuts[shortcut] = engine.name

    for category_name in engine.categories:
        if category_name not in categories:
            categories[category_name] = []
        categories[category_name].append(engine)
239
240
def load_engines(engine_list: list[dict[str, t.Any]]):
    """Reset all registries and (re)load the engines from ``engine_list``.

    usage: ``engine_list = settings['engines']``

    :return: the module level ``engines`` registry.
    """
    for registry in (engines, engine_shortcuts, categories):
        registry.clear()
    categories['general'] = []

    for engine_data in engine_list:
        loaded = load_engine(engine_data)
        if not loaded:
            # inactive, misconfigured or not loadable
            continue
        register_engine(loaded)

    return engines
using_tor_proxy("Engine | types.ModuleType" engine)
Definition __init__.py:209
is_engine_active("Engine | types.ModuleType" engine)
Definition __init__.py:214
"Engine | types.ModuleType | None" load_engine(dict[str, t.Any] engine_data)
Definition __init__.py:78
update_engine_attributes("Engine | types.ModuleType" engine, dict[str, t.Any] engine_data)
Definition __init__.py:172
is_missing_required_attributes("Engine | types.ModuleType" engine)
Definition __init__.py:196
update_attributes_for_tor("Engine | types.ModuleType" engine)
Definition __init__.py:190
check_engine_module(types.ModuleType module)
Definition __init__.py:66
load_engines(list[dict[str, t.Any]] engine_list)
Definition __init__.py:241
register_engine("Engine | types.ModuleType" engine)
Definition __init__.py:226
set_loggers("Engine|types.ModuleType" engine, str engine_name)
Definition __init__.py:153