.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
query.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2# pylint: disable=invalid-name, missing-module-docstring, missing-class-docstring
3
4from __future__ import annotations
5from abc import abstractmethod, ABC
6import re
7
8from searx import settings
9from searx.sxng_locales import sxng_locales
10from searx.engines import categories, engines, engine_shortcuts
11from searx.external_bang import get_bang_definition_and_autocomplete
12from searx.search import EngineRef
13from searx.webutils import VALID_LANGUAGE_CODE
14
15
class QueryPartParser(ABC):
    """Base class of the parsers that handle one kind of special query part.

    An instance keeps a reference to the query object it fills in
    (``raw_text_query``) and a flag telling whether it may add autocomplete
    suggestions (``enable_autocomplete``).
    """

    __slots__ = ("raw_text_query", "enable_autocomplete")

    def __init__(self, raw_text_query, enable_autocomplete):
        self.raw_text_query = raw_text_query
        self.enable_autocomplete = enable_autocomplete

    @staticmethod
    @abstractmethod
    def check(raw_value):
        """Tell whether raw_value is something this parser can handle"""

    @abstractmethod
    def __call__(self, raw_value):
        """Parse raw_value and update the properties of self.raw_text_query

        Returns True when raw_value has been parsed.

        When self.enable_autocomplete is True,
        self.raw_text_query.autocomplete_list is updated as well.
        """

    def _add_autocomplete(self, value):
        """Append ``value`` to the autocomplete list unless it is already there"""
        suggestions = self.raw_text_query.autocomplete_list
        if value in suggestions:
            return
        suggestions.append(value)
42
43
@staticmethod
def check(raw_value):
    """Return True when the query part begins with ``<`` (timeout syntax)."""
    first_char = raw_value[0]
    return first_char == '<'
48
49 def __call__(self, raw_value):
50 value = raw_value[1:]
51 found = self._parse(value) if len(value) > 0 else False
52 if self.enable_autocomplete and not value:
53 self._autocomplete()
54 return found
55
56 def _parse(self, value):
57 if not value.isdigit():
58 return False
59 raw_timeout_limit = int(value)
60 if raw_timeout_limit < 100:
61 # below 100, the unit is the second ( <3 = 3 seconds timeout )
62 self.raw_text_query.timeout_limit = float(raw_timeout_limit)
63 else:
64 # 100 or above, the unit is the millisecond ( <850 = 850 milliseconds timeout )
65 self.raw_text_query.timeout_limit = raw_timeout_limit / 1000.0
66 return True
67
68 def _autocomplete(self):
69 for suggestion in ['<3', '<850']:
70 self._add_autocomplete(suggestion)
71
72
@staticmethod
def check(raw_value):
    """Return True when the query part begins with ``:`` (language syntax)."""
    first_char = raw_value[0]
    return first_char == ':'
77
78 def __call__(self, raw_value):
79 value = raw_value[1:].lower().replace('_', '-')
80 found = self._parse(value) if len(value) > 0 else False
81 if self.enable_autocomplete and not found:
82 self._autocomplete(value)
83 return found
84
85 def _parse(self, value):
# Try to read ``value`` (the text that followed ':') as a language selection
# and record it in self.raw_text_query.languages.  Returns the ``found``
# flag, which is set to True on the match paths below.
#
# NOTE(review): this listing has lost its indentation, so the nesting of the
# trailing ``break`` and of the last ``found = True`` cannot be read off the
# text — confirm against the real file before restructuring this method.
86 found = False
87 # check if any language-code is equal with
88 # declared language-codes
89 for lc in sxng_locales:
# every locale entry is unpacked into five lower-cased fields
90 lang_id, lang_name, country, english_name, _flag = map(str.lower, lc)
91
92 # if correct language-code is found
93 # set it as new search-language
94
# ``value`` may equal the id, either language name, or the country (with
# '-' read as a space); a ``value`` already present in the list is skipped
95 if (
96 value == lang_id or value == lang_name or value == english_name or value.replace('-', ' ') == country
97 ) and value not in self.raw_text_query.languages:
98 found = True
# ids made of exactly two '-'-separated parts are stored as xx-YY
# NOTE(review): the membership test above compares the un-normalised
# ``value`` while xx-YY is what gets stored — presumably a repeated
# ':xx-yy' part is therefore appended twice; verify.
99 lang_parts = lang_id.split('-')
100 if len(lang_parts) == 2:
101 self.raw_text_query.languages.append(lang_parts[0] + '-' + lang_parts[1].upper())
102 else:
103 self.raw_text_query.languages.append(lang_id)
104 # to ensure best match (first match is not necessarily the best one)
105 if value == lang_id:
106 break
107
108 # user may set a valid, yet not selectable language
109 if VALID_LANGUAGE_CODE.match(value) or value == 'auto':
# normalise to xx-YY before the duplicate check; 'auto' has no '-' and is
# kept as is
110 lang_parts = value.split('-')
111 if len(lang_parts) > 1:
112 value = lang_parts[0].lower() + '-' + lang_parts[1].upper()
113 if value not in self.raw_text_query.languages:
114 self.raw_text_query.languages.append(value)
115 found = True
116
117 return found
118
def _autocomplete(self, value):
    """Add language suggestions for the (normalised) text typed after ``:``.

    With an empty ``value`` a few example queries are suggested.  Otherwise
    every locale whose id is listed in ``settings['search']['languages']``
    is checked: its id, its two language names and its country are each
    suggested when they start with ``value``.
    """
    selectable = settings['search']['languages']

    if not value:
        # show some example queries
        if len(selectable) < 10:
            examples = [':' + lang for lang in selectable]
        else:
            examples = [":en", ":en_us", ":english", ":united_kingdom"]
        self.raw_text_query.autocomplete_list.extend(examples)
        return

    # here "new_zealand" is "new-zealand" (see __call__)
    country_prefix = value.replace('-', ' ')
    short_prefix = len(value) <= 2

    for locale in sxng_locales:
        if locale[0] not in selectable:
            continue
        lang_id, lang_name, country, english_name, _flag = map(str.lower, locale)

        # check if query starts with language-id; for a prefix of at most
        # two characters only the part before '-' is suggested
        if lang_id.startswith(value):
            suggestion = lang_id.split('-')[0] if short_prefix else lang_id
            self._add_autocomplete(':' + suggestion)

        # check if query starts with language name
        if lang_name.startswith(value) or english_name.startswith(value):
            self._add_autocomplete(':' + lang_name)

        # check if query starts with country
        if country.startswith(country_prefix):
            self._add_autocomplete(':' + country.replace(' ', '_'))
150
151
@staticmethod
def check(raw_value):
    """Return True for ``!!`` followed by at least one more character."""
    return len(raw_value) > 2 and raw_value[:2] == '!!'
156
157 def __call__(self, raw_value):
158 value = raw_value[2:]
159 found, bang_ac_list = self._parse(value) if len(value) > 0 else (False, [])
161 self._autocomplete(bang_ac_list)
162 return found
163
def _parse(self, value):
    """Look ``value`` up as an external bang.

    Returns a ``(found, bang_ac_list)`` pair; when a definition exists,
    ``value`` is stored as ``self.raw_text_query.external_bang``.
    """
    bang_definition, bang_ac_list = get_bang_definition_and_autocomplete(value)
    if bang_definition is None:
        return False, bang_ac_list
    self.raw_text_query.external_bang = value
    return True, bang_ac_list
171
172 def _autocomplete(self, bang_ac_list):
173 if not bang_ac_list:
174 bang_ac_list = ['g', 'ddg', 'bing']
175 for external_bang in bang_ac_list:
176 self._add_autocomplete('!!' + external_bang)
177
178
@staticmethod
def check(raw_value):
    """Return True for a part starting with a single ``!`` (never ``!!``)."""
    if raw_value[0] != '!':
        return False
    # the slice is empty for a bare '!', which still counts as a bang
    return raw_value[1:2] != '!'
184
185 def __call__(self, raw_value):
186 value = raw_value[1:].replace('-', ' ').replace('_', ' ')
187 found = self._parse(value) if len(value) > 0 else False
188 if found and raw_value[0] == '!':
189 self.raw_text_query.specific = True
191 self._autocomplete(raw_value[0], value)
192 return found
193
def _parse(self, value):
    """Resolve ``value`` as an engine shortcut, an engine name or a category.

    An engine adds one ``EngineRef`` to ``self.raw_text_query.enginerefs``;
    a category adds one per engine of that category, skipping the
    ``(engine name, category)`` pairs found in ``disabled_engines``.

    Returns True when ``value`` named an engine or a category.
    """
    query = self.raw_text_query

    # check if prefix is equal with engine shortcut
    name = engine_shortcuts[value] if value in engine_shortcuts else value

    # check if prefix is equal with engine name
    if name in engines:
        query.enginerefs.append(EngineRef(name, 'none'))
        return True

    # check if prefix is equal with category name
    if name in categories:
        # using all engines for that search, which
        # are declared under that category name
        for engine in categories[name]:
            if (engine.name, name) in query.disabled_engines:
                continue
            query.enginerefs.append(EngineRef(engine.name, name))
        return True

    return False
216
def _autocomplete(self, first_char, value):
    """Add bang suggestions, each prefixed with ``first_char``.

    With an empty ``value`` a few example bangs are suggested; otherwise
    every category, engine and engine shortcut starting with ``value`` is.
    """
    add = self._add_autocomplete

    if not value:
        # show some example queries
        disabled = self.raw_text_query.disabled_engines
        for example in ('images', 'wikipedia', 'osm'):
            if example not in disabled or example in categories:
                add(first_char + example)
        return

    # check if query starts with category name, then with engine name;
    # spaces inside these names are written as '_'
    for name in (*categories, *engines):
        if name.startswith(value):
            add(first_char + name.replace(' ', '_'))

    # check if query starts with engine shortcut
    for shortcut in engine_shortcuts:
        if shortcut.startswith(value):
            add(first_char + shortcut)
239
240
@staticmethod
def check(raw_value):
    """Return True when the query part is exactly ``!!``."""
    is_bare_double_bang = raw_value == '!!'
    return is_bare_double_bang
245
246 def __call__(self, raw_value):
247 self.raw_text_query.redirect_to_first_result = True
248 return True
249
250
252 """parse raw text query (the value from the html input)"""
253
# Parser classes tried, in this order, on every query part: _parse_query
# stops at the first class whose check() accepts the part, so the order of
# the entries matters.
254 PARSER_CLASSES = [
255 TimeoutParser, # force the timeout
256 LanguageParser, # force a language
257 ExternalBangParser, # external bang (must be before BangParser)
258 BangParser, # force an engine or category
259 FeelingLuckyParser, # redirect to the first link in the results list
260 ]
261
262 def __init__(self, query: str, disabled_engines: list):
263 assert isinstance(query, str)
264 # input parameters
265 self.query = query
266 self.disabled_engines = disabled_engines if disabled_engines else []
267 # parsed values
268 self.enginerefs = []
269 self.languages = []
270 self.timeout_limit = None
271 self.external_bang = None
272 self.specific = False
274 # internal properties
275 self.query_parts = [] # use self.getFullQuery()
276 self.user_query_parts = [] # use self.getQuery()
279 self._parse_query()
280
def _parse_query(self):
    """
    Split self.query into parts and sort them into special parts (tags
    that change the search engine, the search-language, ...) and plain
    user query parts.
    """

    # split query, including whitespaces
    tokens = re.split(r'(\s+)', self.query)

    # only the very last token may produce autocomplete suggestions
    final_index = len(tokens) - 1
    location = None

    for index, token in enumerate(tokens):
        # part does only contain spaces, skip
        if token == '' or token.isspace():
            continue

        # parse special commands: the first parser accepting the token decides
        handled = False
        for parser_class in RawTextQuery.PARSER_CLASSES:
            if not parser_class.check(token):
                continue
            handled = parser_class(self, index == final_index)(token)
            break

        # append query part to query_part list
        if handled:
            target = self.query_parts
        else:
            target = self.user_query_parts
        target.append(token)
        location = (target, len(target) - 1)

    # remember where the last non-blank part ended up (None if there was none)
    self.autocomplete_location = location
311
def get_autocomplete_full_query(self, text):
    """Return the full query with the part being completed replaced by ``text``.

    ``self.autocomplete_location`` is a ``(list, index)`` pair pointing at
    the last query part; that entry is overwritten in place.
    """
    qlist, position = self.autocomplete_location
    qlist[position] = text
    return self.getFullQuery()
316
def changeQuery(self, query):
    """Replace the user part of the query by the words of ``query``.

    The special parts are kept, the autocomplete state is reset and the
    object itself is returned.
    """
    words = query.split()
    self.user_query_parts = words
    self.query = self.getFullQuery()
    self.autocomplete_location = (words, len(words) - 1)
    self.autocomplete_list = []
    return self
323
def getQuery(self):
    """Return the user query parts joined by single spaces."""
    separator = ' '
    return separator.join(self.user_query_parts)
326
def getFullQuery(self):
    """
    get the special parts followed by the user query, separated by
    single spaces and without surrounding whitespace
    """
    special = ' '.join(self.query_parts)
    user = self.getQuery()
    return f'{special} {user}'.strip()
332
333 def __str__(self):
334 return self.getFullQuery()
335
336 def __repr__(self):
337 return (
338 f"<{self.__class__.__name__} "
339 + f"query={self.query!r} "
340 + f"disabled_engines={self.disabled_engines!r}\n "
341 + f"languages={self.languages!r} "
342 + f"timeout_limit={self.timeout_limit!r} "
343 + f"external_bang={self.external_bang!r} "
344 + f"specific={self.specific!r} "
345 + f"enginerefs={self.enginerefs!r}\n "
346 + f"autocomplete_list={self.autocomplete_list!r}\n "
347 + f"query_parts={self.query_parts!r}\n "
348 + f"user_query_parts={self.user_query_parts!r} >\n"
349 + f"redirect_to_first_result={self.redirect_to_first_result!r}"
350 )
_parse(self, value)
Definition query.py:194
check(raw_value)
Definition query.py:181
_autocomplete(self, first_char, value)
Definition query.py:217
__call__(self, raw_value)
Definition query.py:185
_autocomplete(self, bang_ac_list)
Definition query.py:172
__call__(self, raw_value)
Definition query.py:157
__call__(self, raw_value)
Definition query.py:246
_autocomplete(self, value)
Definition query.py:119
_parse(self, value)
Definition query.py:85
__call__(self, raw_value)
Definition query.py:78
_add_autocomplete(self, value)
Definition query.py:39
__call__(self, raw_value)
Definition query.py:30
__init__(self, raw_text_query, enable_autocomplete)
Definition query.py:25
changeQuery(self, query)
Definition query.py:317
__init__(self, str query, list disabled_engines)
Definition query.py:262
get_autocomplete_full_query(self, text)
Definition query.py:312
__call__(self, raw_value)
Definition query.py:49
_parse(self, value)
Definition query.py:56
::1337x
Definition 1337x.py:1