.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
webadapter.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2# pylint: disable=missing-module-docstring
3
4from collections import defaultdict
5from typing import Dict, List, Optional, Tuple
6from searx.exceptions import SearxParameterException
7from searx.webutils import VALID_LANGUAGE_CODE
8from searx.query import RawTextQuery
9from searx.engines import categories, engines
10from searx.search import SearchQuery, EngineRef
11from searx.preferences import Preferences, is_locked
12from searx.utils import detect_language
13
14
15# remove duplicate queries.
16# HINT: does not fix "!music !soundcloud", because the categories are 'none' and 'music'
def deduplicate_engineref_list(engineref_list: List[EngineRef]) -> List[EngineRef]:
    """Drop repeated engine references, keeping one entry per category/name pair.

    Two references are duplicates when the string ``category + '|' + name`` is
    the same for both.  When a pair occurs several times the last occurrence is
    the one returned, placed at the position of the first occurrence.  The same
    engine referenced under two different categories is *not* merged.
    """
    unique_refs = {}
    for engineref in engineref_list:
        unique_refs[engineref.category + '|' + engineref.name] = engineref
    return list(unique_refs.values())
20
21
def validate_engineref_list(
    engineref_list: List[EngineRef], preferences: Preferences
) -> Tuple[List[EngineRef], List[EngineRef], List[EngineRef]]:
    """Validate query_engines according to the preferences

    Every reference of ``engineref_list`` ends up in exactly one of the three
    returned lists, in its original relative order.

    Returns:
        List[EngineRef]: list of existing engines with a validated token
        List[EngineRef]: list of unknown engine (name not found in ``engines``)
        List[EngineRef]: list of engine with invalid token according to the preferences
    """
    valid = []
    unknown = []
    no_token = []
    for engineref in engineref_list:
        if engineref.name not in engines:
            unknown.append(engineref)
            continue

        # the engine exists: the token check is delegated to the preferences
        engine = engines[engineref.name]
        if not preferences.validate_token(engine):
            no_token.append(engineref)
            continue

        valid.append(engineref)
    return valid, unknown, no_token
47
48
def parse_pageno(form: Dict[str, str]) -> int:
    """Return the requested result page number.

    The value is read from the ``pageno`` form field; a missing field means
    page ``1``.

    Raises:
        SearxParameterException: if ``pageno`` is not a decimal integer >= 1.
    """
    pageno_param = form.get('pageno', '1')

    # str.isdecimal() only accepts characters that int() is able to convert.
    # str.isdigit() additionally accepts e.g. superscript digits ('²') for
    # which int() raises a ValueError.
    if not pageno_param.isdecimal():
        raise SearxParameterException('pageno', pageno_param)

    try:
        pageno = int(pageno_param)
    except ValueError as e:
        # int() can still refuse the string, e.g. when it exceeds the
        # interpreter's limit for integer string conversion
        raise SearxParameterException('pageno', pageno_param) from e

    if pageno < 1:
        raise SearxParameterException('pageno', pageno_param)
    return pageno
54
55
def parse_lang(preferences: Preferences, form: Dict[str, str], raw_text_query: RawTextQuery) -> str:
    """Return the language selected for this search.

    When the ``language`` setting is locked the value from the preferences is
    returned as is.  Otherwise the language is taken from, in this order: the
    last language found in the query text, the ``language`` form field, the
    preferences.  Searching in several languages at once is not supported (by
    most engines), hence only one value is kept.

    Raises:
        SearxParameterException: if the selected value is neither ``auto`` nor
            matched by ``VALID_LANGUAGE_CODE``.
    """
    if is_locked('language'):
        return preferences.get_value('language')

    # precedence: query text > form field > preferences
    if raw_text_query.languages:
        selected = raw_text_query.languages[-1]
    elif 'language' in form:
        selected = form['language']
    else:
        selected = preferences.get_value('language')

    # 'auto' is accepted in addition to the regular language codes
    if VALID_LANGUAGE_CODE.match(selected) or selected == 'auto':
        return selected
    raise SearxParameterException('language', selected)
74
75
def parse_safesearch(preferences: Preferences, form: Dict[str, str]) -> int:
    """Return the safesearch level of this search.

    When the ``safesearch`` setting is locked the value from the preferences is
    returned as is.  Otherwise the ``safesearch`` form field is used if present,
    else the value from the preferences; the result has to be within ``0..2``.

    Raises:
        SearxParameterException: if the form value is not a decimal integer or
            the resulting level is outside of ``0..2``.
    """
    if is_locked('safesearch'):
        return preferences.get_value('safesearch')

    if 'safesearch' in form:
        safesearch_param = form.get('safesearch')
        # first check safesearch: str.isdecimal() only accepts characters that
        # int() is able to convert, str.isdigit() also accepts e.g. '²' for
        # which int() raises a ValueError.
        if not safesearch_param.isdecimal():
            raise SearxParameterException('safesearch', safesearch_param)
        try:
            query_safesearch = int(safesearch_param)
        except ValueError as e:
            # e.g. the string exceeds the limit for integer string conversion
            raise SearxParameterException('safesearch', safesearch_param) from e
    else:
        query_safesearch = preferences.get_value('safesearch')

    # safesearch : second check
    if query_safesearch < 0 or query_safesearch > 2:
        raise SearxParameterException('safesearch', query_safesearch)

    return query_safesearch
94
95
def parse_time_range(form: Dict[str, str]) -> Optional[str]:
    """Return the ``time_range`` form field or ``None`` if no range is requested.

    A missing field, an empty string and the literal string ``'None'`` all mean
    "no time range".  The other accepted values are ``day``, ``week``, ``month``
    and ``year``.

    Raises:
        SearxParameterException: for any other value.
    """
    requested = form.get('time_range')
    if requested in ('', 'None'):
        return None
    if requested in (None, 'day', 'week', 'month', 'year'):
        return requested
    raise SearxParameterException('time_range', requested)
103
104
def parse_timeout(form: Dict[str, str], raw_text_query: RawTextQuery) -> Optional[float]:
    """Return the timeout requested for this search, or ``None`` if there is none.

    A timeout set by the query text (``raw_text_query.timeout_limit``) takes
    precedence over the ``timeout_limit`` form field.  A missing value, an empty
    string and the literal string ``'None'`` all mean "no timeout requested".

    Raises:
        SearxParameterException: if the value can not be converted to a float.
    """
    requested = raw_text_query.timeout_limit
    if requested is None:
        requested = form.get('timeout_limit')

    if requested is None or requested in ('None', ''):
        return None

    try:
        seconds = float(requested)
    except ValueError as e:
        raise SearxParameterException('timeout_limit', requested) from e
    return seconds
116
117
def parse_category_form(query_categories: List[str], name: str, value: str) -> None:
    """Update ``query_categories`` in place from one form field.

    Two kinds of fields are understood, all others are ignored:

    - ``categories``: comma separated list of category names, the known ones
      are appended
    - ``category_<name>``: the known category ``<name>`` is appended, unless the
      value is ``off`` in which case one occurrence of it is removed from the
      list (if present)
    """
    if name == 'categories':
        for categ in value.split(','):
            categ = categ.strip()
            if categ in categories:
                query_categories.append(categ)
        return

    if not name.startswith('category_'):
        return

    category = name[len('category_'):]

    # if category is not found in list, skip
    if category not in categories:
        return

    if value == 'off':
        # remove category from list if property is set to 'off'
        if category in query_categories:
            query_categories.remove(category)
    else:
        # add category to list
        query_categories.append(category)
134
135
def get_selected_categories(preferences: Preferences, form: Optional[Dict[str, str]]) -> List[str]:
    """Return the categories selected for a search.

    The categories are looked up in this order, the first non empty result is
    returned:

    1. the fields of ``form`` (skipped if ``form`` is ``None`` or the
       ``categories`` setting is locked)
    2. the ``categories`` value of the preferences, i.e. the user-defined
       default-configuration (stored in the cookie)
    3. the default ``['general']``
    """
    selected_categories = []

    if not is_locked('categories') and form is not None:
        for field_name, field_value in form.items():
            parse_category_form(selected_categories, field_name, field_value)

    # no category specified for this search: use the user's defaults
    if not selected_categories:
        selected_categories.extend(preferences.get_value('categories'))

    # still no category: fall back to the general category
    return selected_categories or ['general']
157
158
def get_engineref_from_category_list(  # pylint: disable=invalid-name
    category_list: List[str],
    disabled_engines: List[str],
) -> List[EngineRef]:
    """Return one engine reference per engine of each category in ``category_list``.

    The categories are processed in the given order.  An engine is left out of
    a category when the pair ``(engine name, category)`` is contained in
    ``disabled_engines``.
    """
    return [
        EngineRef(engine.name, categ)
        for categ in category_list
        for engine in categories[categ]
        if (engine.name, categ) not in disabled_engines
    ]
171
172
def parse_generic(preferences: Preferences, form: Dict[str, str], disabled_engines: List[str]) -> List[EngineRef]:
    """Build the list of engine references from the form and the preferences.

    Unless the ``categories`` setting is locked, the form is parsed:

    - ``engines``: comma separated list of engine names; each known engine is
      referenced with its first category
    - every other field is handed to :py:obj:`parse_category_form`

    If the form names at least one known engine, the engines of the categories
    found in the form (if any) are added to them.  Otherwise the engines of the
    categories from the form are used, or -- when the form names no category
    either -- the engines of the categories from the preferences.
    """
    engine_refs = []
    category_names = []
    has_explicit_engines = False

    if not is_locked('categories'):
        # parse the form only if the categories are not locked
        for field_name, field_value in form.items():
            if field_name != 'engines':
                parse_category_form(category_names, field_name, field_value)
                continue

            named_refs = []
            for engine_name in field_value.split(','):
                engine_name = engine_name.strip()
                if engine_name in engines:
                    named_refs.append(EngineRef(engine_name, engines[engine_name].categories[0]))
            if named_refs:
                engine_refs.extend(named_refs)
                has_explicit_engines = True

    if not has_explicit_engines and not category_names:
        # neither "engines", "categories" nor "category_*" parameters in the form
        # -> get the categories from the preferences (the cookies or the settings)
        category_names = get_selected_categories(preferences, None)

    if category_names or not has_explicit_engines:
        # add all engines declared under the selected categories
        engine_refs.extend(get_engineref_from_category_list(category_names, disabled_engines))

    return engine_refs
211
212
def parse_engine_data(form: Dict[str, str]) -> Dict[str, Dict[str, str]]:
    """Collect the engine data submitted with the form.

    Form fields whose name starts with ``engine_data`` and has the shape
    ``<prefix>-<engine>-<key>`` are gathered into a mapping
    ``{<engine>: {<key>: <value>}}``.  All other fields are ignored.

    Field names come from the client: a name without the two separating
    dashes is skipped instead of raising a ``ValueError``, and additional
    dashes are kept as part of ``<key>``.
    """
    engine_data = defaultdict(dict)
    for k, v in form.items():
        if not k.startswith("engine_data"):
            continue
        # maxsplit=2: everything after the second dash belongs to the key
        parts = k.split('-', 2)
        if len(parts) != 3:
            # malformed field name, nothing to assign the value to
            continue
        _, engine, key = parts
        engine_data[engine][key] = v
    return engine_data
220
221
def get_search_query_from_webapp(
    preferences: Preferences, form: Dict[str, str]
) -> Tuple[SearchQuery, RawTextQuery, List[EngineRef], List[EngineRef], str]:
    """Assemble data from preferences and request.form (from the HTML form) needed
    in a search query.

    The returned tuple consists of:

    1. instance of :py:obj:`searx.search.SearchQuery`
    2. instance of :py:obj:`searx.query.RawTextQuery`
    3. list of :py:obj:`searx.search.EngineRef` instances of unknown engines
    4. list of :py:obj:`searx.search.EngineRef` instances of engines with an
       invalid token according to the preferences
    5. string with the *selected locale* of the query

    About language/locale: if the client selects the alias ``auto`` the
    ``SearchQuery`` object is built up by the :py:obj:`detected language
    <searx.utils.detect_language>`.  If language recognition does not have a
    match the language preferred by the :py:obj:`Preferences.client` is used.
    If client does not have a preference, the default ``all`` is used.

    The *selected locale* in the tuple always represents the selected
    language/locale and might differ from the language recognition.

    Raises:
        SearxParameterException: if the form has no (or an empty) ``q`` field,
            or if one of the parsed parameters is invalid.
    """
    # no text for the query ?
    if not form.get('q'):
        raise SearxParameterException('q', '')

    # set blocked engines
    disabled_engines = preferences.engines.get_disabled()

    # parse query, if tags are set, which change
    # the search engine or search-language
    raw_text_query = RawTextQuery(form['q'], disabled_engines)

    # set query
    query = raw_text_query.getQuery()
    query_pageno = parse_pageno(form)
    query_safesearch = parse_safesearch(preferences, form)
    query_time_range = parse_time_range(form)
    query_timeout = parse_timeout(form, raw_text_query)
    external_bang = raw_text_query.external_bang
    redirect_to_first_result = raw_text_query.redirect_to_first_result
    engine_data = parse_engine_data(form)

    query_lang = parse_lang(preferences, form, raw_text_query)
    # keep what has been selected, query_lang may be replaced by a detected language
    selected_locale = query_lang

    if query_lang == 'auto':
        query_lang = detect_language(query, threshold=0.8, only_search_languages=True)
        query_lang = query_lang or preferences.client.locale_tag or 'all'

    if not is_locked('categories') and raw_text_query.specific:
        # if engines are calculated from query,
        # set categories by using that information
        query_engineref_list = raw_text_query.enginerefs
    else:
        # otherwise, using defined categories to
        # calculate which engines should be used
        query_engineref_list = parse_generic(preferences, form, disabled_engines)

    query_engineref_list = deduplicate_engineref_list(query_engineref_list)
    query_engineref_list, query_engineref_list_unknown, query_engineref_list_notoken = validate_engineref_list(
        query_engineref_list, preferences
    )

    return (
        SearchQuery(
            query,
            query_engineref_list,
            query_lang,
            query_safesearch,
            query_pageno,
            query_time_range,
            query_timeout,
            external_bang=external_bang,
            engine_data=engine_data,
            redirect_to_first_result=redirect_to_first_result,
        ),
        raw_text_query,
        query_engineref_list_unknown,
        query_engineref_list_notoken,
        selected_locale,
    )
::1337x
Definition 1337x.py:1
List[EngineRef] parse_generic(Preferences preferences, Dict[str, str] form, List[str] disabled_engines)
None parse_category_form(List[str] query_categories, str name, str value)
parse_engine_data(form)
List[str] get_selected_categories(Preferences preferences, Optional[Dict[str, str]] form)
List[EngineRef] deduplicate_engineref_list(List[EngineRef] engineref_list)
Definition webadapter.py:17
Optional[float] parse_timeout(Dict[str, str] form, RawTextQuery raw_text_query)
List[EngineRef] get_engineref_from_category_list(List[str] category_list, List[str] disabled_engines)
Optional[str] parse_time_range(Dict[str, str] form)
Definition webadapter.py:96
str parse_lang(Preferences preferences, Dict[str, str] form, RawTextQuery raw_text_query)
Definition webadapter.py:56
int parse_pageno(Dict[str, str] form)
Definition webadapter.py:49
int parse_safesearch(Preferences preferences, Dict[str, str] form)
Definition webadapter.py:76
Tuple[SearchQuery, RawTextQuery, List[EngineRef], List[EngineRef], str] get_search_query_from_webapp(Preferences preferences, Dict[str, str] form)
Tuple[List[EngineRef], List[EngineRef], List[EngineRef]] validate_engineref_list(List[EngineRef] engineref_list, Preferences preferences)
Definition webadapter.py:24