SearXNG Developer Documentation
autocomplete.py
# SPDX-License-Identifier: AGPL-3.0-or-later
"""This module implements functions needed for the autocompleter."""
# pylint: disable=use-dict-literal

import json
import html
import typing as t
from urllib.parse import urlencode, quote_plus

import lxml.etree
import lxml.html
from httpx import HTTPError

from searx import settings
from searx.engines import (
    engines,
    google,
)
from searx.network import get as http_get, post as http_post  # pyright: ignore[reportUnknownVariableType]
from searx.exceptions import SearxEngineResponseException
from searx.utils import extr, gen_useragent

if t.TYPE_CHECKING:
    from searx.extended_types import SXNG_Response


def update_kwargs(**kwargs) -> None:  # type: ignore
    if 'timeout' not in kwargs:
        kwargs['timeout'] = settings['outgoing']['request_timeout']
    kwargs['raise_for_httperror'] = True


def get(*args, **kwargs) -> "SXNG_Response":  # type: ignore
    update_kwargs(**kwargs)  # pyright: ignore[reportUnknownArgumentType]
    return http_get(*args, **kwargs)  # pyright: ignore[reportUnknownArgumentType]


def post(*args, **kwargs) -> "SXNG_Response":  # type: ignore
    update_kwargs(**kwargs)  # pyright: ignore[reportUnknownArgumentType]
    return http_post(*args, **kwargs)  # pyright: ignore[reportUnknownArgumentType]
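
# Illustrative note (not part of the original module): the wrappers above only
# forward to searx.network with the configured defaults, e.g. for a
# hypothetical URL:
#
#   get('https://example.org/suggest?q=sear')
#   # ~ http_get('https://example.org/suggest?q=sear',
#   #            timeout=settings['outgoing']['request_timeout'],
#   #            raise_for_httperror=True)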

def baidu(query: str, _sxng_locale: str) -> list[str]:
    # baidu search autocompleter
    base_url = "https://www.baidu.com/sugrec?"
    response = get(base_url + urlencode({'ie': 'utf-8', 'json': 1, 'prod': 'pc', 'wd': query}))
    results: list[str] = []

    if response.ok:
        data: dict[str, t.Any] = response.json()
        if 'g' in data:
            for item in data['g']:
                results.append(item['q'])
    return results


def brave(query: str, _sxng_locale: str) -> list[str]:
    # brave search autocompleter
    url = 'https://search.brave.com/api/suggest?'
    url += urlencode({'q': query})
    country = 'all'
    kwargs = {'cookies': {'country': country}}
    resp = get(url, **kwargs)
    results: list[str] = []

    if resp.ok:
        data: list[list[str]] = resp.json()
        for item in data[1]:
            results.append(item)
    return results


def dbpedia(query: str, _sxng_locale: str) -> list[str]:
    autocomplete_url = 'https://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'
    resp = get(autocomplete_url + urlencode(dict(QueryString=query)))
    results: list[str] = []

    if resp.ok:
        dom = lxml.etree.fromstring(resp.content)
        results = [str(x) for x in dom.xpath('//Result/Label//text()')]

    return results


def duckduckgo(query: str, sxng_locale: str) -> list[str]:
    """Autocomplete from DuckDuckGo. Supports DuckDuckGo's languages"""

    traits = engines['duckduckgo'].traits
    args: dict[str, str] = {
        'q': query,
        'kl': traits.get_region(sxng_locale, traits.all_locale),
    }

    url = 'https://duckduckgo.com/ac/?type=list&' + urlencode(args)
    resp = get(url)
    results: list[str] = []

    if resp.ok:
        j = resp.json()
        if len(j) > 1:
            results = j[1]
    return results


def google_complete(query: str, sxng_locale: str) -> list[str]:
    """Autocomplete from Google. Supports Google's languages and subdomains
    (:py:obj:`searx.engines.google.get_google_info`) by using the async REST
    API::

        https://{subdomain}/complete/search?{args}

    """
    google_info: dict[str, t.Any] = google.get_google_info({'searxng_locale': sxng_locale}, engines['google'].traits)
    url = 'https://{subdomain}/complete/search?{args}'
    args = urlencode(
        {
            'q': query,
            'client': 'gws-wiz',
            'hl': google_info['params']['hl'],
        }
    )
    results: list[str] = []

    resp = get(url.format(subdomain=google_info['subdomain'], args=args))
    if resp and resp.ok:
        json_txt = resp.text[resp.text.find('[') : resp.text.find(']', -3) + 1]
        data = json.loads(json_txt)
        for item in data[0]:
            results.append(lxml.html.fromstring(item[0]).text_content())
    return results


def mwmbl(query: str, _sxng_locale: str) -> list[str]:
    """Autocomplete from Mwmbl_."""

    # mwmbl autocompleter
    url = 'https://api.mwmbl.org/search/complete?{query}'

    results: list[str] = get(url.format(query=urlencode({'q': query}))).json()[1]

    # results starting with `go:` are direct urls and not useful for auto completion
    return [result for result in results if not result.startswith("go: ") and not result.startswith("search: ")]


def naver(query: str, _sxng_locale: str) -> list[str]:
    # Naver search autocompleter
    url = f"https://ac.search.naver.com/nx/ac?{urlencode({'q': query, 'r_format': 'json', 'st': 0})}"
    response = get(url)
    results: list[str] = []

    if response.ok:
        data: dict[str, t.Any] = response.json()
        if data.get('items'):
            for item in data['items'][0]:
                results.append(item[0])
    return results


def qihu360search(query: str, _sxng_locale: str) -> list[str]:
    # 360Search search autocompleter
    url = f"https://sug.so.360.cn/suggest?{urlencode({'format': 'json', 'word': query})}"
    response = get(url)
    results: list[str] = []

    if response.ok:
        data: dict[str, t.Any] = response.json()
        if 'result' in data:
            for item in data['result']:
                results.append(item['word'])
    return results


def quark(query: str, _sxng_locale: str) -> list[str]:
    # Quark search autocompleter
    url = f"https://sugs.m.sm.cn/web?{urlencode({'q': query})}"
    response = get(url)
    results: list[str] = []

    if response.ok:
        data = response.json()
        for item in data.get('r', []):
            results.append(item['w'])
    return results


def seznam(query: str, _sxng_locale: str) -> list[str]:
    # seznam search autocompleter
    url = 'https://suggest.seznam.cz/fulltext/cs?{query}'
    resp = get(
        url.format(
            query=urlencode(
                {'phrase': query, 'cursorPosition': len(query), 'format': 'json-2', 'highlight': '1', 'count': '6'}
            )
        )
    )
    results: list[str] = []

    if resp.ok:
        data = resp.json()
        results = [
            ''.join([part.get('text', '') for part in item.get('text', [])])
            for item in data.get('result', [])
            if item.get('itemType', None) == 'ItemType.TEXT'
        ]
    return results


def sogou(query: str, _sxng_locale: str) -> list[str]:
    # Sogou search autocompleter
    base_url = "https://sor.html5.qq.com/api/getsug?"
    resp = get(base_url + urlencode({'m': 'searxng', 'key': query}))
    results: list[str] = []

    if resp.ok:
        raw_json = extr(resp.text, "[", "]", default="")
        try:
            data = json.loads(f"[{raw_json}]]")
            results = data[1]
        except json.JSONDecodeError:
            pass
    return results


def startpage(query: str, sxng_locale: str) -> list[str]:
    """Autocomplete from Startpage's Firefox extension.
    Supports the languages specified in lang_map.
    """

    lang_map = {
        'da': 'dansk',
        'de': 'deutsch',
        'en': 'english',
        'es': 'espanol',
        'fr': 'francais',
        'nb': 'norsk',
        'nl': 'nederlands',
        'pl': 'polski',
        'pt': 'portugues',
        'sv': 'svenska',
    }

    base_lang = sxng_locale.split('-')[0]
    lui = lang_map.get(base_lang, 'english')

    url_params = {
        'q': query,
        'format': 'opensearch',
        'segment': 'startpage.defaultffx',
        'lui': lui,
    }
    url = f'https://www.startpage.com/suggestions?{urlencode(url_params)}'

    # Needs user agent, returns a 204 otherwise
    h = {'User-Agent': gen_useragent()}

    resp = get(url, headers=h)
    results: list[str] = []

    if resp.ok:
        try:
            data = resp.json()
            if len(data) >= 2 and isinstance(data[1], list):
                results = data[1]
        except json.JSONDecodeError:
            pass

    return results


def stract(query: str, _sxng_locale: str) -> list[str]:
    # stract autocompleter (beta)
    url = f"https://stract.com/beta/api/autosuggest?q={quote_plus(query)}"
    resp = post(url)
    results: list[str] = []

    if resp.ok:
        results = [html.unescape(suggestion['raw']) for suggestion in resp.json()]

    return results


def swisscows(query: str, _sxng_locale: str) -> list[str]:
    # swisscows autocompleter
    url = 'https://swisscows.ch/api/suggest?{query}&itemsCount=5'
    results: list[str] = json.loads(get(url.format(query=urlencode({'query': query}))).text)
    return results


def qwant(query: str, sxng_locale: str) -> list[str]:
    """Autocomplete from Qwant. Supports Qwant's regions."""
    locale = engines['qwant'].traits.get_region(sxng_locale, 'en_US')
    url = 'https://api.qwant.com/v3/suggest?{query}'
    resp = get(url.format(query=urlencode({'q': query, 'locale': locale, 'version': '2'})))
    results: list[str] = []

    if resp.ok:
        data = resp.json()
        if data['status'] == 'success':
            for item in data['data']['items']:
                results.append(item['value'])

    return results


def wikipedia(query: str, sxng_locale: str) -> list[str]:
    """Autocomplete from Wikipedia. Supports Wikipedia's languages (aka netloc)."""
    eng_traits = engines['wikipedia'].traits
    wiki_lang = eng_traits.get_language(sxng_locale, 'en')
    wiki_netloc: str = eng_traits.custom['wiki_netloc'].get(wiki_lang, 'en.wikipedia.org')  # type: ignore

    args = urlencode(
        {
            'action': 'opensearch',
            'format': 'json',
            'formatversion': '2',
            'search': query,
            'namespace': '0',
            'limit': '10',
        }
    )
    resp = get(f'https://{wiki_netloc}/w/api.php?{args}')
    results: list[str] = []

    if resp.ok:
        data = resp.json()
        if len(data) > 1:
            results = data[1]

    return results


def yandex(query: str, _sxng_locale: str) -> list[str]:
    # yandex autocompleter
    url = "https://suggest.yandex.com/suggest-ff.cgi?{0}"
    resp = json.loads(get(url.format(urlencode(dict(part=query)))).text)
    results: list[str] = []

    if len(resp) > 1:
        results = resp[1]
    return results


backends: dict[str, t.Callable[[str, str], list[str]]] = {
    '360search': qihu360search,
    'baidu': baidu,
    'brave': brave,
    'dbpedia': dbpedia,
    'duckduckgo': duckduckgo,
    'google': google_complete,
    'mwmbl': mwmbl,
    'naver': naver,
    'quark': quark,
    'qwant': qwant,
    'seznam': seznam,
    'sogou': sogou,
    'startpage': startpage,
    'stract': stract,
    'swisscows': swisscows,
    'wikipedia': wikipedia,
    'yandex': yandex,
}


def search_autocomplete(backend_name: str, query: str, sxng_locale: str) -> list[str]:
    backend = backends.get(backend_name)
    if backend is None:
        return []
    try:
        return backend(query, sxng_locale)
    except (HTTPError, SearxEngineResponseException):
        return []
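

# Minimal usage sketch (illustrative, not part of the original module).  It
# assumes a fully initialized SearXNG context, since most backends rely on
# `settings` and the loaded `engines` registry; backend names are the keys of
# the `backends` dict above:
#
#   suggestions = search_autocomplete('duckduckgo', 'sear', 'en-US')
#   # unknown backend names and network/engine errors both yield []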