7from typing
import TYPE_CHECKING
9from urllib.parse
import urlencode
25from searx
import redisdb
26from searx.enginelib.traits
import EngineTraits
31 logger: logging.Logger
36 "website":
'https://lite.duckduckgo.com/lite/',
37 "wikidata_id":
'Q12805',
38 "use_official_api":
False,
39 "require_api_key":
False,
43send_accept_language_header =
True
44"""DuckDuckGo-Lite tries to guess the user's preferred language from the HTTP
45``Accept-Language``. Optionally the user can select a region filter (but not a
50categories = [
'general',
'web']
52time_range_support =
True
55url =
'https://lite.duckduckgo.com/lite/'
58time_range_dict = {
'day':
'd',
'week':
'w',
'month':
'm',
'year':
'y'}
59form_data = {
'v':
'l',
'api':
'd.js',
'o':
'json'}
63 """Caches a ``vqd`` value from a query."""
66 logger.debug(
"cache vqd value: %s", value)
67 key =
'SearXNG_ddg_web_vqd' + redislib.secret_hash(query)
68 c.set(key, value, ex=600)
72 """Returns the ``vqd`` that fits to the *query*. If there is no ``vqd`` cached
73 (:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd value from the
78 If an empty string is returned there are no results for the ``query`` and
79 therefore no ``vqd`` value.
81 DDG's bot detection is sensitive to the ``vqd`` value. For some search terms
82 (such as extremely long search terms that are often sent by bots), no ``vqd``
83 value can be determined.
85 If SearXNG cannot determine a ``vqd`` value, then no request should go out
88 A request with a wrong ``vqd`` value leads to DDG temporarily putting
89 SearXNG's IP on a block list.
91 Requests from IPs in this block list run into timeouts.
93 Not sure, but it seems the block list is a sliding window: to get my IP
94 removed from the bot list I had to cool down my IP for 1h (send no requests from
97 TL;DR; the ``vqd`` value is needed to pass DDG's bot protection and is used
98 by all requests to DDG:
100 - DuckDuckGo Lite: ``https://lite.duckduckgo.com/lite`` (POST form data)
101 - DuckDuckGo Web: ``https://links.duckduckgo.com/d.js?q=...&vqd=...``
102 - DuckDuckGo Images: ``https://duckduckgo.com/i.js?q=...&vqd=...``
103 - DuckDuckGo Videos: ``https://duckduckgo.com/v.js?q=...&vqd=...``
104 - DuckDuckGo News: ``https://duckduckgo.com/news.js?q=...&vqd=...``
110 key =
'SearXNG_ddg_web_vqd' + redislib.secret_hash(query)
112 if value
or value == b
'':
113 value = value.decode(
'utf-8')
114 logger.debug(
"re-use cached vqd value: %s", value)
117 query_url =
'https://duckduckgo.com/?' + urlencode({
'q': query})
119 doc = lxml.html.fromstring(res.text)
120 for script
in doc.xpath(
"//script[@type='text/javascript']"):
122 if 'vqd="' in script:
123 value = script[script.index(
'vqd="') + 5 :]
124 value = value[: value.index(
'"')]
126 logger.debug(
"new vqd value: '%s'", value)
127 if value
is not None:
132def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default=
'en_US'):
133 """Get DuckDuckGo's language identifier from SearXNG's locale.
135 DuckDuckGo defines its languages by region codes (see
136 :py:obj:`fetch_traits`).
138 To get region and language of a DDG service use:
142 eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
143 eng_lang = get_ddg_lang(traits, params['searxng_locale'])
145 It might be confusing, but the ``l`` value of the cookie is what SearXNG calls
150 # !ddi paris :es-AR --> {'ad': 'es_AR', 'ah': 'ar-es', 'l': 'ar-es'}
151 params['cookies']['ad'] = eng_lang
152 params['cookies']['ah'] = eng_region
153 params['cookies']['l'] = eng_region
157 `DDG-lite <https://lite.duckduckgo.com/lite>`__ does not offer a language
158 selection to the user, only a region can be selected by the user
159 (``eng_region`` from the example above). DDG-lite stores the selected
162 params['cookies']['kl'] = eng_region # 'ar-es'
165 return eng_traits.custom[
'lang_region'].get(
166 sxng_locale, eng_traits.get_language(sxng_locale, default)
187 "ar_DZ":
'lang_region',
188 "ar_JO":
'lang_region',
189 "ar_SA":
'lang_region',
191 'bn_IN':
'lang_region',
193 'de_CH':
'lang_region',
195 'en_AU':
'lang_region',
196 'en_CA':
'lang_region',
197 'en_GB':
'lang_region',
201 'es_AR':
'lang_region',
202 'es_CL':
'lang_region',
203 'es_CO':
'lang_region',
204 'es_CR':
'lang_region',
205 'es_EC':
'lang_region',
206 'es_MX':
'lang_region',
207 'es_PE':
'lang_region',
208 'es_UY':
'lang_region',
209 'es_VE':
'lang_region',
211 'fr_CA':
'lang_region',
212 'fr_CH':
'lang_region',
213 'fr_BE':
'lang_region',
215 'nl_BE':
'lang_region',
217 'pt_BR':
'lang_region',
221 'tokipona_XX':
'skip',
225def quote_ddg_bangs(query):
230 for val
in re.split(
r'(\s+)', query):
233 if val.startswith(
'!')
and external_bang.get_node(external_bang.EXTERNAL_BANGS, val[1:]):
235 query_parts.append(val)
236 return ' '.join(query_parts)
239def request(query, params):
241 query = quote_ddg_bangs(query)
246 eng_region = traits.get_region(params[
'searxng_locale'], traits.all_locale)
250 params[
'method'] =
'POST'
251 params[
'data'][
'q'] = query
257 params[
'headers'][
'Content-Type'] =
'application/x-www-form-urlencoded'
258 params[
'data'][
'vqd'] = vqd
261 if params[
'pageno'] == 2:
263 offset = (params[
'pageno'] - 1) * 20
264 params[
'data'][
's'] = offset
265 params[
'data'][
'dc'] = offset + 1
267 elif params[
'pageno'] > 2:
269 offset = 20 + (params[
'pageno'] - 2) * 50
270 params[
'data'][
's'] = offset
271 params[
'data'][
'dc'] = offset + 1
274 if params[
'pageno'] > 1:
276 params[
'data'][
'o'] = form_data.get(
'o',
'json')
277 params[
'data'][
'api'] = form_data.get(
'api',
'd.js')
278 params[
'data'][
'nextParams'] = form_data.get(
'nextParams',
'')
279 params[
'data'][
'v'] = form_data.get(
'v',
'l')
280 params[
'headers'][
'Referer'] =
'https://lite.duckduckgo.com/'
282 params[
'data'][
'kl'] = eng_region
283 params[
'cookies'][
'kl'] = eng_region
285 params[
'data'][
'df'] =
''
286 if params[
'time_range']
in time_range_dict:
287 params[
'data'][
'df'] = time_range_dict[params[
'time_range']]
288 params[
'cookies'][
'df'] = time_range_dict[params[
'time_range']]
290 logger.debug(
"param data: %s", params[
'data'])
291 logger.debug(
"param cookies: %s", params[
'cookies'])
297 if resp.status_code == 303:
301 doc = lxml.html.fromstring(resp.text)
303 result_table = eval_xpath(doc,
'//html/body/form/div[@class="filters"]/table')
305 if len(result_table) == 2:
308 result_table = result_table[1]
309 elif not len(result_table) >= 3:
313 result_table = result_table[2]
315 form = eval_xpath(doc,
'//html/body/form/div[@class="filters"]/table//input/..')
319 form_data[
'v'] = eval_xpath(form,
'//input[@name="v"]/@value')[0]
320 form_data[
'api'] = eval_xpath(form,
'//input[@name="api"]/@value')[0]
321 form_data[
'o'] = eval_xpath(form,
'//input[@name="o"]/@value')[0]
322 logger.debug(
'form_data: %s', form_data)
324 tr_rows = eval_xpath(result_table,
'.//tr')
326 tr_rows = tr_rows[:-1]
328 len_tr_rows = len(tr_rows)
331 while len_tr_rows >= offset + 4:
334 tr_title = tr_rows[offset]
335 tr_content = tr_rows[offset + 1]
339 if tr_content.get(
'class') ==
'result-sponsored':
342 a_tag = eval_xpath_getindex(tr_title,
'.//td//a[@class="result-link"]', 0,
None)
346 td_content = eval_xpath_getindex(tr_content,
'.//td[@class="result-snippet"]', 0,
None)
347 if td_content
is None:
352 'title': a_tag.text_content(),
353 'content': extract_text(td_content),
354 'url': a_tag.get(
'href'),
361def fetch_traits(engine_traits: EngineTraits):
362 """Fetch languages & regions from DuckDuckGo.
364 SearXNG's ``all`` locale maps to DuckDuckGo's "All regions" (``wt-wt``).
365 DuckDuckGo's language "Browsers preferred language" (``wt_WT``) makes no
366 sense in a SearXNG request since SearXNG's ``all`` will not add an
367 ``Accept-Language`` HTTP header. The value in ``engine_traits.all_locale``
368 is ``wt-wt`` (the region).
370 Besides regions, DuckDuckGo also defines its languages by region codes. For
371 example, these are the English languages in DuckDuckGo:
378 The function :py:obj:`get_ddg_lang` evaluates DuckDuckGo's language from
385 engine_traits.all_locale =
'wt-wt'
388 resp = get(
'https://duckduckgo.com/dist/util/u.7669f071a13a7daa57cb.js')
391 print(
"ERROR: response from DuckDuckGo is not OK.")
393 pos = resp.text.find(
'regions:{') + 8
394 js_code = resp.text[pos:]
395 pos = js_code.find(
'}') + 1
396 regions = json.loads(js_code[:pos])
398 for eng_tag, name
in regions.items():
400 if eng_tag ==
'wt-wt':
401 engine_traits.all_locale =
'wt-wt'
404 region = ddg_reg_map.get(eng_tag)
409 eng_territory, eng_lang = eng_tag.split(
'-')
410 region = eng_lang +
'_' + eng_territory.upper()
413 sxng_tag = locales.region_tag(babel.Locale.parse(region))
414 except babel.UnknownLocaleError:
415 print(
"ERROR: %s (%s) -> %s is unknown by babel" % (name, eng_tag, region))
418 conflict = engine_traits.regions.get(sxng_tag)
420 if conflict != eng_tag:
421 print(
"CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
423 engine_traits.regions[sxng_tag] = eng_tag
427 engine_traits.custom[
'lang_region'] = {}
429 pos = resp.text.find(
'languages:{') + 10
430 js_code = resp.text[pos:]
431 pos = js_code.find(
'}') + 1
432 js_code =
'{"' + js_code[1:pos].replace(
':',
'":').replace(
',',
',"')
433 languages = json.loads(js_code)
435 for eng_lang, name
in languages.items():
437 if eng_lang ==
'wt_WT':
440 babel_tag = ddg_lang_map.get(eng_lang, eng_lang)
441 if babel_tag ==
'skip':
446 if babel_tag ==
'lang_region':
447 sxng_tag = locales.region_tag(babel.Locale.parse(eng_lang))
448 engine_traits.custom[
'lang_region'][sxng_tag] = eng_lang
451 sxng_tag = locales.language_tag(babel.Locale.parse(babel_tag))
453 except babel.UnknownLocaleError:
454 print(
"ERROR: language %s (%s) is unknown by babel" % (name, eng_lang))
457 conflict = engine_traits.languages.get(sxng_tag)
459 if conflict != eng_lang:
460 print(
"CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang))
462 engine_traits.languages[sxng_tag] = eng_lang
get_ddg_lang(EngineTraits eng_traits, sxng_locale, default='en_US')