7from __future__
import annotations
9from typing
import TYPE_CHECKING
11from urllib.parse
import urlencode, quote_plus
27from searx
import redisdb
35 logger: logging.Logger
40 "website":
'https://lite.duckduckgo.com/lite/',
41 "wikidata_id":
'Q12805',
42 "use_official_api":
False,
43 "require_api_key":
False,
47send_accept_language_header =
True
48"""DuckDuckGo-Lite tries to guess user's preferred language from the HTTP
49``Accept-Language``. Optionally the user can select a region filter (but not a
# Engine settings.
categories = [
    'general',
    'web',
]
time_range_support = True

# Address of DDG's *no Javascript* HTML page; request() also sends it as the
# ``Referer`` header when a follow-up page is requested.
url = "https://html.duckduckgo.com/html"

# SearXNG time range name --> value of DDG's ``df`` form field / cookie.
time_range_dict = dict(day='d', week='w', month='m', year='y')

# Default form fields; request() reads them when it asks for a page > 1.
form_data = dict(v='l', api='d.js', o='json')
67 return 'SearXNG_ddg_web_vqd' + redislib.secret_hash(f
"{query}//{region}")
71 """Caches a ``vqd`` value from a query."""
74 logger.debug(
"VALKEY cache vqd value: %s (%s)", value, region)
75 c.set(
_cache_key(query, region), value, ex=600)
78 logger.debug(
"MEM cache vqd value: %s (%s)", value, region)
79 if len(__CACHE) > 100:
81 __CACHE.append((
_cache_key(query, region), value))
84def get_vqd(query: str, region: str, force_request: bool =
False):
85 """Returns the ``vqd`` that fits to the *query*.
87 :param query: The query term
88 :param region: DDG's region code
89 :param force_request: force a request to get a vqd value from DDG
91 TL;DR; the ``vqd`` value is needed to pass DDG's bot protection and is used
92 by all requests to DDG:
94 - DuckDuckGo Lite: ``https://lite.duckduckgo.com/lite`` (POST form data)
95 - DuckDuckGo Web: ``https://links.duckduckgo.com/d.js?q=...&vqd=...``
96 - DuckDuckGo Images: ``https://duckduckgo.com/i.js?q=...&vqd=...``
97 - DuckDuckGo Videos: ``https://duckduckgo.com/v.js?q=...&vqd=...``
98 - DuckDuckGo News: ``https://duckduckgo.com/news.js?q=...&vqd=...``
100 DDG's bot detection is sensitive to the ``vqd`` value. For some search terms
101 (such as extremely long search terms that are often sent by bots), no ``vqd``
102 value can be determined.
104 If SearXNG cannot determine a ``vqd`` value, then no request should go out
109 A request with a wrong ``vqd`` value leads to DDG temporarily putting
110 SearXNG's IP on a block list.
112 Requests from IPs in this block list run into timeouts. Not sure, but it
113 seems the block list is a sliding window: to get my IP off the block list
114 I had to cool down my IP for 1h (send no requests from that IP to DDG).
121 if value
or value == b
'':
122 value = value.decode(
'utf-8')
123 logger.debug(
"re-use CACHED vqd value: %s", value)
126 for k, value
in __CACHE:
128 logger.debug(
"MEM re-use CACHED vqd value: %s", value)
132 resp = get(f
'https://duckduckgo.com/?q={quote_plus(query)}')
133 if resp.status_code == 200:
134 value = extr(resp.text,
'vqd="',
'"')
136 logger.debug(
"vqd value from DDG request: %s", value)
def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'):
    """Map a SearXNG locale to DuckDuckGo's language identifier.

    DuckDuckGo defines its languages by region codes (see
    :py:obj:`fetch_traits`).  Region and language of a DDG service are
    determined like this:

    .. code:: python

       eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
       eng_lang = get_ddg_lang(traits, params['searxng_locale'])

    Mind the cookie names, they are easy to mix up: in the example below the
    ``l`` cookie carries the *region*, the language goes into ``ad``:

    .. code:: python

       # !ddi paris :es-AR --> {'ad': 'es_AR', 'ah': 'ar-es', 'l': 'ar-es'}
       params['cookies']['ad'] = eng_lang
       params['cookies']['ah'] = eng_region
       params['cookies']['l'] = eng_region

    `DDG-lite <https://lite.duckduckgo.com/lite>`__ and the *no Javascript*
    page https://html.duckduckgo.com/html do not offer a language selection
    to the user, only a region can be selected (``eng_region`` from the
    example above).  For these two the region is passed in the ``kl`` cookie:

    .. code:: python

       params['cookies']['kl'] = eng_region  # 'ar-es'

    :param eng_traits: engine traits; ``custom['lang_region']`` and
        ``get_language()`` are the only members used here
    :param sxng_locale: SearXNG's locale of the request
    :param default: handed to ``eng_traits.get_language()`` as its default
    :return: the ``custom['lang_region']`` entry of *sxng_locale* if there is
        one, otherwise the result of ``eng_traits.get_language()``
    """
    lang_region = eng_traits.custom['lang_region']
    # Computed up front, exactly like the argument of the one-line ``.get()``
    # call would be: the language lookup runs even on a lang_region hit.
    fallback = eng_traits.get_language(sxng_locale, default)
    return lang_region.get(sxng_locale, fallback)
199 "ar_DZ":
'lang_region',
200 "ar_JO":
'lang_region',
201 "ar_SA":
'lang_region',
203 'bn_IN':
'lang_region',
205 'de_CH':
'lang_region',
207 'en_AU':
'lang_region',
208 'en_CA':
'lang_region',
209 'en_GB':
'lang_region',
213 'es_AR':
'lang_region',
214 'es_CL':
'lang_region',
215 'es_CO':
'lang_region',
216 'es_CR':
'lang_region',
217 'es_EC':
'lang_region',
218 'es_MX':
'lang_region',
219 'es_PE':
'lang_region',
220 'es_UY':
'lang_region',
221 'es_VE':
'lang_region',
223 'fr_CA':
'lang_region',
224 'fr_CH':
'lang_region',
225 'fr_BE':
'lang_region',
227 'nl_BE':
'lang_region',
229 'pt_BR':
'lang_region',
233 'tokipona_XX':
'skip',
237def quote_ddg_bangs(query):
242 for val
in re.split(
r'(\s+)', query):
245 if val.startswith(
'!')
and external_bang.get_node(external_bang.EXTERNAL_BANGS, val[1:]):
247 query_parts.append(val)
248 return ' '.join(query_parts)
251def request(query, params):
253 query = quote_ddg_bangs(query)
255 if len(query) >= 500:
264 x.removeprefix(
"site:").removeprefix(
"intitle:").removeprefix(
"inurl:").removeprefix(
"filetype:")
265 for x
in query.split()
268 eng_region: str = traits.get_region(params[
'searxng_locale'], traits.all_locale)
269 if eng_region ==
"wt-wt":
273 params[
'data'][
'kl'] = eng_region
274 params[
'cookies'][
'kl'] = eng_region
279 params[
'method'] =
'POST'
280 params[
'data'][
'q'] = query
286 params[
'headers'][
'Content-Type'] =
'application/x-www-form-urlencoded'
288 params[
'headers'][
'Sec-Fetch-Dest'] =
"document"
289 params[
'headers'][
'Sec-Fetch-Mode'] =
"navigate"
290 params[
'headers'][
'Sec-Fetch-Site'] =
"same-origin"
291 params[
'headers'][
'Sec-Fetch-User'] =
"?1"
294 if params[
'pageno'] == 1:
296 params[
'data'][
'b'] =
""
298 params[
'data'][
'df'] =
''
299 if params[
'time_range']
in time_range_dict:
301 params[
'data'][
'df'] = time_range_dict[params[
'time_range']]
302 params[
'cookies'][
'df'] = time_range_dict[params[
'time_range']]
304 if params[
'pageno'] == 2:
307 offset = (params[
'pageno'] - 1) * 20
308 params[
'data'][
's'] = offset
309 params[
'data'][
'dc'] = offset + 1
311 elif params[
'pageno'] > 2:
314 offset = 20 + (params[
'pageno'] - 2) * 50
315 params[
'data'][
's'] = offset
316 params[
'data'][
'dc'] = offset + 1
318 if params[
'pageno'] > 1:
321 params[
'data'][
'o'] = form_data.get(
'o',
'json')
322 params[
'data'][
'api'] = form_data.get(
'api',
'd.js')
323 params[
'data'][
'nextParams'] = form_data.get(
'nextParams',
'')
324 params[
'data'][
'v'] = form_data.get(
'v',
'l')
325 params[
'headers'][
'Referer'] = url
327 vqd =
get_vqd(query, eng_region, force_request=
False)
333 params[
'data'][
'vqd'] = vqd
341 if params[
'searxng_locale'].startswith(
"zh"):
347 logger.debug(
"param data: %s", params[
'data'])
348 logger.debug(
"param cookies: %s", params[
'cookies'])
def is_ddg_captcha(dom):
    """Tell whether *dom* contains DDG's *not a Robot* dialog.

    In case of a CAPTCHA, DDG responds with its own dialog and does not
    redirect to a CAPTCHA page, so the dialog is looked up by its form
    (``id="challenge-form"``) in the document itself.
    """
    challenge_forms = eval_xpath(dom, "//form[@id='challenge-form']")
    return bool(challenge_forms)
358def response(resp) -> EngineResults:
361 if resp.status_code == 303:
364 doc = lxml.html.fromstring(resp.text)
366 if is_ddg_captcha(doc):
370 form = eval_xpath(doc,
'//input[@name="vqd"]/..')
374 form_vqd = eval_xpath(form,
'//input[@name="vqd"]/@value')[0]
376 cache_vqd(resp.search_params[
'data'][
'q'], resp.search_params[
'data'][
'kl'], form_vqd)
379 for div_result
in eval_xpath(doc,
'//div[@id="links"]/div[contains(@class, "web-result")]'):
382 title = eval_xpath(div_result,
'.//h2/a')
386 item[
"title"] = extract_text(title)
387 item[
"url"] = eval_xpath(div_result,
'.//h2/a/@href')[0]
388 item[
"content"] = extract_text(eval_xpath(div_result,
'.//a[contains(@class, "result__snippet")]')[0])
392 zero_click_info_xpath =
'//div[@id="zero_click_abstract"]'
393 zero_click = extract_text(eval_xpath(doc, zero_click_info_xpath)).strip()
396 "Your IP address is" not in zero_click
397 and "Your user agent:" not in zero_click
398 and "URL Decoded:" not in zero_click
400 current_query = resp.search_params[
"data"].get(
"q")
402 results.types.Answer(
404 url=
"https://duckduckgo.com/?"
406 {
"q": current_query},
414def fetch_traits(engine_traits: EngineTraits):
415 """Fetch languages & regions from DuckDuckGo.
417 SearXNG's ``all`` locale maps to DuckDuckGo's "All regions" (``wt-wt``).
418 DuckDuckGo's language "Browsers preferred language" (``wt_WT``) makes no
419 sense in a SearXNG request since SearXNG's ``all`` will not add a
420 ``Accept-Language`` HTTP header. The value in ``engine_traits.all_locale``
421 is ``wt-wt`` (the region).
423 Besides regions, DuckDuckGo also defines its languages by region codes. For
424 example, these are the English languages in DuckDuckGo:
431 The function :py:obj:`get_ddg_lang` evaluates DuckDuckGo's language from
440 engine_traits.all_locale =
'wt-wt'
443 resp = get(
'https://duckduckgo.com/dist/util/u.7669f071a13a7daa57cb.js')
446 print(
"ERROR: response from DuckDuckGo is not OK.")
448 js_code = extr(resp.text,
'regions:',
',snippetLengths')
450 regions = json.loads(js_code)
451 for eng_tag, name
in regions.items():
453 if eng_tag ==
'wt-wt':
454 engine_traits.all_locale =
'wt-wt'
457 region = ddg_reg_map.get(eng_tag)
462 eng_territory, eng_lang = eng_tag.split(
'-')
463 region = eng_lang +
'_' + eng_territory.upper()
466 sxng_tag = locales.region_tag(babel.Locale.parse(region))
467 except babel.UnknownLocaleError:
468 print(
"ERROR: %s (%s) -> %s is unknown by babel" % (name, eng_tag, region))
471 conflict = engine_traits.regions.get(sxng_tag)
473 if conflict != eng_tag:
474 print(
"CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
476 engine_traits.regions[sxng_tag] = eng_tag
480 engine_traits.custom[
'lang_region'] = {}
482 js_code = extr(resp.text,
'languages:',
',regions')
484 languages = js_variable_to_python(js_code)
485 for eng_lang, name
in languages.items():
487 if eng_lang ==
'wt_WT':
490 babel_tag = ddg_lang_map.get(eng_lang, eng_lang)
491 if babel_tag ==
'skip':
496 if babel_tag ==
'lang_region':
497 sxng_tag = locales.region_tag(babel.Locale.parse(eng_lang))
498 engine_traits.custom[
'lang_region'][sxng_tag] = eng_lang
501 sxng_tag = locales.language_tag(babel.Locale.parse(babel_tag))
503 except babel.UnknownLocaleError:
504 print(
"ERROR: language %s (%s) is unknown by babel" % (name, eng_lang))
507 conflict = engine_traits.languages.get(sxng_tag)
509 if conflict != eng_lang:
510 print(
"CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang))
512 engine_traits.languages[sxng_tag] = eng_lang
get_vqd(str query, str region, bool force_request=False)
get_ddg_lang(EngineTraits eng_traits, sxng_locale, default='en_US')
_cache_key(str query, str region)
cache_vqd(str query, str region, str value)