75 """Format of the response from UI's async request.
77 - ``arc_id:<...>,use_ac:true,_fmt:prog``
79 The arc_id is randomly generated every hour.
83 use_ac =
"use_ac:true"
89 if not _arcid_random
or (int(time.time()) - _arcid_random[1]) > 3600:
90 _arcid_random = (
''.join(random.choices(_arcid_range, k=23)), int(time.time()))
91 arc_id = f
"arc_id:srp_{_arcid_random[0]}_1{start:02}"
93 return ",".join([arc_id, use_ac, _fmt])
96def get_google_info(params:
"OnlineParams", eng_traits: EngineTraits) -> dict[str, t.Any]:
97 """Composing various (language) properties for the google engines (:ref:`google
100 This function is called by the various google engines (:ref:`google web
101 engine`, :ref:`google images engine`, :ref:`google news engine` and
102 :ref:`google videos engine`).
104 :param dict params: Request parameters of the engine. At least
105 a ``searxng_locale`` key should be in the dictionary.
107 :param eng_traits: Engine's traits fetched from google preferences
108 (:py:obj:`searx.enginelib.traits.EngineTraits`)
112 Py-Dictionary with the key/value pairs:
115 The language code that is used by google (e.g. ``lang_en`` or
119 The country code that is used by google (e.g. ``US`` or ``TW``)
122 An instance of :py:obj:`babel.core.Locale` built from the
123 ``searxng_locale`` value.
126 Google subdomain :py:obj:`google_domains` that fits to the country
130 Py-Dictionary with additional request arguments (can be passed to
131 :py:func:`urllib.parse.urlencode`).
133 - ``hl`` parameter: specifies the interface language of user interface.
134 - ``lr`` parameter: restricts search results to documents written in
135 a particular language.
136 - ``cr`` parameter: restricts search results to documents
137 originating in a particular country.
138 - ``ie`` parameter: sets the character encoding scheme that should
139 be used to interpret the query string ('utf8').
140 - ``oe`` parameter: sets the character encoding scheme that should
141 be used to decode the XML result ('utf8').
144 Py-Dictionary with additional HTTP headers (can be passed to
151 ret_val: dict[str, t.Any] = {
161 sxng_locale = params.get(
'searxng_locale',
'all')
163 locale = babel.Locale.parse(sxng_locale, sep=
'-')
164 except babel.core.UnknownLocaleError:
167 eng_lang = eng_traits.get_language(sxng_locale,
'lang_en')
168 lang_code = eng_lang.split(
'_')[-1]
169 country = eng_traits.get_region(sxng_locale, eng_traits.all_locale)
179 ret_val[
'language'] = eng_lang
180 ret_val[
'country'] = country
181 ret_val[
'locale'] = locale
182 ret_val[
'subdomain'] = eng_traits.custom[
'supported_domains'].get(country.upper(),
'www.google.com')
194 ret_val[
'params'][
'hl'] = f
'{lang_code}-{country}'
210 ret_val[
'params'][
'lr'] = eng_lang
211 if sxng_locale ==
'all':
212 ret_val[
'params'][
'lr'] =
''
221 ret_val[
'params'][
'cr'] =
''
222 if len(sxng_locale.split(
'-')) > 1:
223 ret_val[
'params'][
'cr'] =
'country' + country
244 ret_val[
'params'][
'ie'] =
'utf8'
251 ret_val[
'params'][
'oe'] =
'utf8'
264 ret_val[
'headers'][
'Accept'] =
'*/*'
270 ret_val[
'cookies'][
'CONSENT'] =
"YES+"
335 for img_id, data_image
in RE_DATA_IMAGE.findall(text):
336 end_pos = data_image.rfind(
'=')
338 data_image = data_image[: end_pos + 1]
339 data_image_map[img_id] = data_image
340 last = RE_DATA_IMAGE_end.search(text)
342 data_image_map[last.group(1)] = last.group(2)
343 logger.debug(
'data:image objects --> %s', list(data_image_map.keys()))
344 return data_image_map
348 """Get response from google's search request"""
356 dom = html.fromstring(resp.text)
359 answer_list = eval_xpath(dom,
'//div[contains(@class, "LGOjhe")]')
360 for item
in answer_list:
361 for bubble
in eval_xpath(item,
'.//div[@class="nnFGuf"]'):
364 results.types.Answer(
365 answer=extract_text(item),
366 url=(eval_xpath(item,
'../..//a/@href') + [
None])[0],
372 for result
in eval_xpath_list(dom,
'.//div[contains(@jscontroller, "SC7lYd")]'):
376 title_tag = eval_xpath_getindex(result,
'.//a/h3[1]', 0, default=
None)
377 if title_tag
is None:
379 logger.debug(
'ignoring item from the result_xpath list: missing title')
381 title = extract_text(title_tag)
383 url = eval_xpath_getindex(result,
'.//a[h3]/@href', 0,
None)
385 logger.debug(
'ignoring item from the result_xpath list: missing url of title "%s"', title)
388 content_nodes = eval_xpath(result,
'.//div[contains(@data-sncf, "1")]')
389 for item
in content_nodes:
390 for script
in item.xpath(
".//script"):
391 script.getparent().remove(script)
393 content = extract_text(content_nodes)
396 logger.debug(
'ignoring item from the result_xpath list: missing content of title "%s"', title)
399 thumbnail = content_nodes[0].xpath(
'.//img/@src')
401 thumbnail = thumbnail[0]
402 if thumbnail.startswith(
'data:image'):
403 img_id = content_nodes[0].xpath(
'.//img/@id')
405 thumbnail = data_image_map.get(img_id[0])
409 results.append({
'url': url,
'title': title,
'content': content,
'thumbnail': thumbnail})
411 except Exception
as e:
412 logger.error(e, exc_info=
True)
416 for suggestion
in eval_xpath_list(dom, suggestion_xpath):
418 results.append({
'suggestion': extract_text(suggestion)})
453def fetch_traits(engine_traits: EngineTraits, add_domains: bool =
True):
454 """Fetch languages from Google."""
457 engine_traits.custom[
'supported_domains'] = {}
459 resp = get(
'https://www.google.com/preferences')
461 raise RuntimeError(
"Response from Google's preferences is not OK.")
463 dom = html.fromstring(resp.text.replace(
'<?xml version="1.0" encoding="UTF-8"?>',
''))
467 lang_map = {
'no':
'nb'}
468 for x
in eval_xpath_list(dom,
"//select[@name='hl']/option"):
469 eng_lang = x.get(
"value")
471 locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep=
'-')
472 except babel.UnknownLocaleError:
473 print(
"INFO: google UI language %s (%s) is unknown by babel" % (eng_lang, x.text.split(
"(")[0].strip()))
475 sxng_lang = language_tag(locale)
477 conflict = engine_traits.languages.get(sxng_lang)
479 if conflict != eng_lang:
480 print(
"CONFLICT: babel %s --> %s, %s" % (sxng_lang, conflict, eng_lang))
482 engine_traits.languages[sxng_lang] =
'lang_' + eng_lang
485 engine_traits.languages[
'zh'] =
'lang_zh-CN'
489 for x
in eval_xpath_list(dom,
"//select[@name='gl']/option"):
490 eng_country = x.get(
"value")
492 if eng_country
in skip_countries:
494 if eng_country ==
'ZZ':
495 engine_traits.all_locale =
'ZZ'
498 sxng_locales = get_official_locales(eng_country, engine_traits.languages.keys(), regional=
True)
501 print(
"ERROR: can't map from google country %s (%s) to a babel region." % (x.get(
'data-name'), eng_country))
504 for sxng_locale
in sxng_locales:
505 engine_traits.regions[region_tag(sxng_locale)] = eng_country
508 engine_traits.regions[
'zh-CN'] =
'HK'
513 resp = get(
'https://www.google.com/supported_domains')
515 raise RuntimeError(
"Response from https://www.google.com/supported_domains is not OK.")
517 for domain
in resp.text.split():
518 domain = domain.strip()
519 if not domain
or domain
in [
523 region = domain.split(
'.')[-1].upper()
524 engine_traits.custom[
'supported_domains'][region] =
'www' + domain
527 engine_traits.custom[
'supported_domains'][
'CN'] =
'www' + domain