244 """Fetch languages from Wikipedia. Not all languages from the
245 :py:obj:`list_of_wikipedias` are supported by SearXNG locales, only those
246 known from :py:obj:`searx.locales.LOCALE_NAMES` or those with a minimal
247 :py:obj:`editing depth <wikipedia_article_depth>`.
249 The location of the Wikipedia address of a language is mapped in a
250 :py:obj:`custom field <searx.enginelib.traits.EngineTraits.custom>`
251 (``wiki_netloc``). Here is a reduced example:
255 traits.custom['wiki_netloc'] = {
256 "en": "en.wikipedia.org",
258 "gsw": "als.wikipedia.org",
260 "zh": "zh.wikipedia.org",
261 "zh-classical": "zh-classical.wikipedia.org"
265 engine_traits.custom[
'wiki_netloc'] = {}
266 engine_traits.custom[
'WIKIPEDIA_LANGUAGES'] = []
270 for eng_tag, sxng_tag_list
in wikipedia_script_variants.items():
271 for sxng_tag
in sxng_tag_list:
272 engine_traits.languages[sxng_tag] = eng_tag
273 for eng_tag, sxng_tag_list
in wiki_lc_locale_variants.items():
274 for sxng_tag
in sxng_tag_list:
275 engine_traits.regions[sxng_tag] = eng_tag
277 resp = _network.get(list_of_wikipedias)
279 print(
"ERROR: response from Wikipedia is not OK.")
281 dom = html.fromstring(resp.text)
282 for row
in dom.xpath(
'//table[contains(@class,"sortable")]//tbody/tr'):
284 cols = row.xpath(
'./td')
287 cols = [c.text_content().strip()
for c
in cols]
289 depth = float(cols[11].replace(
'-',
'0').replace(
',',
''))
290 articles = int(cols[4].replace(
',',
'').replace(
',',
''))
293 wiki_url = row.xpath(
'./td[4]/a/@href')[0]
294 wiki_url = urllib.parse.urlparse(wiki_url)
297 sxng_tag = locales.language_tag(babel.Locale.parse(lang_map.get(eng_tag, eng_tag), sep=
'-'))
298 except babel.UnknownLocaleError:
302 engine_traits.custom[
'WIKIPEDIA_LANGUAGES'].append(eng_tag)
304 if sxng_tag
not in locales.LOCALE_NAMES:
315 conflict = engine_traits.languages.get(sxng_tag)
317 if conflict != eng_tag:
318 print(
"CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
321 engine_traits.languages[sxng_tag] = eng_tag
322 engine_traits.custom[
'wiki_netloc'][eng_tag] = wiki_url.netloc
324 engine_traits.custom[
'WIKIPEDIA_LANGUAGES'].sort()