246 """Fetch languages from Wikipedia. Not all languages from the
247 :py:obj:`list_of_wikipedias` are supported by SearXNG locales, only those
248 known from :py:obj:`searx.locales.LOCALE_NAMES` or those with a minimal
249 :py:obj:`editing depth <wikipedia_article_depth>`.
251 The location of the Wikipedia address of a language is mapped in a
252 :py:obj:`custom field <searx.enginelib.traits.EngineTraits.custom>`
253 (``wiki_netloc``). Here is a reduced example:
257 traits.custom['wiki_netloc'] = {
258 "en": "en.wikipedia.org",
260 "gsw": "als.wikipedia.org",
262 "zh": "zh.wikipedia.org",
263 "zh-classical": "zh-classical.wikipedia.org"
267 engine_traits.custom[
'wiki_netloc'] = {}
268 engine_traits.custom[
'WIKIPEDIA_LANGUAGES'] = []
272 for eng_tag, sxng_tag_list
in wikipedia_script_variants.items():
273 for sxng_tag
in sxng_tag_list:
274 engine_traits.languages[sxng_tag] = eng_tag
275 for eng_tag, sxng_tag_list
in wiki_lc_locale_variants.items():
276 for sxng_tag
in sxng_tag_list:
277 engine_traits.regions[sxng_tag] = eng_tag
279 resp = _network.get(list_of_wikipedias)
281 print(
"ERROR: response from Wikipedia is not OK.")
283 dom = html.fromstring(resp.text)
284 for row
in dom.xpath(
'//table[contains(@class,"sortable")]//tbody/tr'):
286 cols = row.xpath(
'./td')
289 cols = [c.text_content().strip()
for c
in cols]
291 depth = float(cols[11].replace(
'-',
'0').replace(
',',
''))
292 articles = int(cols[4].replace(
',',
'').replace(
',',
''))
295 wiki_url = row.xpath(
'./td[4]/a/@href')[0]
296 wiki_url = urllib.parse.urlparse(wiki_url)
299 sxng_tag = locales.language_tag(babel.Locale.parse(lang_map.get(eng_tag, eng_tag), sep=
'-'))
300 except babel.UnknownLocaleError:
304 engine_traits.custom[
'WIKIPEDIA_LANGUAGES'].append(eng_tag)
306 if sxng_tag
not in locales.LOCALE_NAMES:
317 conflict = engine_traits.languages.get(sxng_tag)
319 if conflict != eng_tag:
320 print(
"CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
323 engine_traits.languages[sxng_tag] = eng_tag
324 engine_traits.custom[
'wiki_netloc'][eng_tag] = wiki_url.netloc
326 engine_traits.custom[
'WIKIPEDIA_LANGUAGES'].sort()