.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
locales.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""
3SearXNG’s locale data
4=====================
5
6The variables :py:obj:`RTL_LOCALES` and :py:obj:`LOCALE_NAMES` are loaded from
7:origin:`searx/data/locales.json` / see :py:obj:`locales_initialize` and
8:ref:`update_locales.py`.
9
10.. hint::
11
12 Whenever the value of :py:obj:`ADDITIONAL_TRANSLATIONS` or
13 :py:obj:`LOCALE_BEST_MATCH` is modified, the
14 :origin:`searx/data/locales.json` needs to be rebuilt::
15
16 ./manage data.locales
17
18SearXNG's locale codes
19======================
20
21.. automodule:: searx.sxng_locales
22 :members:
23
24
25SearXNG’s locale implementations
26================================
27"""
28
29from __future__ import annotations
30
31from pathlib import Path
32
33import babel
34from babel.support import Translations
35import babel.languages
36import babel.core
37import flask_babel
38import flask
39from flask.ctx import has_request_context
40from searx import (
41 data,
42 logger,
43 searx_dir,
44)
45
46logger = logger.getChild('locales')
47
48
49# save the original function before monkey patching flask_babel.get_translations
50_flask_babel_get_translations = flask_babel.get_translations
51
52LOCALE_NAMES = {}
53"""Mapping of locales and their description. Locales e.g. 'fr' or 'pt-BR' (see
54:py:obj:`locales_initialize`).
55
56:meta hide-value:
57"""
58
59RTL_LOCALES: set[str] = set()
60"""List of *Right-To-Left* locales e.g. 'he' or 'fa-IR' (see
61:py:obj:`locales_initialize`)."""
62
63ADDITIONAL_TRANSLATIONS = {
64 "dv": "ދިވެހި (Dhivehi)",
65 "oc": "Occitan",
66 "szl": "Ślōnski (Silesian)",
67 "pap": "Papiamento",
68}
69"""Additional languages SearXNG has translations for but not supported by
70python-babel (see :py:obj:`locales_initialize`)."""
71
72LOCALE_BEST_MATCH = {
73 "dv": "si",
74 "oc": 'fr-FR',
75 "szl": "pl",
76 "nl-BE": "nl",
77 "zh-HK": "zh-Hant-TW",
78 "pap": "pt-BR",
79}
80"""Map a locale we do not have a translation for to a locale we have a
81translation for. For example: use the Taiwan version of the translation for
82Hong Kong."""
83
84
86 locale = 'en'
87 if has_request_context():
88 value = flask.request.preferences.get_value('locale')
89 if value:
90 locale = value
91
92 # first, set the language that is not supported by babel
93 if locale in ADDITIONAL_TRANSLATIONS:
94 flask.request.form['use-translation'] = locale
95
96 # second, map locale to a value python-babel supports
97 locale = LOCALE_BEST_MATCH.get(locale, locale)
98
99 if locale == '':
100 # if there is an error loading the preferences
101 # the locale is going to be ''
102 locale = 'en'
103
104 # babel uses underscore instead of hyphen.
105 locale = locale.replace('-', '_')
106 return locale
107
108
110 """Monkey patch of :py:obj:`flask_babel.get_translations`"""
111 if has_request_context():
112 use_translation = flask.request.form.get('use-translation')
113 if use_translation in ADDITIONAL_TRANSLATIONS:
114 babel_ext = flask_babel.current_app.extensions['babel']
115 return Translations.load(babel_ext.translation_directories[0], use_translation)
117
118
_TR_LOCALES: list[str] = []


def get_translation_locales() -> list[str]:
    """Return the sorted list of translation locales (*underscore* notation).

    The list is built from the sub-folders of :origin:`searx/translations`
    that contain a ``LC_MESSAGES`` folder.  The result is cached in the module
    level ``_TR_LOCALES``; as long as the cached list is non-empty the file
    system is not scanned again.
    """
    global _TR_LOCALES  # pylint:disable=global-statement

    if not _TR_LOCALES:
        translations_dir = Path(searx_dir) / 'translations'
        _TR_LOCALES = sorted(
            entry.name
            for entry in translations_dir.iterdir()
            if entry.is_dir() and (entry / 'LC_MESSAGES').is_dir()
        )
    return _TR_LOCALES
139
140
def locales_initialize():
    """Initialize the locales environment of the SearXNG session.

    - replace :py:obj:`flask_babel.get_translations` by this module's
      :py:obj:`get_translations` (monkey patch)
    - fill the global names :py:obj:`LOCALE_NAMES` and :py:obj:`RTL_LOCALES`
      from ``data.LOCALES``
    """
    flask_babel.get_translations = get_translations

    locales_data = data.LOCALES
    LOCALE_NAMES.update(locales_data["LOCALE_NAMES"])
    RTL_LOCALES.update(locales_data["RTL_LOCALES"])
150
151
def region_tag(locale: babel.Locale) -> str:
    """Returns SearXNG's region tag from the locale (e.g. zh-TW , en-US).

    :param locale: The locale to build the tag from; its ``language`` and
      ``territory`` are joined by a hyphen.

    :raises ValueError: If the locale has no territory.
    """
    if not locale.territory:
        # name the offending locale in the message (the former message
        # contained an unformatted '%s' placeholder)
        raise ValueError(f'{locale} missed a territory')
    return locale.language + '-' + locale.territory
157
158
def language_tag(locale: babel.Locale) -> str:
    """Returns SearXNG's language tag from the locale.  If the locale has a
    script, the tag includes the script name, joined by an underscore (e.g.
    en, zh_Hant).
    """
    if not locale.script:
        return locale.language
    return locale.language + '_' + locale.script
167
168
def get_locale(locale_tag: str) -> babel.Locale | None:
    """Parses ``locale_tag`` (subtags separated by a hyphen) into a
    :py:obj:`babel.Locale` object.  Returns ``None`` if babel raises
    :py:obj:`babel.core.UnknownLocaleError` for the tag."""
    try:
        parsed = babel.Locale.parse(locale_tag, sep='-')
    except babel.core.UnknownLocaleError:
        return None
    return parsed
178
179
def get_official_locales(
    territory: str, languages=None, regional: bool = False, de_facto: bool = True
) -> set[babel.Locale]:
    """Returns a set of :py:obj:`babel.Locale` built from the languages given
    by :py:obj:`babel.languages.get_official_languages` for ``territory``.

    :param territory: The territory (country or region) code.

    :param languages: A list of language codes the languages from
      :py:obj:`babel.languages.get_official_languages` should be in
      (intersection, compared case-insensitively).  If this argument is
      ``None`` (or empty), all official languages in this territory are used.

    :param regional: If the regional flag is set, then languages which are
      regionally official are also returned.

    :param de_facto: If the de_facto flag is set to `False`, then languages
      which are “de facto” official are not returned.

    Language / territory combinations unknown to babel are silently skipped.
    """
    official = babel.languages.get_official_languages(territory, regional=regional, de_facto=de_facto)

    if languages:
        wanted = [code.lower() for code in languages]
        official = {code for code in official if code.lower() in wanted}

    locales = set()
    for code in official:
        try:
            locales.add(babel.Locale.parse(code + '_' + territory))
        except babel.UnknownLocaleError:
            # babel has no locale data for this combination
            continue
    return locales
215
216
def get_engine_locale(searxng_locale, engine_locales, default=None):
    """Return engine's language (aka locale) string that best fits to argument
    ``searxng_locale``.

    Argument ``engine_locales`` is a python dict that maps *SearXNG locales* to
    corresponding *engine locales*::

      <engine>: {
          # SearXNG string : engine-string
          'ca-ES'          : 'ca_ES',
          'fr-BE'          : 'fr_BE',
          'fr-CA'          : 'fr_CA',
          'fr-CH'          : 'fr_CH',
          'fr'             : 'fr_FR',
          ...
          'pl-PL'          : 'pl_PL',
          'pt-PT'          : 'pt_PT'
          ..
          'zh'             : 'zh'
          'zh_Hans'        : 'zh'
          'zh_Hant'        : 'zh_TW'
      }

    .. hint::

       The *SearXNG locale* string has to be known by babel!

    If there is no direct 1:1 mapping, this function tries to narrow down
    engine's language (locale).  If no value can be determined by these
    approximation attempts the ``default`` value is returned.

    Assumptions:

    A. When the user selects a language the results should be optimized
       according to the selected language.

    B. When the user selects a language and a territory the results should be
       optimized with first priority on territory and second on language.

    First approximation rule (*by territory*):

      When the user selects a locale with territory (and a language), the
      territory has priority over the language.  If any of the official
      languages in the territory is supported by the engine
      (``engine_locales``) it will be used.

    Second approximation rule (*by language*):

      If "First approximation rule" brings no result or the user selects only
      a language without a territory.  Check in which territories the language
      has an official status and if one of these territories is supported by
      the engine.

    :param searxng_locale: SearXNG locale string (e.g. ``fr-BE``, ``zh``).
    :param engine_locales: dict mapping SearXNG locales to engine locales.
    :param default: value returned when no approximation matches.
    """
    # pylint: disable=too-many-branches, too-many-return-statements

    # 1:1 mapping (e.g. a region "fr-BE --> fr_BE" or a language "zh --> zh"),
    # no need to narrow language-script nor territory.
    direct_match = engine_locales.get(searxng_locale)
    if direct_match is not None:
        return direct_match

    try:
        locale = babel.Locale.parse(searxng_locale, sep='-')
    except babel.core.UnknownLocaleError:
        # retry with the part before the first hyphen only
        try:
            locale = babel.Locale.parse(searxng_locale.split('-')[0])
        except babel.core.UnknownLocaleError:
            return default

    # mapping by language (+ script) tag, e.g. "zh-HK --> zh_Hant" or
    # "zh-CN --> zh_Hans"
    searxng_lang = language_tag(locale)
    lang_match = engine_locales.get(searxng_lang)
    if lang_match is not None:
        return lang_match

    # SearXNG's selected locale is not supported by the engine ..

    if locale.territory:
        # Try to narrow by *official* languages in the territory (??-XX).
        for official_language in babel.languages.get_official_languages(locale.territory, de_facto=True):
            candidate = engine_locales.get(official_language + '-' + locale.territory)
            if candidate is not None:
                return candidate

    # Engine does not support one of the official languages in the territory
    # or there is only a language selected without a territory.

    # Now have a look if the searxng_lang (the language selected by the user)
    # is an official language in other territories.  If so, check if the
    # engine does support the language in this other territory.

    if locale.language:

        # territory --> language info, limited to territories in which
        # searxng_lang has an official status
        official_territories = {}
        for terr, langs in babel.core.get_global("territory_languages").items():
            lang_info = langs.get(searxng_lang, {})
            if lang_info.get('official_status'):
                official_territories[terr] = lang_info

        # first: check fr-FR, de-DE .. is supported by the engine
        # exception: 'en' --> 'en-US'
        home_territory = locale.language.upper()
        if home_territory == 'EN':
            home_territory = 'US'

        if official_territories.get(home_territory):
            candidate = engine_locales.get(locale.language + '-' + home_territory)
            if candidate is not None:
                return candidate

        # second: sort by population_percent and take first match

        # drawback of "population percent": a small territory in which nearly
        # everybody speaks the language is ranked before a large territory in
        # which only a part of the population speaks it, although the total
        # number of speakers in the large territory is higher.
        #
        # By example: The population of Saint-Martin is 33.000, of which 100%
        # speak French, but this is less than the 30% of the approximately 2.5
        # million Belgian citizens
        #
        # - 'fr-MF', 'population_percent': 100.0, 'official_status': 'official'
        # - 'fr-BE', 'population_percent': 38.0, 'official_status': 'official'

        by_population = sorted(
            official_territories.items(),
            key=lambda item: item[1]['population_percent'],
            reverse=True,
        )
        for terr, _lang_info in by_population:
            candidate = engine_locales.get(locale.language + '-' + terr)
            if candidate is not None:
                return candidate

    # No luck: narrow by "language from territory" and "territory from
    # language" does not fit to a locale supported by the engine.
    return default
366
367
def match_locale(searxng_locale: str, locale_tag_list: list[str], fallback: str | None = None) -> str | None:
    """Return tag from ``locale_tag_list`` that best fits to ``searxng_locale``.

    :param str searxng_locale: SearXNG's internal representation of locale (de,
        de-DE, fr-BE, zh, zh-CN, zh-TW ..).

    :param list locale_tag_list: The list of locale tags to select from

    :param str fallback: fallback locale tag (if unset --> ``None``)

    The rules to find a match are implemented in :py:obj:`get_engine_locale`,
    the ``engine_locales`` is built up by :py:obj:`build_engine_locales`.

    The ``fallback`` is returned when ``searxng_locale`` is empty or cannot be
    parsed by :py:obj:`get_locale`.

    .. hint::

       The *SearXNG locale* string and the members of ``locale_tag_list`` have
       to be known by babel!  The :py:obj:`ADDITIONAL_TRANSLATIONS` are used in
       the UI and are not known by babel --> will be ignored.
    """

    # example:
    #   searxng_locale = 'es'
    #   locale_tag_list = ['es-AR', 'es-ES', 'es-MX']

    if not searxng_locale:
        return fallback

    locale = get_locale(searxng_locale)
    if locale is None:
        return fallback

    # normalize to a SearXNG locale that can be passed to get_engine_locale:
    # region tag if there is a territory, language tag otherwise
    normalized = region_tag(locale) if locale.territory else language_tag(locale)

    # clean up locale_tag_list: drop the pseudo tags and the UI-only languages
    usable_tags = [
        tag for tag in locale_tag_list if tag not in ('all', 'auto') and tag not in ADDITIONAL_TRANSLATIONS
    ]

    # emulate fetch_traits
    return get_engine_locale(normalized, build_engine_locales(usable_tags), default=fallback)
415
416
def build_engine_locales(tag_list: list[str]):
    """Build, from a list of locale tags, a dictionary that can be passed by
    argument ``engine_locales`` to :py:obj:`get_engine_locale`.  This function
    is mainly used by :py:obj:`match_locale` and is similar to what the
    ``fetch_traits(..)`` function of engines do.

    If there are territory codes in the ``tag_list`` that have a *script code*
    additional keys are added to the returned dictionary.

    .. code:: python

       >>> import locales
       >>> engine_locales = locales.build_engine_locales(['en', 'en-US', 'zh', 'zh-CN', 'zh-TW'])
       >>> engine_locales
       {
           'en': 'en', 'en-US': 'en-US',
           'zh': 'zh', 'zh-CN': 'zh-CN', 'zh_Hans': 'zh-CN',
           'zh-TW': 'zh-TW', 'zh_Hant': 'zh-TW'
       }
       >>> get_engine_locale('zh-Hans', engine_locales)
       'zh-CN'

    This function is a good example to understand the language/region model
    of SearXNG:

      SearXNG only distinguishes between **search languages** and **search
      regions**, by adding the *script-tags*, languages with *script-tags* can
      be assigned to the **regions** that SearXNG supports.

    Tags that :py:obj:`get_locale` cannot parse are skipped with a warning.
    """
    mapping = {}

    for tag in tag_list:
        locale = get_locale(tag)
        if locale is None:
            logger.warning("build_engine_locales: skip locale tag %s / unknown by babel", tag)
            continue

        # region key for tags with a territory
        if locale.territory:
            mapping[region_tag(locale)] = tag

        # language key for plain language tags and, additionally, for region
        # tags that carry a script
        if locale.script or not locale.territory:
            mapping[language_tag(locale)] = tag

    return mapping
str region_tag(babel.Locale locale)
Definition locales.py:152
str|None match_locale(str searxng_locale, list[str] locale_tag_list, str|None fallback=None)
Definition locales.py:368
locales_initialize()
Definition locales.py:141
set[babel.Locale] get_official_locales(str territory, languages=None, bool regional=False, bool de_facto=True)
Definition locales.py:182
build_engine_locales(list[str] tag_list)
Definition locales.py:417
list[str] get_translation_locales()
Definition locales.py:122
get_engine_locale(searxng_locale, engine_locales, default=None)
Definition locales.py:217
_flask_babel_get_translations
Definition locales.py:50
babel.Locale|None get_locale(str locale_tag)
Definition locales.py:169
str language_tag(babel.Locale locale)
Definition locales.py:159