def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages and regions from Bing-Web.

    Downloads ``https://www.bing.com/account/general`` and fills
    *engine_traits* in place from the links found on that page:

    - ``engine_traits.languages`` maps the tag returned by ``language_tag``
      to a Bing UI language.  The value comes from the ``setlang`` query
      parameter of the links inside the ``language-section`` block.
    - ``engine_traits.regions`` maps the tag returned by ``region_tag`` to a
      Bing market code.  The value is built from the ``cc`` query parameter
      of the links inside the ``region-section`` block, combined with the
      languages babel lists as official for that country.
    - ``engine_traits.all_locale`` is set to ``'clear'`` when a region link
      carries ``cc=clear``.

    Entries that cannot be processed (missing query parameter, locale unknown
    to babel, conflicting mapping) are reported with ``print`` and skipped;
    the first mapping stored for a tag always wins.

    :param engine_traits: traits object that is updated in place.
    :return: ``None``
    """

    headers = {
        "User-Agent": gen_useragent(),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US;q=0.5,en;q=0.3",
        "Accept-Encoding": "gzip, deflate, br",
        "DNT": "1",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Sec-GPC": "1",
        "Cache-Control": "max-age=0",
    }

    resp = get("https://www.bing.com/account/general", headers=headers)
    if not resp.ok:
        # Best effort: the problem is reported, but the body is still parsed
        # below and the seed entries are still stored.
        print("ERROR: response from bing is not OK.")

    dom = html.fromstring(resp.text)

    # --- languages ---

    # Seed entry; the conflict check in the loop keeps it from being replaced.
    engine_traits.languages['zh'] = 'zh-hans'

    # Bing language codes that have to be rewritten before babel parses them.
    map_lang = {'prs': 'fa-AF', 'en': 'en-us'}
    # Region suffix appended to a bare language code to form the UI language.
    # Languages not listed here get themselves as suffix (e.g. 'de' -> 'de-de').
    bing_ui_lang_map = {
        'en': 'us',
        'da': 'dk',
    }

    for href in eval_xpath(dom, '//div[@id="language-section"]//li/a/@href'):
        setlang = parse_qs(urlparse(href).query).get('setlang')
        if not setlang:
            # A link without the parameter must not abort the whole fetch.
            print(f"ERROR: language link ({href}) has no 'setlang' parameter")
            continue
        eng_lang = setlang[0]

        babel_lang = map_lang.get(eng_lang, eng_lang)
        try:
            sxng_tag = language_tag(babel.Locale.parse(babel_lang.replace('-', '_')))
        except babel.UnknownLocaleError:
            print(f"ERROR: language ({babel_lang}) is unknown by babel")
            continue

        # Codes that already contain a '-' are only lower-cased; bare codes
        # get a region suffix appended.
        bing_ui_lang = eng_lang.lower()
        if '-' not in bing_ui_lang:
            bing_ui_lang = bing_ui_lang + '-' + bing_ui_lang_map.get(bing_ui_lang, bing_ui_lang)

        conflict = engine_traits.languages.get(sxng_tag)
        if conflict:
            # Keep the existing mapping; only report it when the values differ.
            if conflict != bing_ui_lang:
                print(f"CONFLICT: babel {sxng_tag} --> {conflict}, {bing_ui_lang}")
            continue
        engine_traits.languages[sxng_tag] = bing_ui_lang

    # --- regions (market codes) ---

    # Seed entry; the conflict check in the loop keeps it from being replaced.
    engine_traits.regions['zh-CN'] = 'zh-cn'

    # Market codes that differ from the plain '<language>-<country>' scheme.
    map_market_codes = {
        'zh-hk': 'en-hk',
    }

    for href in eval_xpath(dom, '//div[@id="region-section"]//li/a/@href'):
        cc_values = parse_qs(urlparse(href).query).get('cc')
        if not cc_values:
            # A link without the parameter must not abort the whole fetch.
            print(f"ERROR: region link ({href}) has no 'cc' parameter")
            continue
        cc_tag = cc_values[0]

        if cc_tag == 'clear':
            engine_traits.all_locale = cc_tag
            continue

        # One market code per official language of the country, restricted to
        # the languages collected above.
        for lang_tag in babel.languages.get_official_languages(cc_tag, de_facto=True):
            if lang_tag not in engine_traits.languages:
                continue
            # Only the part before the first '_' is used for the market code.
            language = lang_tag.split('_')[0]
            market_code = f"{language}-{cc_tag}"
            market_code = map_market_codes.get(market_code, market_code)

            locale_id = f"{language}_{cc_tag.upper()}"
            try:
                sxng_tag = region_tag(babel.Locale.parse(locale_id))
            except babel.UnknownLocaleError:
                # Same handling as in the language loop: report and skip.
                print(f"ERROR: region ({locale_id}) is unknown by babel")
                continue

            conflict = engine_traits.regions.get(sxng_tag)
            if conflict:
                # Keep the existing mapping; only report it when the values differ.
                if conflict != market_code:
                    print(f"CONFLICT: babel {sxng_tag} --> {conflict}, {market_code}")
                continue
            engine_traits.regions[sxng_tag] = market_code