def fetch_traits(engine_traits: EngineTraits) -> None:
    """Fetch languages and regions from Bing-Web.

    Requests Bing's ``/account/general`` page and fills ``engine_traits`` in
    place from the links found on it:

    - ``engine_traits.languages``: tag returned by ``language_tag()`` -->
      lowercase Bing UI language code of the form ``<lang>-<suffix>``.
    - ``engine_traits.regions``: tag returned by ``region_tag()`` --> Bing
      market code of the form ``<lang>-<cc>``.
    - ``engine_traits.all_locale``: set to ``'clear'`` when the region list
      contains a link whose ``cc`` parameter is ``clear``.

    Problems are reported with ``print`` rather than raised.  If the HTTP
    response is not OK the function returns early and leaves
    ``engine_traits`` unmodified.

    :param engine_traits: traits object that is updated in place.
    """

    headers = {
        "User-Agent": gen_useragent(),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US;q=0.5,en;q=0.3",
        "Accept-Encoding": "gzip, deflate, br",
        "DNT": "1",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Sec-GPC": "1",
        "Cache-Control": "max-age=0",
    }

    resp = get("https://www.bing.com/account/general", headers=headers)
    if not resp.ok:
        print("ERROR: response from bing is not OK.")
        # An error response does not contain the language / region lists.
        # Parsing it would at best record a meaningless partial trait set
        # and at worst fail inside the HTML parser (presumably lxml, which
        # rejects an empty document -- TODO confirm), so stop here.
        return

    dom = html.fromstring(resp.text)

    # --- languages ---------------------------------------------------------

    # Preset entry; the conflict check below keeps it from being overwritten
    # by whatever the page lists for 'zh'.
    engine_traits.languages['zh'] = 'zh-hans'

    # Bing ``setlang`` values that are replaced before being handed to babel.
    map_lang = {'prs': 'fa-AF', 'en': 'en-us'}
    # Suffix appended to a bare language code to build the Bing UI language;
    # languages not listed here use the language code itself as the suffix.
    bing_ui_lang_map = {
        'en': 'us',
        'da': 'dk',
    }

    for href in eval_xpath(dom, '//div[@id="language-section-content"]//div[@class="languageItem"]/a/@href'):
        eng_lang = parse_qs(urlparse(href).query)['setlang'][0]
        babel_lang = map_lang.get(eng_lang, eng_lang)
        try:
            sxng_tag = language_tag(babel.Locale.parse(babel_lang.replace('-', '_')))
        except babel.UnknownLocaleError:
            print("ERROR: language (%s) is unknown by babel" % (babel_lang))
            continue

        # Build the lowercase '<lang>-<suffix>' code, e.g. 'da' --> 'da-dk'
        # and 'de' --> 'de-de'; codes that already contain '-' are kept.
        bing_ui_lang = eng_lang.lower()
        if '-' not in bing_ui_lang:
            bing_ui_lang = bing_ui_lang + '-' + bing_ui_lang_map.get(bing_ui_lang, bing_ui_lang)

        # First entry wins: an existing mapping is never replaced, a
        # differing candidate is only reported.
        conflict = engine_traits.languages.get(sxng_tag)
        if conflict:
            if conflict != bing_ui_lang:
                print(f"CONFLICT: babel {sxng_tag} --> {conflict}, {bing_ui_lang}")
            continue
        engine_traits.languages[sxng_tag] = bing_ui_lang

    # --- regions -----------------------------------------------------------

    # Preset entry; protected by the conflict check in the same way as 'zh'.
    engine_traits.regions['zh-CN'] = 'zh-cn'

    # Generated market codes that are replaced by a different Bing market code.
    map_market_codes = {
        'zh-hk': 'en-hk',
    }
    for href in eval_xpath(dom, '//div[@id="region-section-content"]//div[@class="regionItem"]/a/@href'):
        cc_tag = parse_qs(urlparse(href).query)['cc'][0]
        if cc_tag == 'clear':
            # The 'clear' link is not a country; it is stored as the value
            # used for "all locales".
            engine_traits.all_locale = cc_tag
            continue

        # Derive market codes from the (de facto) official languages of the
        # country, restricted to languages collected above.
        for official_lang in babel.languages.get_official_languages(cc_tag, de_facto=True):
            # The membership test uses the unsplit tag on purpose: it is
            # compared against the keys stored in ``languages``.
            if official_lang not in engine_traits.languages:
                continue
            lang_tag = official_lang.split('_')[0]  # drop anything after '_'
            market_code = f"{lang_tag}-{cc_tag}"
            market_code = map_market_codes.get(market_code, market_code)
            sxng_tag = region_tag(babel.Locale.parse('%s_%s' % (lang_tag, cc_tag.upper())))

            # First entry wins, as for the languages.
            conflict = engine_traits.regions.get(sxng_tag)
            if conflict:
                if conflict != market_code:
                    print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, market_code))
                continue
            engine_traits.regions[sxng_tag] = market_code