def fetch_traits(engine_traits: EngineTraits) -> None:
    """Fetch languages and regions from Bing-Web.

    Requests ``https://www.bing.com/account/general`` and scrapes the language
    and region links of that page to fill ``engine_traits`` in place:

    - ``engine_traits.languages`` maps the tag returned by ``language_tag()``
      to the lower-cased Bing UI language taken from the ``setlang`` URL
      argument (with a region suffix appended when the value has none).
    - ``engine_traits.regions`` maps the tag returned by ``region_tag()`` to a
      market code of the form ``<language>-<country>``, where the country is
      taken from the ``cc`` URL argument.
    - ``engine_traits.all_locale`` is set to ``'clear'`` when the region list
      contains an entry with that ``cc`` value.

    Entries that cannot be processed (missing URL argument, identifier babel
    cannot parse) are reported with ``print`` and skipped, so a single odd
    link does not abort the whole run.  An already registered tag is never
    overwritten; a differing second value is reported as ``CONFLICT``.

    :param engine_traits: traits object that is updated in place.
    """

    headers = {
        "User-Agent": gen_useragent(),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US;q=0.5,en;q=0.3",
        "Accept-Encoding": "gzip, deflate, br",
        "DNT": "1",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Sec-GPC": "1",
        "Cache-Control": "max-age=0",
    }

    resp = get("https://www.bing.com/account/general", headers=headers)
    if not resp.ok:
        # NOTE(review): the failure is only reported; the body of the non-OK
        # response is still parsed below.  Confirm this best-effort behavior
        # is intended before turning it into a hard error.
        print("ERROR: response from bing is not OK.")

    dom = html.fromstring(resp.text)

    # --- languages ---------------------------------------------------------

    # Preset entry: the conflict check in the loop below never overwrites an
    # existing tag, so this value wins over whatever the page offers for 'zh'.
    engine_traits.languages['zh'] = 'zh-hans'

    # Bing ``setlang`` value --> identifier handed to babel for parsing.
    map_lang = {'prs': 'fa-AF', 'en': 'en-us'}
    # Language --> region suffix used when the ``setlang`` value has no '-'.
    # Languages not listed here get the language code itself as suffix
    # (e.g. 'de' --> 'de-de').
    bing_ui_lang_map = {
        'en': 'us',
        'da': 'dk',
    }

    for href in eval_xpath(dom, '//div[@id="language-section-content"]//div[@class="languageItem"]/a/@href'):
        # parse_qs() omits arguments that are absent or blank, so look the
        # argument up defensively instead of indexing (KeyError).
        setlang_values = parse_qs(urlparse(href).query).get('setlang')
        if not setlang_values:
            print(f"ERROR: language link without 'setlang' argument: {href}")
            continue
        eng_lang = setlang_values[0]

        babel_lang = map_lang.get(eng_lang, eng_lang)
        try:
            sxng_tag = language_tag(babel.Locale.parse(babel_lang.replace('-', '_')))
        except (babel.UnknownLocaleError, ValueError):
            # UnknownLocaleError: babel has no data for the locale;
            # ValueError: the scraped value is not a valid locale identifier.
            print("ERROR: language (%s) is unknown by babel" % (babel_lang))
            continue

        bing_ui_lang = eng_lang.lower()
        if '-' not in bing_ui_lang:
            bing_ui_lang = bing_ui_lang + '-' + bing_ui_lang_map.get(bing_ui_lang, bing_ui_lang)

        conflict = engine_traits.languages.get(sxng_tag)
        if conflict:
            # first value wins; only report when the second one differs
            if conflict != bing_ui_lang:
                print(f"CONFLICT: babel {sxng_tag} --> {conflict}, {bing_ui_lang}")
            continue
        engine_traits.languages[sxng_tag] = bing_ui_lang

    # --- regions (market codes) --------------------------------------------

    # Preset entry, protected by the conflict check in the same way as 'zh'.
    engine_traits.regions['zh-CN'] = 'zh-cn'

    # Overrides for market codes that differ from the generated
    # ``<language>-<country>`` form.
    map_market_codes = {
        'zh-hk': 'en-hk',
    }
    for href in eval_xpath(dom, '//div[@id="region-section-content"]//div[@class="regionItem"]/a/@href'):
        cc_values = parse_qs(urlparse(href).query).get('cc')
        if not cc_values:
            print(f"ERROR: region link without 'cc' argument: {href}")
            continue
        cc_tag = cc_values[0]

        if cc_tag == 'clear':
            # the 'clear' entry is stored as the value for "all locales"
            engine_traits.all_locale = cc_tag
            continue

        # Build one market code per official (or de facto) language of the
        # country, restricted to languages collected in the first pass.
        for official_lang in babel.languages.get_official_languages(cc_tag, de_facto=True):
            # The membership test uses the full value (which may carry a
            # '_<suffix>'); only afterwards is it reduced to the bare language.
            if official_lang not in engine_traits.languages:
                continue
            lang_tag = official_lang.split('_')[0]
            market_code = f"{lang_tag}-{cc_tag}"
            market_code = map_market_codes.get(market_code, market_code)

            locale_id = '%s_%s' % (lang_tag, cc_tag.upper())
            try:
                sxng_tag = region_tag(babel.Locale.parse(locale_id))
            except (babel.UnknownLocaleError, ValueError):
                # same handling as in the language pass: report and skip
                print(f"ERROR: region ({locale_id}) is unknown by babel")
                continue

            conflict = engine_traits.regions.get(sxng_tag)
            if conflict:
                # first value wins; only report when the second one differs
                if conflict != market_code:
                    print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, market_code))
                continue
            engine_traits.regions[sxng_tag] = market_code