148 "url": base_url + eval_xpath_getindex(item,
"./a/@href", 0),
149 "title": extract_text(eval_xpath(item,
"./div//a[starts-with(@href, '/md5')]")),
150 "authors": [extract_text(eval_xpath_getindex(item,
".//a[starts-with(@href, '/search')]", 0))],
151 "publisher": extract_text(
152 eval_xpath_getindex(item,
".//a[starts-with(@href, '/search')]", 1, default=
None), allow_none=
True
154 "content": extract_text(eval_xpath(item,
".//div[contains(@class, 'relative')]")),
155 "thumbnail": extract_text(eval_xpath_getindex(item,
".//img/@src", 0, default=
None), allow_none=
True),
160 """Fetch languages and other search arguments from Anna's search form."""
167 engine_traits.all_locale =
""
168 engine_traits.custom[
"content"] = []
169 engine_traits.custom[
"ext"] = []
170 engine_traits.custom[
"sort"] = []
172 resp = get(base_url +
"/search")
174 raise RuntimeError(
"Response from Anna's search page is not OK.")
175 dom = html.fromstring(resp.text)
179 lang_map: dict[str, str] = {}
180 for x
in eval_xpath_list(dom,
"//form//input[@name='lang']"):
181 eng_lang = x.get(
"value")
182 if eng_lang
in (
"",
"_empty",
"nl-BE",
"und")
or eng_lang.startswith(
"anti__"):
185 locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep=
"-")
186 except babel.UnknownLocaleError:
190 sxng_lang = language_tag(locale)
191 conflict = engine_traits.languages.get(sxng_lang)
193 if conflict != eng_lang:
194 print(
"CONFLICT: babel %s --> %s, %s" % (sxng_lang, conflict, eng_lang))
196 engine_traits.languages[sxng_lang] = eng_lang
198 for x
in eval_xpath_list(dom,
"//form//input[@name='content']"):
199 if not x.get(
"value").startswith(
"anti__"):
200 engine_traits.custom[
"content"].append(x.get(
"value"))
202 for x
in eval_xpath_list(dom,
"//form//input[@name='ext']"):
203 if not x.get(
"value").startswith(
"anti__"):
204 engine_traits.custom[
"ext"].append(x.get(
"value"))
206 for x
in eval_xpath_list(dom,
"//form//select[@name='sort']//option"):
207 engine_traits.custom[
"sort"].append(x.get(
"value"))
210 engine_traits.custom[
"content"].sort()
211 engine_traits.custom[
"ext"].sort()
212 engine_traits.custom[
"sort"].sort()