2"""`Anna's Archive`_ is a free non-profit online shadow library metasearch
3engine providing access to a variety of book resources (also via IPFS), created
4by a team of anonymous archivists (AnnaArchivist_).
6.. _Anna's Archive: https://annas-archive.org/
7.. _AnnaArchivist: https://annas-software.org/AnnaArchivist/annas-archive
12The engine has the following additional settings:
18With these options a SearXNG maintainer is able to configure **additional**
19engines for specific searches in Anna's Archive. For example, an engine to search
20for *newest* articles and journals (PDF) / by shortcut ``!aaa <search-term>``.
24 - name: annas articles
27 aa_content: 'magazine'
36from typing
import List, Dict, Any, Optional
37from urllib.parse
import urlencode
40from searx.utils import extract_text, eval_xpath, eval_xpath_getindex, eval_xpath_list
45about: Dict[str, Any] = {
46 "website":
"https://annas-archive.org/",
47 "wikidata_id":
"Q115288326",
48 "official_api_documentation":
None,
49 "use_official_api":
False,
50 "require_api_key":
False,
55categories: List[str] = [
"files"]
59base_url: str =
"https://annas-archive.org"
61"""Anna's search form field **Content** / possible values::
63 book_fiction, book_unknown, book_nonfiction,
64 book_comic, magazine, standards_document
66To not filter use an empty string (default).
69"""Sort Anna's results, possible values::
71 newest, oldest, largest, smallest
73To sort by *most relevant* use an empty string (default)."""
76"""Filter Anna's results by a file ending. Common filters for example are
81 Anna's Archive is a beta release: Filtering results by file extension does not
82 really work on Anna's Archive.
87def init(engine_settings=None):
88 """Check of engine's settings."""
91 if aa_content
and aa_content
not in traits.custom[
'content']:
92 raise ValueError(f
'invalid setting content: {aa_content}')
94 if aa_sort
and aa_sort
not in traits.custom[
'sort']:
95 raise ValueError(f
'invalid setting sort: {aa_sort}')
97 if aa_ext
and aa_ext
not in traits.custom[
'ext']:
98 raise ValueError(f
'invalid setting ext: {aa_ext}')
101def request(query, params: Dict[str, Any]) -> Dict[str, Any]:
102 lang = traits.get_language(params[
"language"], traits.all_locale)
105 'content': aa_content,
109 'page': params[
'pageno'],
112 filtered_args = dict((k, v)
for k, v
in args.items()
if v)
113 params[
"url"] = f
"{base_url}/search?{urlencode(filtered_args)}"
117def response(resp) -> List[Dict[str, Optional[str]]]:
118 results: List[Dict[str, Optional[str]]] = []
119 dom = html.fromstring(resp.text)
121 for item
in eval_xpath_list(dom,
'//main//div[contains(@class, "h-[125]")]/a'):
130 for item
in eval_xpath_list(dom,
'//main//div[contains(@class, "js-scroll-hidden")]'):
131 item = html.fromstring(item.xpath(
'./comment()')[0].text)
139 'template':
'paper.html',
140 'url': base_url + extract_text(eval_xpath_getindex(item,
'./@href', 0)),
141 'title': extract_text(eval_xpath(item,
'.//h3/text()[1]')),
142 'publisher': extract_text(eval_xpath(item,
'.//div[contains(@class, "text-sm")]')),
143 'authors': [extract_text(eval_xpath(item,
'.//div[contains(@class, "italic")]'))],
144 'content': extract_text(eval_xpath(item,
'.//div[contains(@class, "text-xs")]')),
145 'thumbnail': extract_text(eval_xpath_getindex(item,
'.//img/@src', 0, default=
None), allow_none=
True),
150 """Fetch languages and other search arguments from Anna's search form."""
157 engine_traits.all_locale =
''
158 engine_traits.custom[
'content'] = []
159 engine_traits.custom[
'ext'] = []
160 engine_traits.custom[
'sort'] = []
162 resp = get(base_url +
'/search')
164 raise RuntimeError(
"Response from Anna's search page is not OK.")
165 dom = html.fromstring(resp.text)
170 for x
in eval_xpath_list(dom,
"//form//input[@name='lang']"):
171 eng_lang = x.get(
"value")
172 if eng_lang
in (
'',
'_empty',
'nl-BE',
'und')
or eng_lang.startswith(
'anti__'):
175 locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep=
'-')
176 except babel.UnknownLocaleError:
180 sxng_lang = language_tag(locale)
181 conflict = engine_traits.languages.get(sxng_lang)
183 if conflict != eng_lang:
184 print(
"CONFLICT: babel %s --> %s, %s" % (sxng_lang, conflict, eng_lang))
186 engine_traits.languages[sxng_lang] = eng_lang
188 for x
in eval_xpath_list(dom,
"//form//input[@name='content']"):
189 if not x.get(
"value").startswith(
"anti__"):
190 engine_traits.custom[
'content'].append(x.get(
"value"))
192 for x
in eval_xpath_list(dom,
"//form//input[@name='ext']"):
193 if not x.get(
"value").startswith(
"anti__"):
194 engine_traits.custom[
'ext'].append(x.get(
"value"))
196 for x
in eval_xpath_list(dom,
"//form//select[@name='sort']//option"):
197 engine_traits.custom[
'sort'].append(x.get(
"value"))
200 engine_traits.custom[
'content'].sort()
201 engine_traits.custom[
'ext'].sort()
202 engine_traits.custom[
'sort'].sort()
fetch_traits(EngineTraits engine_traits)
List[Dict[str, Optional[str]]] response(resp)
Dict[str, Any] request(query, Dict[str, Any] params)
init(engine_settings=None)