.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
internet_archive_scholar.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""Internet Archive scholar(science)
3"""
4
5from datetime import datetime
6from urllib.parse import urlencode
7from searx.utils import html_to_text
8
# Engine metadata: where the service lives, how it is queried, and what
# format it answers in.
about = dict(
    website="https://scholar.archive.org/",
    wikidata_id="Q115667709",
    official_api_documentation="https://scholar.archive.org/api/redoc",
    use_official_api=True,
    require_api_key=False,
    results="JSON",
)

# Categories this engine is listed under.
categories = ['science', 'scientific publications']

# request() turns params["pageno"] into an offset, so paging is enabled.
paging = True

# Root of the remote service; request() appends the search path to it.
base_url = "https://scholar.archive.org"

# Page size: sent as ``limit`` and used as the stride for ``offset``.
results_per_page = 15
22
23
def request(query, params):
    """Prepare the outgoing search request for *query*.

    Sets ``params["url"]`` to the ``/search`` endpoint with ``q``, ``limit``
    and ``offset`` query arguments (the offset is derived from the 1-based
    ``params["pageno"]``) and asks for a JSON answer via the ``Accept``
    header.  The mutated *params* dict is returned.
    """
    # Page 1 starts at offset 0, page 2 at ``results_per_page``, and so on.
    offset = (params["pageno"] - 1) * results_per_page

    query_string = urlencode(
        [
            ("q", query),
            ("limit", results_per_page),
            ("offset", offset),
        ]
    )

    params["url"] = f"{base_url}/search?{query_string}"
    params["headers"]["Accept"] = "application/json"
    return params
33
34
def _parse_release_date(value):
    """Return *value* parsed as a ``%Y-%m-%d`` date, or ``None``.

    ``None`` is returned when *value* is empty or does not match the
    expected format, so one odd date cannot abort the whole result page.
    """
    if not value:
        return None
    try:
        return datetime.strptime(value, "%Y-%m-%d")
    except (TypeError, ValueError):
        return None


def response(resp):
    """Convert the JSON answer of the search API into ``paper.html`` results.

    :param resp: HTTP response object; only ``resp.json()`` is used.
    :returns: list of result dicts, one per usable entry in ``"results"``.
    :raises KeyError: if the payload has no ``"results"`` key (kept on
        purpose so that an unexpected answer is reported, not hidden).

    Optional sections of an entry (``fulltext``, ``abstracts``,
    ``_highlights``, ``releases``, ``tags``) are read defensively: a missing
    or null section yields an empty value instead of an exception.
    NOTE(review): which sections the API may actually omit should be
    confirmed against the official API documentation.
    """
    results = []

    # Named ``payload`` rather than ``json`` to avoid shadowing the stdlib
    # module name.
    payload = resp.json()

    for result in payload["results"]:
        biblio = result.get('biblio') or {}
        fulltext = result.get('fulltext') or {}

        # A result without a link cannot be shown; skip it instead of
        # failing the entire page.
        access_url = fulltext.get('access_url')
        if not access_url:
            continue

        abstracts = result.get('abstracts') or []
        highlights = result.get('_highlights') or []
        releases = result.get('releases') or []

        # Prefer the first abstract, fall back to the first search
        # highlight.  ``or ''`` keeps ``None`` away from html_to_text().
        content = ''
        if abstracts:
            content = abstracts[0].get('body') or ''
        elif highlights:
            content = highlights[0] or ''

        doi = releases[0].get('doi') if releases else None

        results.append(
            {
                'template': 'paper.html',
                'url': access_url,
                # Fall back to the container (journal) name for untitled works.
                'title': biblio.get('title') or biblio.get('container_name'),
                'content': html_to_text(content),
                'publisher': biblio.get('publisher'),
                'doi': doi,
                'journal': biblio.get('container_name'),
                'authors': biblio.get('contrib_names'),
                'tags': result.get('tags'),
                'publishedDate': _parse_release_date(biblio.get('release_date')),
                'issns': biblio.get('issns'),
                'pdf_url': access_url,
            }
        )

    return results