.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
internet_archive_scholar.py
Go to the documentation of this file.
1
# SPDX-License-Identifier: AGPL-3.0-or-later
2
"""Internet Archive scholar(science)
3
"""
4
5
from
datetime
import
datetime
6
from
urllib.parse
import
urlencode
7
from
searx.utils
import
html_to_text
8
9
# Engine metadata describing the upstream service and the API this module talks to.
about = {
    "website": "https://scholar.archive.org/",
    "wikidata_id": "Q115667709",
    "official_api_documentation": "https://scholar.archive.org/api/redoc",
    "use_official_api": True,
    "require_api_key": False,
    # The engine requests and parses JSON (see the ``Accept`` header set in request()).
    "results": "JSON",
}
17
# Categories this engine is listed under.
categories = ['science', 'scientific publications']

# Paging is enabled; request() turns the 1-based ``pageno`` into an offset.
paging = True

# Root of the service; request() appends "/search?..." to it.
base_url = "https://scholar.archive.org"

# Page size: sent as ``limit`` and used as the offset step in request().
results_per_page = 15
22
23
24
def request(query, params):
    """Fill in ``params`` for a search against Internet Archive Scholar.

    ``params["url"]`` is set to the ``/search`` endpoint below ``base_url``,
    carrying the query (``q``), the page size (``limit``) and the number of
    results to skip (``offset``).  The ``Accept`` header is set so that the
    reply is requested as JSON.

    :param query: search terms, passed through as the ``q`` argument
    :param params: request parameters; ``params["pageno"]`` is read and
        ``params["url"]`` / ``params["headers"]["Accept"]`` are written
    :return: the same ``params`` object, mutated in place
    """
    # Page 1 maps to offset 0, page 2 to ``results_per_page``, and so on.
    skip = (params["pageno"] - 1) * results_per_page
    query_string = urlencode(
        {
            "q": query,
            "limit": results_per_page,
            "offset": skip,
        }
    )

    params["url"] = f"{base_url}/search?{query_string}"
    params["headers"]["Accept"] = "application/json"
    return params
33
34
35
def response(resp):
    """Turn the JSON reply of Internet Archive Scholar into result dicts.

    Every entry of the reply's ``"results"`` list becomes one dict using the
    ``paper.html`` template.  Bibliographic fields are read from the entry's
    ``biblio`` object, the DOI from its first release (if any) and both
    ``url`` and ``pdf_url`` from ``fulltext`` -> ``access_url``.

    :param resp: response object exposing a ``json()`` method
    :return: list of result dicts, in the order of the reply
    """
    payload = resp.json()
    papers = []

    for hit in payload["results"]:
        biblio = hit['biblio']

        # Only parse the release date when one is given; otherwise leave it unset.
        release_date = biblio.get('release_date')
        published = datetime.strptime(release_date, "%Y-%m-%d") if release_date else None

        # Prefer the first abstract's body; fall back to the first highlight
        # and finally to an empty string.
        abstracts = hit['abstracts']
        if len(abstracts) > 0:
            summary = abstracts[0].get('body')
        else:
            highlights = hit['_highlights']
            summary = highlights[0] if len(highlights) > 0 else ''

        # The DOI, when available, is taken from the first release only.
        releases = hit['releases']
        doi = releases[0].get('doi') if len(releases) > 0 else None

        # NOTE: 'url' indexes ``access_url`` strictly (KeyError when missing),
        # whereas 'pdf_url' tolerates its absence via ``.get``.
        fulltext = hit['fulltext']
        paper = {
            'template': 'paper.html',
            'url': fulltext['access_url'],
            # Use the container (journal) name when the entry has no title.
            'title': biblio.get('title') or biblio.get('container_name'),
            'content': html_to_text(summary),
            'publisher': biblio.get('publisher'),
            'doi': doi,
            'journal': biblio.get('container_name'),
            'authors': biblio.get('contrib_names'),
            'tags': hit['tags'],
            'publishedDate': published,
            'issns': biblio.get('issns'),
            'pdf_url': fulltext.get('access_url'),
        }
        papers.append(paper)

    return papers
searx.engines.internet_archive_scholar.request
request(query, params)
Definition
internet_archive_scholar.py:24
searx.engines.internet_archive_scholar.response
response(resp)
Definition
internet_archive_scholar.py:35
searx.utils
Definition
utils.py:1
searxng
searx
engines
internet_archive_scholar.py
Generated on Sat Nov 16 2024 00:10:57 for .oO SearXNG Developer Documentation Oo. by
1.12.0