.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
core.py
Go to the documentation of this file.
1
# SPDX-License-Identifier: AGPL-3.0-or-later
2
"""CORE (science)
3
4
"""
5
6
from
datetime
import
datetime
7
from
urllib.parse
import
urlencode
8
9
from
searx.exceptions
import
SearxEngineAPIException
10
11
# Engine metadata: where the service lives, how it is identified on Wikidata,
# and what kind of API (official, key-protected, JSON) this engine talks to.
about = {
    "website": "https://core.ac.uk",
    "wikidata_id": "Q22661180",
    "official_api_documentation": "https://core.ac.uk/documentation/api/",
    "use_official_api": True,
    "require_api_key": True,
    "results": "JSON",
}
19
20
# Categories this engine is listed under.
categories = ["science", "scientific publications"]

# The engine supports paging; every request asks for ``nb_per_page`` records.
paging = True
nb_per_page = 10

# Placeholder value; ``request()`` refuses to run while the key is still
# 'unset'.
api_key = "unset"

# ``search_string`` is formatted and appended to ``base_url`` to build the
# request URL.
base_url = "https://core.ac.uk:443/api-v2/search/"
search_string = "{query}?page={page}&pageSize={nb_per_page}&apiKey={apikey}"
28
29
30
def request(query, params):
    """Fill ``params['url']`` with the CORE search URL for *query*.

    The URL is ``base_url`` followed by ``search_string`` formatted with the
    url-encoded query (as ``q=<query>``), the requested page number
    (``params['pageno']``), the page size and the API key.

    Returns the same ``params`` dict that was passed in.

    Raises:
        SearxEngineAPIException: if ``api_key`` still has the placeholder
            value ``'unset'``.
    """
    if api_key == 'unset':
        raise SearxEngineAPIException('missing CORE API key')

    encoded_query = urlencode({'q': query})
    params['url'] = base_url + search_string.format(
        query=encoded_query,
        nb_per_page=nb_per_page,
        page=params['pageno'],
        apikey=api_key,
    )
    return params
44
45
46
def _select_url(source):
    """Return the URL to use for a record, or ``None`` if none can be built.

    Candidates are tried in this order: the first entry of ``urls`` (with a
    leading ``http://`` rewritten to ``https://``), the DOI resolver, the
    ``downloadUrl`` and finally the first ARK identifier found in
    ``identifiers``.
    """
    urls = source.get('urls')
    if urls:
        return urls[0].replace('http://', 'https://', 1)

    if source.get('doi'):
        # use the DOI reference
        return 'https://doi.org/' + source['doi']

    if source.get('downloadUrl'):
        return source['downloadUrl']

    # try to find an ark id, see
    # https://www.wikidata.org/wiki/Property:P8091
    # and https://en.wikipedia.org/wiki/Archival_Resource_Key
    ark_prefix = 'ark:/'
    for identifier in source.get('identifiers') or []:
        if isinstance(identifier, str) and identifier.startswith(ark_prefix):
            return 'https://n2t.net/' + identifier[len(ark_prefix):]

    return None


def _published_date(source):
    """Return the record's date as a ``datetime``, or ``None``.

    ``publishedDate`` is preferred and ``depositedDate`` is the fallback.
    The value is divided by 1000 before being handed to
    ``datetime.fromtimestamp`` (i.e. it is treated as epoch milliseconds).
    A value that cannot be converted yields ``None`` so that one bad
    timestamp does not discard the whole result page.
    """
    timestamp = source.get('publishedDate') or source.get('depositedDate')
    if not timestamp:
        return None
    try:
        return datetime.fromtimestamp(timestamp / 1000)
    except (TypeError, ValueError, OverflowError, OSError):
        return None


def response(resp):
    """Convert a CORE search response into a list of ``paper.html`` results.

    ``resp.json()`` must return a mapping with a ``data`` list whose items
    each carry a ``_source`` mapping.  Records for which no URL can be
    determined are skipped.  Optional fields of a record are read with
    ``.get`` so that a sparse record (missing or ``None`` fields) produces a
    result with empty values instead of raising and losing the whole page.

    Returns:
        list of result dicts, one per record that has a usable URL.
    """
    results = []
    json_data = resp.json()

    for result in json_data['data']:
        source = result['_source']

        url = _select_url(source)
        if url is None:
            continue

        # sometimes the 'title' is None or absent / filter those values
        journals = [j['title'] for j in (source.get('journals') or []) if j.get('title')]

        publisher = source.get('publisher')
        if publisher:
            publisher = publisher.strip("'")

        # 'comments', 'volume', 'pages' and 'number' are not filled by this
        # engine.
        results.append(
            {
                'template': 'paper.html',
                'title': source.get('title'),
                'url': url,
                'content': source.get('description') or '',
                'tags': source.get('topics'),
                'publishedDate': _published_date(source),
                'type': (source.get('types') or [None])[0],
                'authors': source.get('authors'),
                'editor': ', '.join(source.get('contributors') or []),
                'publisher': publisher,
                'journal': ', '.join(journals),
                'doi': source.get('doi'),
                'issn': [x for x in [source.get('issn')] if x],
                'isbn': [x for x in [source.get('isbn')] if x],  # exists in the rawRecordXml
                # 'repositoryDocument' may be present but None
                'pdf_url': (source.get('repositoryDocument') or {}).get('pdfOrigin'),
            }
        )

    return results
searx.exceptions.SearxEngineAPIException
Definition
exceptions.py:54
searx.engines.core.request
request(query, params)
Definition
core.py:30
searx.engines.core.response
response(resp)
Definition
core.py:46
searx.exceptions
Definition
exceptions.py:1
searxng
searx
engines
core.py
Generated on Sat Nov 16 2024 00:10:57 for .oO SearXNG Developer Documentation Oo. by
1.12.0