.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
core.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""CORE_ (COnnecting REpositories) provides a comprehensive bibliographic
3database of the world’s scholarly literature, collecting and indexing
4research from repositories and journals.
5
6.. _CORE: https://core.ac.uk/about
7
8.. _core engine config:
9
10Configuration
11=============
12
13The engine has the following additional settings:
14
15- :py:obj:`api_key`
16
17.. code:: yaml
18
19 - name: core.ac.uk
20 engine: core
21 categories: science
22 shortcut: cor
23 api_key: "..."
24 timeout: 5
25
26Implementations
27===============
28
29"""
30# pylint: disable=too-many-branches
31
32from datetime import datetime
33from urllib.parse import urlencode
34
35from searx.exceptions import SearxEngineAPIException
36
# Engine metadata shown on the SearXNG "about" / engine statistics pages.
about = {
    # Public website of the service and its Wikidata entity.
    "website": "https://core.ac.uk",
    "wikidata_id": "Q22661180",
    # The engine talks to the official (v3) JSON API, which needs an API key.
    "official_api_documentation": "https://api.core.ac.uk/docs/v3",
    "use_official_api": True,
    "require_api_key": True,
    "results": "JSON",
}
45
# Placeholder value; ``request`` refuses to run while it is still 'unset'.
api_key = 'unset'
"""For an API key register at https://core.ac.uk/services/api and insert
the API key in the engine :ref:`core engine config`."""

# Categories this engine is listed under.
categories = ['science', 'scientific publications']

# Paging is supported; every page requests ``nb_per_page`` works.
paging = True
nb_per_page = 10

# Endpoint of the "search works" call of the CORE API v3.
base_url = 'https://api.core.ac.uk/v3/search/works/'
54
55
def request(query, params):
    """Fill *params* with the URL and headers of a CORE API v3 search.

    The result offset is derived from ``params['pageno']`` and the module
    level ``nb_per_page``; the API key is sent as a bearer token.

    :param query: the search terms.
    :param params: request parameter dict; ``url`` and ``headers`` are set.
    :returns: the same *params* dict.
    :raises SearxEngineAPIException: if ``api_key`` is still ``'unset'``.
    """
    if api_key == 'unset':
        raise SearxEngineAPIException('missing CORE API key')

    # API v3 pages by offset/limit rather than by page number.
    first_hit = nb_per_page * (params['pageno'] - 1)
    query_string = urlencode(
        {
            'q': query,
            'offset': first_hit,
            'limit': nb_per_page,
            'sort': 'relevance',
        }
    )

    params['url'] = f'{base_url}?{query_string}'
    params['headers'] = {'Authorization': f'Bearer {api_key}'}
    return params
72
73
def _first_url(value):
    """Return *value* as a single URL string, or ``''`` if there is none.

    *value* may be a plain string or a list/tuple of strings; for a sequence
    the first non-empty string is returned.
    """
    if isinstance(value, str):
        return value
    if isinstance(value, (list, tuple)):
        return next((item for item in value if isinstance(item, str) and item), '')
    return ''


def _parse_iso_date(raw_date):
    """Parse an ISO 8601 timestamp; return ``None`` if missing or malformed."""
    if not isinstance(raw_date, str):
        return None
    try:
        # ``fromisoformat`` of older Python versions rejects a trailing 'Z'.
        return datetime.fromisoformat(raw_date.replace('Z', '+00:00'))
    except ValueError:
        return None


def response(resp):
    """Convert a CORE API v3 search response into ``paper.html`` results.

    Records without a title, or without anything a URL can be built from,
    are skipped.  The result URL is chosen in this order: DOI, CORE work id,
    ``downloadUrl``, ``sourceFulltextUrls``.

    :param resp: HTTP response object providing a ``json()`` method.
    :returns: list of result dicts.
    """
    results = []
    json_data = resp.json()

    for result in json_data.get('results') or []:
        title = result.get('title')
        if not title:
            continue

        doi = result.get('doi')
        download_url = _first_url(result.get('downloadUrl'))
        # NOTE(review): sourceFulltextUrls is presumably a list of strings in
        # API v3 -- both a list and a plain string are handled here.
        fulltext_url = _first_url(result.get('sourceFulltextUrls'))

        # Pick the most stable link available; the DOI must not be
        # overridden by the CORE work id.
        if doi:
            url = f'https://doi.org/{doi}'
        elif result.get('id'):
            url = 'https://core.ac.uk/works/' + str(result['id'])
        elif download_url:
            url = download_url
        elif fulltext_url:
            url = fulltext_url
        else:
            continue

        # Fall back to the deposit date when no publication date is given.
        published_date = _parse_iso_date(result.get('publishedDate') or result.get('depositedDate'))

        journals = [j.get('title') for j in result.get('journals') or [] if j.get('title')]

        # Fields may be present with a JSON ``null``, hence the ``or``
        # fallbacks instead of ``dict.get`` defaults.
        publisher = (result.get('publisher') or '').strip("'")

        authors = {
            author['name']
            for author in result.get('authors') or []
            if isinstance(author, dict) and author.get('name')
        }

        results.append(
            {
                'template': 'paper.html',
                'title': title,
                'url': url,
                'content': result.get('fullText', '') or '',
                # 'comments': '',
                'tags': result.get('fieldOfStudy', []),
                'publishedDate': published_date,
                'type': result.get('documentType', '') or '',
                'authors': authors,
                'editor': ', '.join(result.get('contributors') or []),
                'publisher': publisher,
                'journal': ', '.join(journals),
                'doi': doi,
                # 'issn' : ''
                # 'isbn' : ''
                'pdf_url': download_url or fulltext_url,
            }
        )

    return results
request(query, params)
Definition core.py:56
response(resp)
Definition core.py:74