2"""The OpenAlex engine integrates the `OpenAlex`_ Works API to return scientific
3paper results using the :ref:`result_types.paper` class. It is an "online" JSON
4engine that uses the official public API and does not require an API key.
6.. _OpenAlex: https://openalex.org
7.. _OpenAlex API overview: https://docs.openalex.org/how-to-use-the-api/api-overview
12- Uses the official Works endpoint (JSON)
13- Paging support via ``page`` and ``per-page``
14- Relevance sorting (``sort=relevance_score:desc``)
15- Language filter support (maps SearXNG language to ``filter=language:<iso2>``)
16- Maps fields commonly used in scholarly results: title, authors, abstract
17 (reconstructed from inverted index), journal/venue, publisher, DOI, tags
18 (concepts), PDF/HTML links, pages, volume, issue, published date, and a short
20- Supports OpenAlex "polite pool" by adding a ``mailto`` parameter
26Minimal example for :origin:`settings.yml <searx/settings.yml>`:
33 categories: science, scientific publications
35 # Recommended by OpenAlex: join the polite pool with an email address
36 mailto: "[email protected]"
41- The ``mailto`` key is optional but recommended by OpenAlex for better service.
42- Language is inherited from the user's UI language; when it is not ``all``, the
43 engine adds ``filter=language:<iso2>`` (e.g. ``language:fr``). If OpenAlex has
44 few results for that language, you may see fewer items.
45- Results typically include a main link. When the primary landing page from
46 OpenAlex is a DOI resolver, the engine will use that stable link. When an open
47 access link is available, it is exposed via the ``PDF`` and/or ``HTML`` links
54Each result uses the :ref:`result_types.paper` class and may include:
56- ``title`` and ``content`` (abstract; reconstructed from the inverted index)
57- ``authors`` (display names)
58- ``journal`` (host venue display name) and ``publisher``
59- ``doi`` (normalized to the plain DOI, without the ``https://doi.org/`` prefix)
60- ``tags`` (OpenAlex concepts display names)
61- ``pdf_url`` (Open access PDF if available) and ``html_url`` (landing page)
62- ``publishedDate`` (parsed from ``publication_date``)
63- ``pages``, ``volume``, ``number`` (issue)
64- ``type`` and a brief ``comments`` string with citation count
67Rate limits & polite pool
68=========================
70OpenAlex offers a free public API with generous daily limits. For extra courtesy
71and improved service quality, include a contact email in each request via
72``mailto``. You can set it directly in the engine configuration as shown above.
73See: `OpenAlex API overview`_.
79- Few or no results in a non-English UI language:
80 Ensure the selected language has sufficient coverage at OpenAlex, or set the
81 UI language to English and retry.
82- Preference changes fail while testing locally:
83 Make sure your ``server.secret_key`` and ``server.base_url`` are set in your
84 instance settings so signed cookies work; see :ref:`settings server`.
import typing as t
from datetime import datetime
from urllib.parse import urlencode
104 "website":
"https://openalex.org/",
105 "wikidata_id":
"Q110718454",
106 "official_api_documentation":
"https://docs.openalex.org/how-to-use-the-api/api-overview",
107 "use_official_api":
True,
108 "require_api_key":
False,
114categories = [
"science",
"scientific publications"]
116search_url =
"https://api.openalex.org/works"
def request(query: str, params: "OnlineParams") -> None:
    """Assemble the OpenAlex Works API request URL into ``params["url"]``.

    Adds paging, relevance sorting, an optional ``language`` filter derived
    from the UI language, and the optional ``mailto`` parameter for the
    OpenAlex "polite pool".
    """
    # NOTE(review): the opening of this dict was corrupted in the source;
    # OpenAlex full-text search uses the ``search`` query parameter — confirm
    # against the Works API docs.
    args: dict[str, t.Any] = {
        "search": query,
        "page": params["pageno"],
        "sort": "relevance_score:desc",
    }

    # Map SearXNG's language tag (e.g. "fr-FR" or "fr_FR") to the two-letter
    # ISO code expected by the OpenAlex ``language`` filter (e.g. "fr").
    language = params.get("language")
    filters: list[str] = []
    if isinstance(language, str) and language != "all":
        iso2 = language.split("-")[0].split("_")[0]
        filters.append(f"language:{iso2}")
    if filters:
        args["filter"] = ",".join(filters)

    # Join the "polite pool" when a contact address is configured.
    if isinstance(mailto, str) and mailto != "":
        args["mailto"] = mailto

    params["url"] = f"{search_url}?{urlencode(args)}"
def response(resp: "SXNG_Response") -> EngineResults:
    """Parse the OpenAlex Works JSON response into paper results.

    NOTE(review): large parts of this function were corrupted in the source;
    the result construction below was reconstructed from the module's helper
    functions and the documented field list — verify against upstream.
    """
    res = EngineResults()
    data = resp.json()

    for item in data.get("results", []):
        title: str = item.get("title", "")
        url, html_url, pdf_url = _extract_links(item)
        journal, publisher, pages, volume, number, published_date = _extract_biblio(item)
        res.add(
            res.types.Paper(
                url=url,
                title=title,
                content=_reconstruct_abstract(item.get("abstract_inverted_index")) or "",
                journal=journal,
                publisher=publisher,
                doi=_doi_to_plain(item.get("doi")),
                tags=_extract_tags(item),
                authors=_extract_authors(item),
                pdf_url=pdf_url,
                html_url=html_url,
                publishedDate=published_date,
                pages=pages,
                volume=volume,
                number=number,
                type=item.get("type"),
                comments=_extract_comments(item),
            )
        )
    return res
191 first_page = biblio.get(
"first_page")
192 last_page = biblio.get(
"last_page")
193 if first_page
and last_page:
194 return f
"{first_page}-{last_page}"
196 return str(first_page)
198 return str(last_page)
206 for fmt
in (
"%Y-%m-%d",
"%Y-%m",
"%Y"):
208 return datetime.strptime(value, fmt)
218 return doi_value.removeprefix(
"https://doi.org/")
222 abstract_inverted_index: dict[str, list[int]] |
None,
226 if not abstract_inverted_index:
228 position_to_token: dict[int, str] = {}
230 for token, positions
in abstract_inverted_index.items():
231 for pos
in positions:
232 position_to_token[pos] = token
233 max_index = max(max_index, pos)
236 ordered_tokens = [position_to_token.get(i,
"")
for i
in range(0, max_index + 1)]
238 text =
" ".join(t
for t
in ordered_tokens
if t !=
"")
239 return text
if text !=
"" else None
243 primary_location: dict[str, str] = item.get(
"primary_location", {})
244 open_access: dict[str, str] = item.get(
"open_access", {})
246 landing_page_url: str = primary_location.get(
"landing_page_url")
or ""
247 work_url: str = item.get(
"id",
"")
249 url: str = landing_page_url
or work_url
250 html_url: str = landing_page_url
251 pdf_url: str = primary_location.get(
"pdf_url")
or open_access.get(
"oa_url")
or ""
253 return url, html_url, pdf_url
257 authors: list[str] = []
258 for auth
in item.get(
"authorships", []):
261 author_obj = auth.get(
"author", {})
262 display_name = author_obj.get(
"display_name")
263 if isinstance(display_name, str)
and display_name !=
"":
264 authors.append(display_name)
270 for c
in item.get(
"concepts", []):
271 name = (c
or {}).get(
"display_name")
272 if isinstance(name, str)
and name !=
"":
def _extract_biblio(
    item: dict[str, t.Any],
) -> tuple[str, str, str, str, str, datetime | None]:
    """Return ``(journal, publisher, pages, volume, number, published_date)``.

    Journal and publisher come from ``host_venue``; pages, volume, and issue
    from ``biblio``; the date is parsed from ``publication_date``.
    """
    # Guard against JSON null values for the nested objects.
    host_venue: dict[str, str] = item.get("host_venue", {}) or {}
    biblio: dict[str, str] = item.get("biblio", {}) or {}

    journal: str = host_venue.get("display_name", "")
    publisher: str = host_venue.get("publisher", "")

    pages = _stringify_pages(biblio)
    volume = biblio.get("volume", "")
    number = biblio.get("issue", "")
    published_date = _parse_date(item.get("publication_date"))
    return journal, publisher, pages, volume, number, published_date
293 cited_by_count = item.get(
"cited_by_count")
294 if isinstance(cited_by_count, int):
295 return f
"{cited_by_count} citations"
None request(str query, "OnlineParams" params)
str _extract_comments(dict[str, t.Any] item)
str _stringify_pages(dict[str, t.Any] biblio)
EngineResults response("SXNG_Response" resp)
list[str] _extract_authors(dict[str, t.Any] item)
tuple[str, str, str, str, str, datetime|None] _extract_biblio(dict[str, t.Any] item)
list[str] _extract_tags(dict[str, t.Any] item)
datetime|None _parse_date(str|None value)
str|None _reconstruct_abstract(dict[str, list[int]]|None abstract_inverted_index)
str _doi_to_plain(str|None doi_value)
tuple[str, str, str] _extract_links(dict[str, t.Any] item)