.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
crossref.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""Crossref_ is the sustainable source of community-owned scholarly metadata and
3is relied upon by thousands of systems across the research ecosystem and the
4globe.
5
6.. _Crossref: https://www.crossref.org/documentation/retrieve-metadata/
7
8"""
9
10import typing as t
11
12from urllib.parse import urlencode
13from datetime import datetime
14from searx.result_types import EngineResults
15
16if t.TYPE_CHECKING:
17 from searx.extended_types import SXNG_Response
18 from searx.search.processors import OnlineParams
19
# Descriptive metadata about the Crossref service and its API: project website,
# Wikidata identifier, link to the API documentation, whether the official API
# is used, whether an API key is required, and the format of the results.
about = {
    "website": "https://www.crossref.org/",
    "wikidata_id": "Q5188229",
    "official_api_documentation": "https://api.crossref.org/swagger-ui/",
    "use_official_api": True,
    "require_api_key": False,
    "results": "JSON",
}
28
# Category names assigned to this engine module.
categories = ["science", "scientific publications"]
# Paging flag; request() converts params["pageno"] into an "offset" argument.
paging = True
# Base endpoint; request() appends the URL-encoded query arguments to it.
search_url = "https://api.crossref.org/works"
"""Returns a list of all works (journal articles, conference proceedings, books,
components, etc), 20 per page (`Works/get_works`_).

.. _Works/get_works: https://api.crossref.org/swagger-ui/index.html#/Works/get_works
"""
37
38
def request(query: str, params: "OnlineParams") -> None:
    """Store the Crossref works URL for *query* in ``params["url"]``.

    The result offset is derived from ``params["pageno"]`` with a page size
    of 20, so page 1 maps to offset 0, page 2 to offset 20, and so on.
    """
    offset = (params["pageno"] - 1) * 20
    query_string = urlencode({"query": query, "offset": offset})
    params["url"] = search_url + "?" + query_string
45
46
def response(resp: "SXNG_Response") -> EngineResults:
    """Convert a Crossref works JSON response into ``Paper`` results.

    Every entry of ``message.items`` in the decoded JSON becomes one ``Paper``
    result, except records of type ``component``, which are skipped.

    :param resp: HTTP response whose ``json()`` yields the Crossref payload.
    :returns: the ``EngineResults`` container holding the created results.
    :raises KeyError: if the payload lacks ``message`` or ``message.items``.
    """
    res = EngineResults()
    json_data = resp.json()

    def field(record: "dict[str, t.Any]", key: str) -> str:
        """Return ``record[key]`` as a string, or ``""`` if missing or null."""
        value = record.get(key)
        return "" if value is None else str(value)

    def first(record: "dict[str, t.Any]", key: str) -> str:
        """Return the first entry of the list ``record[key]``, or ``""``."""
        values = record.get(key) or []
        if not values or values[0] is None:
            return ""
        return str(values[0])

    for record in json_data["message"]["items"]:

        record_type = record.get("type")
        if record_type == "component":
            # These seem to be files published along with papers. Not something
            # you'd search for.
            continue

        own_title = first(record, "title")
        container_title = first(record, "container-title")
        journal: str = ""

        if record_type == "book-chapter":
            # Show the book (container) title first and append the chapter
            # title in parentheses unless both are effectively the same.
            title = container_title or own_title
            if (
                container_title
                and own_title
                and own_title.lower().strip() != container_title.lower().strip()
            ):
                title = f"{container_title} ({own_title})"
        elif own_title:
            title = own_title
            journal = container_title
        else:
            # No usable title of its own: fall back to the container title.
            title = container_title

        item = res.types.Paper(
            title=title,
            journal=journal,
            content=field(record, "abstract"),
            doi=field(record, "DOI"),
            pages=field(record, "page"),
            publisher=field(record, "publisher"),
            tags=record.get("subject"),
            type=field(record, "type"),
            url=field(record, "URL"),
            volume=field(record, "volume"),
        )
        res.add(item)

        # Prefer the primary resource URL over the generic record URL.
        primary = (record.get("resource") or {}).get("primary") or {}
        if primary.get("URL"):
            item.url = primary["URL"]

        date_parts = (record.get("published") or {}).get("date-parts") or [[]]
        # Pad a missing month/day with 1 and keep only (year, month, day).
        # The slice must apply to the padded list, otherwise a complete date
        # would spill the padding into the hour/minute arguments.
        ymd = (list(date_parts[0] or []) + [1, 1])[:3]
        try:
            item.publishedDate = datetime(*ymd)
        except (TypeError, ValueError):
            # Missing, null or out-of-range date parts: leave the date unset
            # rather than failing the whole result page.
            pass

        authors: "list[str]" = []
        for author in record.get("author", []):
            name_parts = (author.get("given", ""), author.get("family", ""))
            full_name = " ".join(part for part in name_parts if part)
            if full_name:
                authors.append(full_name)
        item.authors = authors

        item.isbn = record.get("isbn") or [
            entry["value"] for entry in record.get("isbn-type", []) if "value" in entry
        ]

        # The record's "link" entries are deliberately not used as a PDF URL:
        # not all of them are PDFs, even if the URL ends with ".pdf".

    return res
EngineResults response("SXNG_Response" resp)
Definition crossref.py:47
None request(str query, "OnlineParams" params)
Definition crossref.py:39