.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
pubmed.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""PubMed_ comprises more than 39 million citations for biomedical literature
3from MEDLINE, life science journals, and online books. Citations may include
4links to full text content from PubMed Central and publisher web sites.
5
6.. _PubMed: https://pubmed.ncbi.nlm.nih.gov/
7
8Configuration
9=============
10
11.. code:: yaml
12
13 - name: pubmed
14 engine: pubmed
15 shortcut: pub
16
17Implementations
18===============
19
20"""
21
22import typing as t
23
24from datetime import datetime
25from urllib.parse import urlencode
26
27from lxml import etree
28
29from searx.result_types import EngineResults
30from searx.network import get
31from searx.utils import (
32 eval_xpath_getindex,
33 eval_xpath_list,
34 extract_text,
35 ElementType,
36)
37
38if t.TYPE_CHECKING:
39 from searx.extended_types import SXNG_Response
40 from searx.search.processors import OnlineParams
41
42
# Descriptive metadata about the upstream PubMed service: its website, its
# Wikidata identifier, where the API is documented, and the format of the
# results this engine parses (XML).
about = {
    "website": "https://www.ncbi.nlm.nih.gov/pubmed/",
    "wikidata_id": "Q1540899",
    "official_api_documentation": {
        "url": "https://www.ncbi.nlm.nih.gov/home/develop/api/",
        "comment": "More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/",
    },
    "use_official_api": True,
    "require_api_key": False,
    "results": "XML",
}
54
# Categories this engine is listed under.
categories = ["science", "scientific publications"]

# Base URL of the E-utilities endpoints; ``request()`` appends
# ``/esearch.fcgi`` and ``/efetch.fcgi`` to it.
eutils_api = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"

# engine dependent config

# Page size: ``request()`` multiplies it by ``pageno - 1`` to get the offset
# of the first record and also sends it as the requested number of results.
number_of_results = 10
# Prefix that ``response()`` joins with a record's PMID to build the result URL.
pubmed_url = "https://www.ncbi.nlm.nih.gov/pubmed/"
62
63
def request(query: str, params: "OnlineParams") -> None:
    """Prepare the efetch request for one result page of *query*.

    Two E-utilities calls are involved:

    1. ``esearch`` resolves *query* to the PMIDs of the requested page.  This
       call is issued right here through :py:obj:`get`.
    2. ``efetch`` returns the full records for those PMIDs.  Only its URL is
       built here and stored in ``params["url"]``.

    :param query: the search term, passed to esearch as ``term``.
    :param params: request parameters; ``params["pageno"]`` (1-based) is read
        and ``params["url"]`` is written.
    """

    args = urlencode(
        {
            "db": "pubmed",
            "term": query,
            "retstart": (params["pageno"] - 1) * number_of_results,
            # ``retmax`` is the E-utilities parameter that limits the number
            # of IDs returned.  The page size must match the ``retstart``
            # step, otherwise consecutive pages overlap.
            "retmax": number_of_results,
        }
    )
    esearch_url = f"{eutils_api}/esearch.fcgi?{args}"
    # DTD: https://eutils.ncbi.nlm.nih.gov/eutils/dtd/20060628/esearch.dtd
    esearch_resp: "SXNG_Response" = get(esearch_url)
    pmids_results = etree.XML(esearch_resp.content)
    # Skip empty <Id/> elements: their ``.text`` is None and would break the
    # ``join`` below.
    pmids: list[str] = [i.text for i in pmids_results.xpath("//eSearchResult/IdList/Id") if i.text]

    # send efetch request with the IDs from esearch response
    # NOTE(review): when esearch finds nothing, efetch is still requested with
    # an empty ``id`` -- confirm that this is handled downstream.
    args = urlencode(
        {
            "db": "pubmed",
            "retmode": "xml",
            "id": ",".join(pmids),
        }
    )
    efetch_url = f"{eutils_api}/efetch.fcgi?{args}"
    params["url"] = efetch_url
90
91
def response(resp: "SXNG_Response") -> EngineResults:  # pylint: disable=too-many-locals
    """Convert an efetch XML response into ``Paper`` results.

    One result is added per ``<PubmedArticle>`` element.  Title, PMID based
    URL, abstract, DOI, journal title, ISSN and authors are read from
    ``<MedlineCitation>``; the publication date is taken from the *accepted*
    entry of ``<PubmedData>/<History>`` when it is present and valid.

    :param resp: HTTP response of the efetch request; ``resp.content`` is
        parsed as XML.
    :return: the collected results.
    """

    # DTD: https://dtd.nlm.nih.gov/ncbi/pubmed/out/pubmed_250101.dtd

    # parse efetch response
    efetch_xml = etree.XML(resp.content)
    res = EngineResults()

    def _field_txt(xml: ElementType, xpath_str: str) -> str:
        """Return the text of the first match of *xpath_str* or ``""``."""
        elem = eval_xpath_getindex(xml, xpath_str, 0, default="")
        return extract_text(elem, allow_none=True) or ""

    for pubmed_article in eval_xpath_list(efetch_xml, "//PubmedArticle"):

        medline_citation: ElementType = eval_xpath_getindex(pubmed_article, "./MedlineCitation", 0)
        # <PubmedData> is optional per the DTD; a record without it must not
        # abort parsing of the whole response.
        pubmed_data = eval_xpath_getindex(pubmed_article, "./PubmedData", 0, default=None)

        # The title may contain inline markup (<i>, <sub>, <sup>, ...).
        # ``.text`` would stop at the first child element, so collect every
        # text node instead.
        title_elem = eval_xpath_getindex(medline_citation, ".//Article/ArticleTitle", 0)
        title: str = "".join(title_elem.itertext()).strip()
        pmid: str = eval_xpath_getindex(medline_citation, ".//PMID", 0).text
        url: str = pubmed_url + pmid
        content = _field_txt(medline_citation, ".//Abstract/AbstractText//text()")
        doi = _field_txt(medline_citation, ".//ELocationID[@EIdType='doi']/text()")
        journal = _field_txt(medline_citation, "./Article/Journal/Title/text()")
        issn = _field_txt(medline_citation, "./Article/Journal/ISSN/text()")

        authors: list[str] = []

        for author in eval_xpath_list(medline_citation, "./Article/AuthorList/Author"):
            fore_name = eval_xpath_getindex(author, "./ForeName", 0, default=None)
            last_name = eval_xpath_getindex(author, "./LastName", 0, default=None)
            first = fore_name.text if fore_name is not None else ""
            last = last_name.text if last_name is not None else ""
            author_name = f"{first} {last}".strip()
            if author_name:
                authors.append(author_name)

        pub_date = None
        accepted_date = None
        if pubmed_data is not None:
            accepted_date = eval_xpath_getindex(
                pubmed_data, "./History//PubMedPubDate[@PubStatus='accepted']", 0, default=None
            )
        if accepted_date is not None:
            year = eval_xpath_getindex(accepted_date, "./Year", 0)
            month = eval_xpath_getindex(accepted_date, "./Month", 0)
            day = eval_xpath_getindex(accepted_date, "./Day", 0)
            try:
                pub_date = datetime(year=int(year.text), month=int(month.text), day=int(day.text))
            except (ValueError, TypeError):
                # Best effort: a non-numeric, out-of-range or empty date part
                # leaves the result without a publication date.
                pass

        res.add(
            res.types.Paper(
                url=url,
                title=title,
                content=content,
                journal=journal,
                # no ISSN -> empty list instead of a list holding ""
                issn=[issn] if issn else [],
                authors=authors,
                doi=doi,
                publishedDate=pub_date,
            )
        )
    return res
EngineResults response("SXNG_Response" resp)
Definition pubmed.py:92
None request(str query, "OnlineParams" params)
Definition pubmed.py:64