.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
oa_doi_rewrite.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2# pylint: disable=missing-module-docstring
3import typing
4
5import re
6from urllib.parse import parse_qsl
7
8from flask_babel import gettext
9from searx import get_setting
10from searx.plugins import Plugin, PluginInfo
11from searx.extended_types import sxng_request
12
13from ._core import log
14
15if typing.TYPE_CHECKING:
16 from searx.search import SearchWithPlugins
17 from searx.extended_types import SXNG_Request
18 from searx.result_types import Result, LegacyResult
19 from searx.plugins import PluginCfg
20
21
# NOTE(review): nothing in the visible part of this module reads or writes this
# list; it looks like a leftover -- confirm against other modules before removing.
ahmia_blacklist: list = []
23
24
25def filter_url_field(result: "Result|LegacyResult", field_name: str, url_src: str) -> bool | str:
26 """Returns bool ``True`` to use URL unchanged (``False`` to ignore URL).
27 If URL should be modified, the returned string is the new URL to use."""
28
29 if field_name != "url":
30 return True # use it unchanged
31
32 doi = extract_doi(result.parsed_url)
33 if doi and len(doi) < 50:
34 for suffix in ("/", ".pdf", ".xml", "/full", "/meta", "/abstract"):
35 doi = doi.removesuffix(suffix)
36 new_url = get_doi_resolver() + doi
37 if "doi" not in result:
38 result["doi"] = doi
39 log.debug("oa_doi_rewrite: [URL field: %s] %s -> %s", field_name, url_src, new_url)
40 return new_url # use new url
41
42 return True # use it unchanged
43
44
46 """Avoid paywalls by redirecting to open-access."""
47
48 id = "oa_doi_rewrite"
49
50 def __init__(self, plg_cfg: "PluginCfg") -> None:
51 super().__init__(plg_cfg)
53 id=self.id,
54 name=gettext("Open Access DOI rewrite"),
55 description=gettext("Avoid paywalls by redirecting to open-access versions of publications when available"),
56 preference_section="general",
57 )
58
60 self,
61 request: "SXNG_Request",
62 search: "SearchWithPlugins",
63 result: "Result",
64 ) -> bool: # pylint: disable=unused-argument
65 if result.parsed_url:
66 result.filter_urls(filter_url_field)
67 return True
68
69
70regex = re.compile(r'10\.\d{4,9}/[^\s]+')
71
72
73def extract_doi(url):
74 m = regex.search(url.path)
75 if m:
76 return m.group(0)
77 for _, v in parse_qsl(url.query):
78 m = regex.search(v)
79 if m:
80 return m.group(0)
81 return None
82
83
def get_doi_resolver() -> str:
    """Return the DOI resolver value to prepend to a DOI for the current request.

    The resolver is looked up in the ``doi_resolvers`` setting using the first
    entry of the request's ``doi_resolver`` preference.  When that preference is
    empty or names a resolver that is not configured, the resolver named by the
    ``default_doi_resolver`` setting is used instead.
    """
    doi_resolvers = get_setting("doi_resolvers")

    # The preference value is indexed with [0] -- presumably a sequence of
    # selected resolver names (TODO confirm).  Guard the index so an empty
    # value falls back to the default instead of raising IndexError.
    selected = sxng_request.preferences.get_value('doi_resolver')
    selected_resolver = selected[0] if selected else None

    if selected_resolver not in doi_resolvers:
        selected_resolver = get_setting("default_doi_resolver")
    return doi_resolvers[selected_resolver]
None __init__(self, "PluginCfg" plg_cfg)
bool on_result(self, "SXNG_Request" request, "SearchWithPlugins" search, "Result" result)
bool|str filter_url_field("Result|LegacyResult" result, str field_name, str url_src)
t.Any get_setting(str name, t.Any default=_unset)
Definition __init__.py:74