.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
oa_doi_rewrite.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2# pylint: disable=missing-module-docstring
3from __future__ import annotations
4import typing
5
6import re
7from urllib.parse import parse_qsl
8
9from flask_babel import gettext
10from searx import get_setting
11from searx.plugins import Plugin, PluginInfo
12from searx.extended_types import sxng_request
13
14from ._core import log
15
16if typing.TYPE_CHECKING:
17 from searx.search import SearchWithPlugins
18 from searx.extended_types import SXNG_Request
19 from searx.result_types import Result, LegacyResult
20 from searx.plugins import PluginCfg
21
22
# NOTE(review): nothing in the visible part of this module reads or writes this
# list -- confirm whether anything still uses it before keeping or removing it.
ahmia_blacklist: list = []
24
25
def filter_url_field(result: "Result|LegacyResult", field_name: str, url_src: str) -> bool | str:
    """Decide whether the URL stored in one field of *result* gets rewritten.

    Only the ``url`` field is considered; for every other field ``True`` is
    returned, meaning "keep the URL as it is".  For the ``url`` field a DOI is
    looked up in ``result.parsed_url``.  When one is found and it is shorter
    than 50 characters, common trailing decorations are stripped, the DOI is
    stored in ``result["doi"]`` (unless that key is already present) and the
    resolver URL with the DOI appended is returned as the replacement URL.

    :param result: result whose URL field is being filtered
    :param field_name: name of the field the URL was taken from
    :param url_src: the current URL of that field (only used for logging)
    :return: ``True`` to keep the URL unchanged, or a string with the new URL.
        This function itself never returns ``False``.
    """
    if field_name != "url":
        return True  # other fields are never touched

    found = extract_doi(result.parsed_url)
    if not found or len(found) >= 50:
        return True  # no DOI, or one too long to be accepted

    # Each suffix is stripped at most once, in exactly this order.
    trimmed = found
    for tail in ("/", ".pdf", ".xml", "/full", "/meta", "/abstract"):
        trimmed = trimmed.removesuffix(tail)

    rewritten = get_doi_resolver() + trimmed
    if "doi" not in result:
        result["doi"] = trimmed
    log.debug("oa_doi_rewrite: [URL field: %s] %s -> %s", field_name, url_src, rewritten)
    return rewritten
44
45
47 """Avoid paywalls by redirecting to open-access."""
48
49 id = "oa_doi_rewrite"
50
51 def __init__(self, plg_cfg: "PluginCfg") -> None:
52 super().__init__(plg_cfg)
54 id=self.id,
55 name=gettext("Open Access DOI rewrite"),
56 description=gettext("Avoid paywalls by redirecting to open-access versions of publications when available"),
57 preference_section="general",
58 )
59
61 self,
62 request: "SXNG_Request",
63 search: "SearchWithPlugins",
64 result: "Result",
65 ) -> bool: # pylint: disable=unused-argument
66 if result.parsed_url:
67 result.filter_urls(filter_url_field)
68 return True
69
70
# Pattern used to spot a DOI: "10.", four to nine digits, a slash, then a run
# of non-whitespace characters.
regex = re.compile(r'10\.\d{4,9}/[^\s]+')


def extract_doi(url):
    """Return the first DOI-looking string found in *url*, or ``None``.

    *url* must provide ``path`` and ``query`` attributes.  The path is
    searched first; if it holds no match, the values of the query string (as
    decoded by ``parse_qsl``) are searched in order.
    """

    def candidate_texts():
        # Lazy on purpose: the query string is only parsed when the path
        # did not already yield a match.
        yield url.path
        yield from (value for _key, value in parse_qsl(url.query))

    hits = (regex.search(text) for text in candidate_texts())
    return next((hit.group(0) for hit in hits if hit), None)
83
84
def get_doi_resolver() -> str:
    """Return the entry of the ``doi_resolvers`` setting to use for this request.

    The key is the first item of the ``doi_resolver`` preference value of the
    current request; when that key is not present in the ``doi_resolvers``
    setting, the ``default_doi_resolver`` setting is used as key instead.
    """
    resolvers = get_setting("doi_resolvers")
    choice = sxng_request.preferences.get_value('doi_resolver')[0]
    key = choice if choice in resolvers else get_setting("default_doi_resolver")
    return resolvers[key]
None __init__(self, "PluginCfg" plg_cfg)
bool on_result(self, "SXNG_Request" request, "SearchWithPlugins" search, "Result" result)
bool|str filter_url_field("Result|LegacyResult" result, str field_name, str url_src)
get_setting(name, default=_unset)
Definition __init__.py:69