.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
hostnames.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2# pylint: disable=too-many-branches
3"""
4.. attention::
5
6 The **"Hostname replace"** plugin has been replaced by **"Hostnames
7 plugin"**, see :pull:`3463` & :pull:`3552`.
8
9The **Hostnames plugin** can be enabled by adding it to the
10``enabled_plugins`` **list** in the ``settings.yml`` like so.
11
12 .. code:: yaml
13
14 enabled_plugins:
15 - 'Hostnames plugin'
16 ...
17
18- ``hostnames.replace``: A **mapping** of regular expressions to hostnames to be
19 replaced by other hostnames.
20
21 .. code:: yaml
22
23 hostnames:
24 replace:
25 '(.*\\.)?youtube\\.com$': 'invidious.example.com'
26 '(.*\\.)?youtu\\.be$': 'invidious.example.com'
27 ...
28
29- ``hostnames.remove``: A **list** of regular expressions of the hostnames whose
30 results should be removed from the results list.
31
32 .. code:: yaml
33
34 hostnames:
35 remove:
36 - '(.*\\.)?facebook.com$'
37 - ...
38
39- ``hostnames.high_priority``: A **list** of regular expressions for hostnames
40 whose result should be given higher priority. The results from these hosts are
41 arranged higher in the results list.
42
43 .. code:: yaml
44
45 hostnames:
46 high_priority:
47 - '(.*\\.)?wikipedia.org$'
48 - ...
49
50- ``hostnames.low_priority``: A **list** of regular expressions for hostnames
51 whose result should be given lower priority. The results from these hosts are
52 arranged lower in the results list.
53
54 .. code:: yaml
55
56 hostnames:
57 low_priority:
58 - '(.*\\.)?google(\\..*)?$'
59 - ...
60
61If the URL matches the pattern of ``high_priority`` AND ``low_priority``, the
62higher priority wins over the lower priority.
63
64Alternatively, you can also specify a file name for the **mappings** or
65**lists** to load these from an external file:
66
67.. code:: yaml
68
69 hostnames:
70 replace: 'rewrite-hosts.yml'
71 remove:
72 - '(.*\\.)?facebook.com$'
73 - ...
74 low_priority:
75 - '(.*\\.)?google(\\..*)?$'
76 - ...
77 high_priority:
78 - '(.*\\.)?wikipedia.org$'
79 - ...
80
81The ``rewrite-hosts.yml`` from the example above must be in the folder in which
82the ``settings.yml`` file is already located (``/etc/searxng``). The file then
83only contains the lists or the mapping tables without further information on the
84namespaces. In the example above, this would be a mapping table that looks
85something like this:
86
87.. code:: yaml
88
89 '(.*\\.)?youtube\\.com$': 'invidious.example.com'
90 '(.*\\.)?youtu\\.be$': 'invidious.example.com'
91
92"""
93
94from __future__ import annotations
95
96import re
97from urllib.parse import urlunparse, urlparse
98
99from flask_babel import gettext
100
101from searx import settings
102from searx.settings_loader import get_yaml_cfg
103
104
# Plugin metadata: translated display name and description, default activation
# state and the preference section the plugin is listed under.
name = gettext("Hostnames plugin")
description = gettext(
    "Rewrite hostnames, remove results or prioritize them based on the hostname"
)
default_on = False
preference_section = "general"

# Key of this plugin's section in the settings (see _load_regular_expressions).
plugin_id = "hostnames"

# Key of a result under which its parsed URL is stored.
parsed = "parsed_url"
# Additional result fields holding URLs whose hostname is rewritten / removed.
_url_fields = ["iframe_src", "audio_src"]
114
115
def _load_regular_expressions(settings_key) -> dict | set | None:
    """Compile the regular expressions configured under ``settings_key``.

    The value is looked up in the plugin's section of the settings.  A string
    value is treated as the name of an external YAML file and is loaded with
    ``get_yaml_cfg`` before being processed.

    :param settings_key: name of the option inside the plugin's settings
        section (e.g. ``'replace'`` or ``'remove'``).
    :return: a ``dict`` mapping compiled patterns to their configured values
        when the option is a mapping, a ``set`` of compiled patterns when it
        is a list, and ``None`` when the option is missing, empty or of any
        other type.
    """
    plugin_cfg = settings.get(plugin_id, {})
    raw = plugin_cfg.get(settings_key)
    if not raw:
        return None

    # A string is a reference to an external configuration file.
    if isinstance(raw, str):
        raw = get_yaml_cfg(raw)

    if isinstance(raw, dict):
        return {re.compile(regex): target for regex, target in raw.items()}

    if isinstance(raw, list):
        return set(map(re.compile, raw))

    return None
133
134
# Rule tables, built once when the module is imported.  Whenever the loader
# returns nothing usable, an empty container is used instead so that the
# loops in on_result have nothing to iterate over.
replacements: dict = _load_regular_expressions('replace') or {}  # type: ignore
removables: set = _load_regular_expressions('remove') or set()  # type: ignore
high_priority: set = _load_regular_expressions('high_priority') or set()  # type: ignore
low_priority: set = _load_regular_expressions('low_priority') or set()  # type: ignore
139
140
def _matches_parsed_url(result, pattern):
    """Tell whether the hostname of the result's parsed URL matches ``pattern``.

    :param result: mapping-like search result; its parsed URL, if any, is
        stored under the key named by ``parsed``.
    :param pattern: compiled regular expression searched in the ``netloc``
        of the parsed URL.
    :return: a truthy match object when the hostname matches, otherwise a
        falsy value (also when the result has no, or an empty, parsed URL).
    """
    # The membership test has to come first: indexing a result that lacks the
    # key would raise KeyError before the guard is ever evaluated.
    if parsed not in result:
        return False

    parsed_url = result[parsed]
    return parsed_url and pattern.search(parsed_url.netloc)
143
144
def on_result(_request, _search, result) -> bool:
    """Apply the configured hostname rules to a single search result.

    The rules are applied in this order:

    1. ``replacements``: rewrite the hostname of the result's URL and of the
       URLs stored in the fields listed in ``_url_fields``.
    2. ``removables``: drop the whole result when its URL matches; otherwise
       delete only those ``_url_fields`` entries whose hostname matches.
    3. ``low_priority`` then ``high_priority``: set ``result['priority']``;
       since the high priority is assigned last, it wins when both match.

    :param _request: unused.
    :param _search: unused.
    :param result: mapping-like search result, modified in place.
    :return: ``False`` when the result matches one of the ``removables``
        patterns, ``True`` otherwise.
    """
    for pattern, replacement in replacements.items():
        if _matches_parsed_url(result, pattern):
            result[parsed] = result[parsed]._replace(netloc=pattern.sub(replacement, result[parsed].netloc))
            result['url'] = urlunparse(result[parsed])

        for url_field in _url_fields:
            # The result is accessed as a mapping everywhere else in this
            # function, so the field is looked up by key.  An attribute lookup
            # (getattr) never finds the key of a plain dict, which would skip
            # this rewrite unconditionally.
            if not (url_field in result and result[url_field]):
                continue

            url_src = urlparse(result[url_field])
            if pattern.search(url_src.netloc):
                url_src = url_src._replace(netloc=pattern.sub(replacement, url_src.netloc))
                result[url_field] = urlunparse(url_src)

    for pattern in removables:
        if _matches_parsed_url(result, pattern):
            return False

        for url_field in _url_fields:
            # Same key-based lookup as in the replacement loop above.
            if not (url_field in result and result[url_field]):
                continue

            url_src = urlparse(result[url_field])
            if pattern.search(url_src.netloc):
                del result[url_field]

    for pattern in low_priority:
        if _matches_parsed_url(result, pattern):
            result['priority'] = 'low'

    # Evaluated after the low priority patterns so that 'high' overrides 'low'.
    for pattern in high_priority:
        if _matches_parsed_url(result, pattern):
            result['priority'] = 'high'

    return True
dict|set|None _load_regular_expressions(settings_key)
Definition hostnames.py:116
bool on_result(_request, _search, result)
Definition hostnames.py:145
_matches_parsed_url(result, pattern)
Definition hostnames.py:141