.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
hostnames.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2# pylint: disable=too-many-branches
3"""
4.. attention::
5
6 The **"Hostname replace"** plugin has been replaced by **"Hostnames
7 plugin"**, see :pull:`3463` & :pull:`3552`.
8
9The **Hostnames plugin** can be enabled by adding it to the
10``enabled_plugins`` **list** in the ``settings.yml`` like so.
11
12 .. code:: yaml
13
14 enabled_plugins:
15 - 'Hostnames plugin'
16 ...
17
18- ``hostnames.replace``: A **mapping** of regular expressions to hostnames to be
19 replaced by other hostnames.
20
21 .. code:: yaml
22
23 hostnames:
24 replace:
25 '(.*\\.)?youtube\\.com$': 'invidious.example.com'
26 '(.*\\.)?youtu\\.be$': 'invidious.example.com'
27 ...
28
29- ``hostnames.remove``: A **list** of regular expressions of the hostnames whose
30 results should be removed from the results list.
31
32 .. code:: yaml
33
34 hostnames:
35 remove:
36 - '(.*\\.)?facebook.com$'
37 - ...
38
39- ``hostnames.high_priority``: A **list** of regular expressions for hostnames
40 whose result should be given higher priority. The results from these hosts are
41 arranged higher in the results list.
42
43 .. code:: yaml
44
45 hostnames:
46 high_priority:
47 - '(.*\\.)?wikipedia.org$'
48 - ...
49
50- ``hostnames.low_priority``: A **list** of regular expressions for hostnames
51 whose result should be given lower priority. The results from these hosts are
52 arranged lower in the results list.
53
54 .. code:: yaml
55
56 hostnames:
57 low_priority:
58 - '(.*\\.)?google(\\..*)?$'
59 - ...
60
61If the URL matches the pattern of ``high_priority`` AND ``low_priority``, the
62higher priority wins over the lower priority.
63
64Alternatively, you can also specify a file name for the **mappings** or
65**lists** to load these from an external file:
66
67.. code:: yaml
68
69 hostnames:
70 replace: 'rewrite-hosts.yml'
71 remove:
72 - '(.*\\.)?facebook.com$'
73 - ...
74 low_priority:
75 - '(.*\\.)?google(\\..*)?$'
76 - ...
77 high_priority:
78 - '(.*\\.)?wikipedia.org$'
79 - ...
80
81The ``rewrite-hosts.yml`` from the example above must be in the folder in which
82the ``settings.yml`` file is already located (``/etc/searxng``). The file then
83only contains the lists or the mapping tables without further information on the
84namespaces. In the example above, this would be a mapping table that looks
85something like this:
86
87.. code:: yaml
88
89 '(.*\\.)?youtube\\.com$': 'invidious.example.com'
90 '(.*\\.)?youtu\\.be$': 'invidious.example.com'
91
92"""
93
94import re
95from urllib.parse import urlunparse, urlparse
96
97from flask_babel import gettext
98
99from searx import settings
100from searx.plugins import logger
101from searx.settings_loader import get_yaml_cfg
102
# Plugin metadata; the user-visible strings are passed through gettext.
name = gettext('Hostnames plugin')
description = gettext('Rewrite hostnames, remove results or prioritize them based on the hostname')
default_on = False
preference_section = 'general'

# Also used as the key of this plugin's section in the settings and as the
# name of its child logger.
plugin_id = 'hostnames'

logger = logger.getChild(plugin_id)
# Key of the result field holding the parsed URL whose ``netloc`` is matched
# against the configured patterns.
parsed = 'parsed_url'
# Further result fields that carry a URL string; they get the same replace /
# remove treatment as the main URL.
_url_fields = ['iframe_src', 'audio_src']
113
114
def _load_regular_expressions(settings_key):
    """Compile the regular expressions configured under ``hostnames.<settings_key>``.

    The configured value may be

    - a **string**: passed to ``get_yaml_cfg`` and replaced by whatever that
      returns (presumably the content of an external YAML file -- see
      ``searx.settings_loader``),
    - a **list** of patterns: returned as a set of compiled patterns,
    - a **mapping** of pattern to replacement: returned as a dict keyed by the
      compiled pattern.

    A missing, empty or otherwise typed value yields an empty dict.

    NOTE(review): callers that use ``.items()`` on the result rely on the
    setting being a mapping; a list value produces a set.
    """
    setting_value = settings.get(plugin_id, {}).get(settings_key)

    if not setting_value:
        return {}

    # a string names an external file to load the configuration from
    if isinstance(setting_value, str):
        setting_value = get_yaml_cfg(setting_value)

    if isinstance(setting_value, list):
        return {re.compile(r) for r in setting_value}

    if isinstance(setting_value, dict):
        return {re.compile(p): r for (p, r) in setting_value.items()}

    return {}
132
133
# Pattern tables, compiled once when the module is imported.  ``replacements``
# is iterated with ``.items()`` (pattern -> replacement); the other three are
# iterated as plain collections of patterns.
replacements = _load_regular_expressions('replace')
removables = _load_regular_expressions('remove')
high_priority = _load_regular_expressions('high_priority')
low_priority = _load_regular_expressions('low_priority')
138
139
def _matches_parsed_url(result, pattern):
    """Check the hostname of the result's parsed URL against *pattern*.

    Returns ``False`` when *result* has no parsed-URL entry; otherwise the
    outcome of ``pattern.search`` on the URL's ``netloc`` (a match object, or
    ``None`` when the pattern does not match).  Use the value for its
    truthiness only.
    """
    return parsed in result and pattern.search(result[parsed].netloc)
142
143
def on_result(_request, _search, result):
    """Apply the configured hostname rules to a single *result* (in place).

    The rules are applied in this order:

    1. ``replace``: rewrite the hostname of the parsed URL (and ``url``) and of
       every URL field listed in ``_url_fields``.
    2. ``remove``: return ``False`` as soon as the parsed URL's hostname
       matches; a matching URL field is only deleted from the result.
    3. ``low_priority`` then ``high_priority``: set ``result['priority']``;
       because high is assigned last it wins when both match.

    Returns ``False`` when the result matched a ``remove`` pattern, otherwise
    ``True``.  ``_request`` and ``_search`` are not used.
    """
    for pattern, replacement in replacements.items():
        if _matches_parsed_url(result, pattern):
            result[parsed] = result[parsed]._replace(netloc=pattern.sub(replacement, result[parsed].netloc))
            # keep the plain URL string in sync with the rewritten parsed URL
            result['url'] = urlunparse(result[parsed])

        # the additional URL fields are checked independently of the main URL
        for url_field in _url_fields:
            if not result.get(url_field):
                continue

            url_src = urlparse(result[url_field])
            if pattern.search(url_src.netloc):
                url_src = url_src._replace(netloc=pattern.sub(replacement, url_src.netloc))
                result[url_field] = urlunparse(url_src)

    for pattern in removables:
        if _matches_parsed_url(result, pattern):
            return False

        # a removable host in an additional field only drops that field
        for url_field in _url_fields:
            if not result.get(url_field):
                continue

            url_src = urlparse(result[url_field])
            if pattern.search(url_src.netloc):
                del result[url_field]

    for pattern in low_priority:
        if _matches_parsed_url(result, pattern):
            result['priority'] = 'low'

    # evaluated after low_priority so that 'high' overrides 'low'
    for pattern in high_priority:
        if _matches_parsed_url(result, pattern):
            result['priority'] = 'high'

    return True
on_result(_request, _search, result)
Definition hostnames.py:144
_matches_parsed_url(result, pattern)
Definition hostnames.py:140
_load_regular_expressions(settings_key)
Definition hostnames.py:115