.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
mullvad_leta.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""Mullvad Leta is a search engine proxy. Currently Leta only offers text
3search results not image, news or any other types of search result. Leta acts
4as a proxy to Google and Brave search results. You can select which backend
5search engine you wish to use, see (:py:obj:`leta_engine`).
6
7.. hint::
8
9 Leta caches each search for up to 30 days. For example, if you use search
10 terms like ``news``, contrary to your intention you'll get very old results!
11
12
13Configuration
14=============
15
16The engine has the following additional settings:
17
18- :py:obj:`leta_engine` (:py:obj:`LetaEnginesType`)
19
20You can configure one Leta engine for Google and one for Brave:
21
22.. code:: yaml
23
24 - name: mullvadleta
25 engine: mullvad_leta
26 leta_engine: google
27 shortcut: ml
28
29 - name: mullvadleta brave
30 engine: mullvad_leta
31 network: mullvadleta # use network from engine "mullvadleta" configured above
32 leta_engine: brave
33 shortcut: mlb
34
35Implementations
36===============
37
38"""
39
40from __future__ import annotations
41
42import typing
43from urllib.parse import urlencode
44import babel
45from httpx import Response
46from lxml import html
47from searx.enginelib.traits import EngineTraits
48from searx.locales import get_official_locales, language_tag, region_tag
49from searx.utils import eval_xpath_list
50from searx.result_types import EngineResults, MainResult
51
52if typing.TYPE_CHECKING:
53 import logging
54
55 logger = logging.getLogger()
56
# Annotation only — no value is assigned here; presumably the searxng engine
# loader injects the actual EngineTraits instance (request() reads it at
# runtime). TODO confirm against the loader.
traits: EngineTraits

search_url = "https://leta.mullvad.net"

# about
about = {
    "website": search_url,
    "wikidata_id": 'Q47008412',  # the Mullvad id - not leta, but related
    "official_api_documentation": 'https://leta.mullvad.net/faq',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ["general", "web"]
paging = True
max_page = 10
time_range_support = True
# maps searxng time-range names to Leta's one-letter ``lastUpdated`` values
time_range_dict = {
    "day": "d",
    "week": "w",
    "month": "m",
    "year": "y",
}
82
LetaEnginesType = typing.Literal["google", "brave"]
"""Engine types supported by mullvadleta."""

leta_engine: LetaEnginesType = "google"
"""Select Leta's engine type from :py:obj:`LetaEnginesType`."""


def init(_):
    """Validate the ``leta_engine`` setting at engine setup time.

    :raises ValueError: if the configured ``leta_engine`` is not one of the
        literals in :py:obj:`LetaEnginesType`.
    """
    # use a readable name instead of the ambiguous single-letter ``l`` (E741)
    valid_engines = typing.get_args(LetaEnginesType)
    if leta_engine not in valid_engines:
        raise ValueError(f"leta_engine '{leta_engine}' is invalid, use one of {', '.join(valid_engines)}")
94
95
class DataNodeQueryMetaDataIndices(typing.TypedDict):
    """Indices into query metadata.

    Leta's ``__data.json`` flattens all objects into a single list (see
    :py:obj:`response`); every field below therefore holds the *index* at
    which the corresponding value is stored in that list, not the value
    itself.
    """

    success: int
    q: int  # pylint: disable=invalid-name
    country: int
    language: int
    lastUpdated: int
    engine: int
    items: int  # index of the list holding one index per search result
    infobox: int
    news: int
    timestamp: int
    altered: int
    page: int
    next: int  # if -1, no more results are available
    previous: int
114
class DataNodeResultIndices(typing.TypedDict):
    """Indices into the data of a single search result.

    As with :py:obj:`DataNodeQueryMetaDataIndices`, each field is an index
    into the flattened data-node list where the actual value is stored.
    """

    link: int
    snippet: int
    title: int
    favicon: int
123
def request(query: str, params: dict):
    """Assemble the Leta ``/search/__data.json`` request URL in *params*."""
    params["method"] = "GET"

    sxng_locale = params.get("searxng_locale")
    query_args: dict = {
        "q": query,
        "engine": leta_engine,
        "x-sveltekit-invalidated": "001",  # hardcoded from all requests seen
    }

    # region / language are optional — only sent when a mapping exists
    for arg_name, arg_value in (
        ("country", traits.get_region(sxng_locale, traits.all_locale)),  # type: ignore
        ("language", traits.get_language(sxng_locale, traits.all_locale)),  # type: ignore
    ):
        if arg_value:
            query_args[arg_name] = arg_value

    time_range = params["time_range"]
    if time_range in time_range_dict:
        query_args["lastUpdated"] = time_range_dict[time_range]

    page_number = params["pageno"]
    if page_number > 1:
        query_args["page"] = page_number

    params["url"] = f"{search_url}/search/__data.json?{urlencode(query_args)}"

    return params
149
150
def response(resp: Response) -> EngineResults:
    """Parse Leta's SvelteKit ``__data.json`` payload into engine results."""
    json_response = resp.json()

    nodes = json_response["nodes"]
    # 0: is None
    # 1: has "connected=True", not useful
    # 2: query results within "data"

    data_nodes = nodes[2]["data"]
    # Instead of a nested object structure, all objects are flattened into a
    # list.  The first object in data_nodes provides indices into
    # "data_nodes" to access each search result (which is itself an object
    # of more indices).
    #
    # Read the relative TypedDict definitions for details.

    query_meta_data: DataNodeQueryMetaDataIndices = data_nodes[0]

    query_items_indices = query_meta_data["items"]

    results = EngineResults()
    for idx in data_nodes[query_items_indices]:
        query_item_indices: DataNodeResultIndices = data_nodes[idx]
        # BUGFIX: the kwargs must be wrapped in a MainResult (imported at the
        # top of this module) — the bare ``results.add(url=..., ...)`` call
        # was unbalanced and passed no result object.
        results.add(
            MainResult(
                url=data_nodes[query_item_indices["link"]],
                title=data_nodes[query_item_indices["title"]],
                content=data_nodes[query_item_indices["snippet"]],
            )
        )

    return results
183
184
def fetch_traits(engine_traits: EngineTraits) -> None:
    """Fetch languages and regions from Mullvad-Leta.

    Scrapes the country and language tables from Leta's ``/documentation``
    page and fills ``engine_traits.languages`` / ``engine_traits.regions``.
    Prints an error and returns early when the page is unreachable (e.g. not
    connected to the Mullvad VPN) or its layout changed.
    """

    def extract_table_data(table):
        # Yield ``[name, code]`` pairs from the first two columns of a table,
        # skipping the two header rows.
        for row in table.xpath(".//tr")[2:]:
            cells = row.xpath(".//td | .//th")  # includes headers and data
            if len(cells) > 1:  # ensure the column exists
                cell0 = cells[0].text_content().strip()
                cell1 = cells[1].text_content().strip()
                yield [cell0, cell1]

    # pylint: disable=import-outside-toplevel
    # see https://github.com/searxng/searxng/issues/762
    from searx.network import get as http_get

    # pylint: enable=import-outside-toplevel

    resp = http_get(f"{search_url}/documentation")
    if not isinstance(resp, Response):
        print("ERROR: failed to get response from mullvad-leta. Are you connected to the VPN?")
        return
    if not resp.ok:
        print("ERROR: response from mullvad-leta is not OK. Are you connected to the VPN?")
        return

    dom = html.fromstring(resp.text)

    # There are 4 HTML tables on the documentation page for extracting information:
    # 0. Keyboard Shortcuts
    # 1. Query Parameters (shoutout to Mullvad for accessible docs for integration)
    # 2. Country Codes [Country, Code]
    # 3. Language Codes [Language, Code]
    tables = eval_xpath_list(dom.body, "//table")
    if tables is None or len(tables) <= 0:
        print("ERROR: could not find any tables. Was the page updated?")
        # BUGFIX: without this return, ``tables[3]`` below raises on the very
        # layout change this check tries to detect.
        return

    language_table = tables[3]
    # Map Leta codes to babel locale tags where they differ.  BUGFIX: per
    # BCP 47 / CLDR, ``hant`` is Traditional Han (babel ``zh_Hant``) and
    # ``hans`` is Simplified Han (babel ``zh_Hans``) — the two mappings were
    # swapped.
    lang_map = {
        "zh-hant": "zh_Hant",
        "zh-hans": "zh_Hans",
        "jp": "ja",
    }

    for language, code in extract_table_data(language_table):

        locale_tag = lang_map.get(code, code).replace("-", "_")  # type: ignore
        try:
            locale = babel.Locale.parse(locale_tag)
        except babel.UnknownLocaleError:
            print(f"ERROR: Mullvad-Leta language {language} ({code}) is unknown by babel")
            continue

        sxng_tag = language_tag(locale)
        engine_traits.languages[sxng_tag] = code

    country_table = tables[2]
    # countries whose searxng region tag cannot be derived generically below
    country_map = {
        "cn": "zh-CN",
        "hk": "zh-HK",
        "jp": "ja-JP",
        "my": "ms-MY",
        "tw": "zh-TW",
        "uk": "en-GB",
        "us": "en-US",
    }

    for country, code in extract_table_data(country_table):

        sxng_tag = country_map.get(code)
        if sxng_tag:
            engine_traits.regions[sxng_tag] = code
            continue

        # try a locale where language and territory share the country code,
        # e.g. ``de_DE`` or ``fr_FR``
        try:
            locale = babel.Locale.parse(f"{code.lower()}_{code.upper()}")
        except babel.UnknownLocaleError:
            locale = None

        if locale:
            engine_traits.regions[region_tag(locale)] = code
            continue

        # fall back to the official locales spoken in that country
        official_locales = get_official_locales(code, engine_traits.languages.keys(), regional=True)
        if not official_locales:
            print(f"ERROR: Mullvad-Leta country '{code}' ({country}) could not be mapped as expected.")
            continue

        for locale in official_locales:
            engine_traits.regions[region_tag(locale)] = code
request(str query, dict params)
EngineResults response(Response resp)
None fetch_traits(EngineTraits engine_traits)