.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
mullvad_leta.py
Go to the documentation of this file.
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Mullvad Leta is a search engine proxy.  Currently Leta only offers text
search results, not image, news or any other type of search result.  Leta acts
as a proxy to Google and Brave search results.  You can select which backend
search engine you wish to use, see :py:obj:`leta_engine`.

.. hint::

   Leta caches each search for up to 30 days.  For example, if you use search
   terms like ``news``, contrary to your intention you'll get very old results!


Configuration
=============

The engine has the following additional settings:

- :py:obj:`leta_engine` (:py:obj:`LetaEnginesType`)

You can configure one Leta engine for Google and one for Brave:

.. code:: yaml

  - name: mullvadleta
    engine: mullvad_leta
    leta_engine: google
    shortcut: ml

  - name: mullvadleta brave
    engine: mullvad_leta
    network: mullvadleta  # use network from engine "mullvadleta" configured above
    leta_engine: brave
    shortcut: mlb

Implementations
===============

"""
39import typing as t
40
41from urllib.parse import urlencode
42import babel
43from httpx import Response
44from lxml import html
45from searx.enginelib.traits import EngineTraits
46from searx.locales import get_official_locales, language_tag, region_tag
47from searx.utils import eval_xpath_list
48from searx.result_types import EngineResults, MainResult
49
# Base URL of the Leta service; every request URL is derived from it.
search_url = "https://leta.mullvad.net"

# Engine metadata ("about" box).
about = dict(
    website=search_url,
    wikidata_id="Q47008412",  # the Mullvad id - not leta, but related
    official_api_documentation="https://leta.mullvad.net/faq",
    use_official_api=False,
    require_api_key=False,
    results="HTML",
)

# Engine dependent configuration.
categories = ["general", "web"]
paging = True
max_page = 10
time_range_support = True
# Maps the time range names used in ``params["time_range"]`` to the values
# sent in the ``lastUpdated`` URL argument.
time_range_dict = {"day": "d", "week": "w", "month": "m", "year": "y"}

LetaEnginesType = t.Literal["google", "brave"]
"""Engine types supported by mullvadleta."""

leta_engine: LetaEnginesType = "google"
"""Select Leta's engine type from :py:obj:`LetaEnginesType`."""
79
80
def init(_):
    """Validate the engine configuration.

    The single positional argument is ignored.

    :raises ValueError: if the module-level ``leta_engine`` is not one of the
        values listed in ``LetaEnginesType``.
    """
    supported_engines = t.get_args(LetaEnginesType)
    if leta_engine in supported_engines:
        return
    raise ValueError(f"leta_engine '{leta_engine}' is invalid, use one of {', '.join(supported_engines)}")
85
86
class DataNodeQueryMetaDataIndices(t.TypedDict):
    """Indices into query metadata.

    Describes the first object of the flattened ``data`` list in Leta's JSON
    response.  Every value is an integer index into that same list, where the
    actual value is stored.
    """

    success: int
    q: int  # pylint: disable=invalid-name
    country: int
    language: int
    lastUpdated: int
    engine: int
    items: int  # index of the list holding the indices of the individual results
    infobox: int
    news: int
    timestamp: int
    altered: int
    page: int
    next: int  # if -1, no more results are available
    previous: int
104
105
class DataNodeResultIndices(t.TypedDict):
    """Indices into the data of a single search result.

    Each value is an integer position in the flattened ``data`` list of the
    response at which the corresponding field of the result is stored.
    """

    link: int  # position of the result URL
    snippet: int  # position of the result text
    title: int  # position of the result title
    favicon: int
113
114
def request(query: str, params: dict):
    """Assemble the GET request sent to Leta.

    Sets ``params["method"]`` and ``params["url"]``; the URL points to
    ``{search_url}/search/__data.json`` followed by the encoded query
    arguments.

    :param query: the search terms.
    :param params: request parameters; ``searxng_locale`` (optional),
        ``time_range`` and ``pageno`` are read from it.
    :return: the same ``params`` dict, updated in place.
    """
    params["method"] = "GET"
    sxng_locale = params.get("searxng_locale")

    url_args = {
        "q": query,
        "engine": leta_engine,
        "x-sveltekit-invalidated": "001",  # hardcoded from all requests seen
    }

    # Country and language are only sent when the traits yield a value.
    region = traits.get_region(sxng_locale, traits.all_locale)  # type: ignore
    if region:
        url_args["country"] = region

    lang = traits.get_language(sxng_locale, traits.all_locale)  # type: ignore
    if lang:
        url_args["language"] = lang

    last_updated = time_range_dict.get(params["time_range"])
    if last_updated is not None:
        url_args["lastUpdated"] = last_updated

    page_number = params["pageno"]
    if page_number > 1:
        # the first page is requested without a "page" argument
        url_args["page"] = page_number

    params["url"] = f"{search_url}/search/__data.json?{urlencode(url_args)}"
    return params
140
141
def response(resp: Response) -> EngineResults:
    """Convert Leta's JSON response into engine results.

    :param resp: HTTP response whose body is the JSON document requested by
        :py:obj:`request`.
    :return: one ``MainResult`` (url, title, content) per item listed in the
        query metadata.
    """
    json_response = resp.json()

    nodes = json_response["nodes"]
    # 0: is None
    # 1: has "connected=True", not useful
    # 2: query results within "data"

    data_nodes = nodes[2]["data"]
    # Instead of a nested object structure, all objects are flattened into a
    # list.  The first object in "data_nodes" provides indices into
    # "data_nodes" to access each search result (which is itself an object of
    # more indices).
    #
    # Read the related TypedDict definitions for details.

    query_meta_data: DataNodeQueryMetaDataIndices = data_nodes[0]

    # "items" points to a list of indices, one per search result
    query_items_indices = query_meta_data["items"]

    results = EngineResults()
    for idx in data_nodes[query_items_indices]:
        query_item_indices: DataNodeResultIndices = data_nodes[idx]
        results.add(
            MainResult(
                url=data_nodes[query_item_indices["link"]],
                title=data_nodes[query_item_indices["title"]],
                content=data_nodes[query_item_indices["snippet"]],
            )
        )

    return results
174
175
def fetch_traits(engine_traits: EngineTraits) -> None:
    """Fetch languages and regions from Mullvad-Leta.

    Downloads ``{search_url}/documentation``, reads the country and language
    code tables found on that page and stores the mappings in
    ``engine_traits.languages`` and ``engine_traits.regions``.  Problems are
    reported with ``print``; the function then returns early or skips the
    affected entry instead of raising.

    :param engine_traits: traits object whose ``languages`` and ``regions``
        mappings are updated in place.
    """

    def extract_table_data(table):
        """Yield ``[first_cell, second_cell]`` for each row after the first two."""
        # NOTE(review): the first two rows are skipped, presumably because they
        # are header rows - confirm against the documentation page.
        for row in table.xpath(".//tr")[2:]:
            cells = row.xpath(".//td | .//th")  # includes headers and data
            if len(cells) > 1:  # ensure the column exists
                cell0 = cells[0].text_content().strip()
                cell1 = cells[1].text_content().strip()
                yield [cell0, cell1]

    # pylint: disable=import-outside-toplevel
    # see https://github.com/searxng/searxng/issues/762
    from searx.network import get as http_get

    # pylint: enable=import-outside-toplevel

    resp = http_get(f"{search_url}/documentation")
    if not isinstance(resp, Response):
        print("ERROR: failed to get response from mullvad-leta. Are you connected to the VPN?")
        return
    if not resp.ok:
        print("ERROR: response from mullvad-leta is not OK. Are you connected to the VPN?")
        return

    dom = html.fromstring(resp.text)

    # There are 4 HTML tables on the documentation page for extracting information:
    # 0. Keyboard Shortcuts
    # 1. Query Parameters (shoutout to Mullvad for accessible docs for integration)
    # 2. Country Codes [Country, Code]
    # 3. Language Codes [Language, Code]
    tables = eval_xpath_list(dom.body, "//table")
    if not tables:
        print("ERROR: could not find any tables. Was the page updated?")
        return
    if len(tables) < 4:
        # tables[2] and tables[3] are indexed below; bail out instead of
        # failing with an IndexError.
        print(f"ERROR: expected at least 4 tables, found {len(tables)}. Was the page updated?")
        return

    language_table = tables[3]
    # Leta codes that need translating before babel can parse them.
    # NOTE(review): "zh-hant" -> "zh_Hans" and "zh-hans" -> "zh_Hant" look
    # swapped (Hant = Traditional, Hans = Simplified) - confirm against the
    # Leta documentation before changing.
    lang_map = {
        "zh-hant": "zh_Hans",
        "zh-hans": "zh_Hant",
        "jp": "ja",
    }

    for language, code in extract_table_data(language_table):

        locale_tag = lang_map.get(code, code).replace("-", "_")  # type: ignore
        try:
            locale = babel.Locale.parse(locale_tag)
        except (babel.UnknownLocaleError, ValueError):
            # ValueError: the cell does not even look like a locale identifier
            print(f"ERROR: Mullvad-Leta language {language} ({code}) is unknown by babel")
            continue

        sxng_tag = language_tag(locale)
        engine_traits.languages[sxng_tag] = code

    country_table = tables[2]
    # Country codes with a fixed region tag; these bypass the babel lookup.
    country_map = {
        "cn": "zh-CN",
        "hk": "zh-HK",
        "jp": "ja-JP",
        "my": "ms-MY",
        "tw": "zh-TW",
        "uk": "en-GB",
        "us": "en-US",
    }

    for country, code in extract_table_data(country_table):

        sxng_tag = country_map.get(code)
        if sxng_tag:
            engine_traits.regions[sxng_tag] = code
            continue

        # First guess: language code and territory code are the same letters
        # (e.g. "de" -> "de_DE").
        try:
            locale = babel.Locale.parse(f"{code.lower()}_{code.upper()}")
        except (babel.UnknownLocaleError, ValueError):
            locale = None

        if locale:
            engine_traits.regions[region_tag(locale)] = code
            continue

        # Fallback: official locales of the country, limited to the languages
        # collected above.
        official_locales = get_official_locales(code, engine_traits.languages.keys(), regional=True)
        if not official_locales:
            print(f"ERROR: Mullvad-Leta country '{code}' ({country}) could not be mapped as expected.")
            continue

        for locale in official_locales:
            engine_traits.regions[region_tag(locale)] = code
request(str query, dict params)
EngineResults response(Response resp)
None fetch_traits(EngineTraits engine_traits)