"""Presearch supports the search types listed in :py:obj:`search_type` (general,
images, videos, news).

Configured ``presearch`` engines:

.. code:: yaml

  - name: presearch
    engine: presearch
    search_type: search
    categories: [general, web]

  - name: presearch images
    ...
    search_type: images
    categories: [images, web]

  - name: presearch videos
    ...
    search_type: videos
    categories: [general, web]

  - name: presearch news
    ...
    search_type: news
    categories: [news, web]

.. hint::

   By default Presearch's video category is intentionally placed into::

       categories: [general, web]


Search type ``video``
=====================

The results in the video category are most often links to pages that contain a
video, for instance many links from Presearch's video category link content from
facebook (aka Meta) or Twitter (aka X).  Since these are not real links to video
streams, SearXNG can't use the video template for them, and if SearXNG can't use
this template, the user doesn't want to see these hits in the videos category.


Languages & Regions
===================

In Presearch there are languages for the UI and regions for narrowing down the
search.  If we set "auto" for the region in the web UI of Presearch and the
cookie ``use_local_search_results=false``, then the defaults for both (language
and region) are taken from the ``Accept-Language`` header.

Since the region is already "auto" by default, we only need to set the
``use_local_search_results`` cookie and send the ``Accept-Language`` header.  We
have to set these values in both requests we send to Presearch: in the first
request to get the request-ID and in the final request to fetch the result list
(see ``send_accept_language_header``).
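
For illustration only (``fr-CA`` is an arbitrary example locale, not part of the
engine configuration), the ``Accept-Language`` value built in
:py:obj:`_get_request_id` evaluates to:

.. code:: python

   >>> from searx import locales
   >>> l = locales.get_locale('fr-CA')
   >>> f"{l.language}-{l.territory},{l.language};" "q=0.9,*;" "q=0.5"
   'fr-CA,fr;q=0.9,*;q=0.5'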
"""

from urllib.parse import urlencode

from searx import locales
from searx.network import get
from searx.utils import gen_useragent, html_to_text

about = {
    "website": "https://presearch.io",
    "wikidata_id": "Q7240905",
    "official_api_documentation": "https://docs.presearch.io/nodes/api",
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
}

paging = True
safesearch = True
time_range_support = True
send_accept_language_header = True
categories = ["general", "web"]

search_type = "search"
"""must be any of ``search``, ``images``, ``videos``, ``news``"""

base_url = "https://presearch.com"
safesearch_map = {0: 'false', 1: 'true', 2: 'true'}


def init(_):
    if search_type not in ['search', 'images', 'videos', 'news']:
        raise ValueError(f'presearch search_type: {search_type}')
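

# Presearch is queried in two steps: _get_request_id() scrapes a request-ID
# from the regular HTML search page, request() then asks /results?id=... for
# the JSON result list (compare the module docstring).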
def _get_request_id(query, params):
    args = {
        "q": query,
        "page": params["pageno"],
    }

    if params["time_range"]:
        args["time"] = params["time_range"]

    url = f"{base_url}/{search_type}?{urlencode(args)}"

    headers = {
        'User-Agent': gen_useragent(),
        'Cookie': (
            f" presearch_session=;"
            f" use_local_search_results=false;"
            f" use_safe_search={safesearch_map[params['safesearch']]}"
        ),
    }

    if params['searxng_locale'] != 'all':
        l = locales.get_locale(params['searxng_locale'])
        # The region stays "auto" and local results are disabled by the cookie
        # above, so Presearch derives language and region from Accept-Language
        # (see "Languages & Regions" in the module docstring).  Only send a
        # region when the SearXNG locale actually has a territory.
        if l and l.territory:
            headers['Accept-Language'] = f"{l.language}-{l.territory},{l.language};" "q=0.9,*;" "q=0.5"

    # The request-ID for the JSON results is embedded in the HTML search page
    # in an inline script as ``window.searchId``.
    resp_text = get(url, headers=headers).text

    for line in resp_text.split("\n"):
        if "window.searchId = " in line:
            return line.split("= ")[1][:-1].replace('"', "")

    return None


def request(query, params):
    request_id = _get_request_id(query, params)

    params["headers"]["Accept"] = "application/json"
    params["url"] = f"{base_url}/results?id={request_id}"

    return params


def _strip_leading_strings(text):
    # drop trailing source names (e.g. "... wikipedia") from infobox values
    for x in ['wikipedia', 'google']:
        if text.lower().endswith(x):
            text = text[: -len(x)]
    return text.strip()
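

# parse_search_query() maps the JSON answer of a 'search' request onto SearXNG
# results: top stories, the standard web results and, if present, the info
# section (rendered as an infobox).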
def parse_search_query(json_results):
    results = []

    for item in json_results.get('specialSections', {}).get('topStoriesCompact', {}).get('data', []):
        result = {
            'url': item['link'],
            'title': item['title'],
            'thumbnail': item['image'],
            'content': '',
            'metadata': item.get('source'),
        }
        results.append(result)
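
    # standard web results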
    for item in json_results.get('standardResults', []):
        result = {
            'url': item['link'],
            'title': item['title'],
            'content': html_to_text(item['description']),
        }
        results.append(result)
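
    # Presearch's 'infoSection' is mapped to a SearXNG infobox: the 'about'
    # entries become label/value attributes, subtitle and description are
    # joined into the infobox content.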
    info = json_results.get('infoSection', {}).get('data')
    if info:
        attributes = []
        content = []

        for item in info.get('about', []):
            text = html_to_text(item)
            if ':' in text:
                label, value = text.split(':', 1)
            elif ' ' in text:
                label, value = text.split(' ', 1)
            else:
                continue
            value = _strip_leading_strings(value)
            attributes.append({'label': label, 'value': value})

        for item in [info.get('subtitle'), info.get('description')]:
            if item:
                content.append(html_to_text(item))

        results.append(
            {
                'infobox': info['title'],
                'img_src': info.get('image'),
                'content': ' | '.join(content),
                'attributes': attributes,
            }
        )

    return results


def response(resp):
    json_resp = resp.json()
    results = []

    if search_type == 'search':
        results = parse_search_query(json_resp.get('results', {}))

    elif search_type == 'images':
        for item in json_resp.get('images', []):
            results.append(
                {
                    'template': 'images.html',
                    'title': item['title'],
                    'url': item.get('link'),
                    'img_src': item.get('image'),
                    'thumbnail_src': item.get('thumbnail'),
                }
            )
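
    # Results from the video category are mostly links to pages that merely
    # contain a video (e.g. facebook or Twitter/X), not to video streams, so
    # the 'videos.html' template is not used here (see the module docstring).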
    elif search_type == 'videos':
        for item in json_resp.get('videos', []):
            metadata = [x for x in [item.get('description'), item.get('duration')] if x]
            results.append(
                {
                    'title': item['title'],
                    'url': item.get('link'),
                    'content': '',
                    'metadata': ' / '.join(metadata),
                    'thumbnail': item.get('image'),
                }
            )

    elif search_type == 'news':
        for item in json_resp.get('news', []):
            metadata = [x for x in [item.get('source'), item.get('time')] if x]
            results.append(
                {
                    'title': item['title'],
                    'url': item.get('link'),
                    'content': item.get('description', ''),
                    'metadata': ' / '.join(metadata),
                    'thumbnail': item.get('image'),
                }
            )

    return results