.oO SearXNG Developer Documentation Oo.
results.py
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, missing-class-docstring
from __future__ import annotations

import warnings
from collections import defaultdict
from threading import RLock
from typing import List, NamedTuple, Set

from searx import logger as log
import searx.engines
from searx.metrics import histogram_observe, counter_add
from searx.result_types import Result, LegacyResult, MainResult
from searx.result_types.answer import AnswerSet, BaseAnswer


def calculate_score(result, priority) -> float:
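    """Compute the ranking score of a merged result.

    The weight is the product of the weights of the engines that returned the
    result, multiplied by the number of positions the result was found at.
    Results with priority ``low`` score 0, priority ``high`` adds the full
    weight per position, and the default adds ``weight / position``.
    """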
    weight = 1.0

    for result_engine in result['engines']:
        if hasattr(searx.engines.engines.get(result_engine), 'weight'):
            weight *= float(searx.engines.engines[result_engine].weight)

    weight *= len(result['positions'])
    score = 0

    for position in result['positions']:
        if priority == 'low':
            continue
        if priority == 'high':
            score += weight
        else:
            score += weight / position

    return score


class Timing(NamedTuple):
    engine: str
    total: float
    load: float


class UnresponsiveEngine(NamedTuple):
    engine: str
    error_type: str
    suspended: bool


51 """In the result container, the results are collected, sorted and duplicates
52 will be merged."""
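    # Typical lifecycle: results are added with extend(), the container is
    # finalized with close() (the scores are calculated there), and the sorted,
    # grouped list is read with get_ordered_results().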

    # pylint: disable=too-many-statements

    main_results_map: dict[int, MainResult | LegacyResult]
    infoboxes: list[LegacyResult]
    suggestions: set[str]
    answers: AnswerSet
    corrections: set[str]

    def __init__(self):
        self.main_results_map = {}
        self.infoboxes = []
        self.suggestions = set()
        self.answers = AnswerSet()
        self.corrections = set()

        self._number_of_results: list[int] = []
        self.engine_data: dict[str, dict[str, str]] = defaultdict(dict)
        self._closed: bool = False
        self.paging: bool = False
        self.unresponsive_engines: Set[UnresponsiveEngine] = set()
        self.timings: List[Timing] = []
        self.redirect_url: str | None = None
        self.on_result = lambda _: True
        self._lock = RLock()
        self._main_results_sorted: list[MainResult | LegacyResult] = None  # type: ignore

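    # extend() accepts both typed Result objects and legacy dict results; dicts
    # are wrapped in LegacyResult and dispatched on their keys (suggestion,
    # answer, correction, infobox, number_of_results, engine_data, ...).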
    def extend(self, engine_name: str | None, results):  # pylint: disable=too-many-branches
        if self._closed:
            log.debug("container is closed, ignoring results: %s", results)
            return
        main_count = 0

        for result in list(results):

            if isinstance(result, Result):
                result.engine = result.engine or engine_name
                result.normalize_result_fields()

                if isinstance(result, BaseAnswer) and self.on_result(result):
                    self.answers.add(result)
                elif isinstance(result, MainResult) and self.on_result(result):
                    main_count += 1
                    self._merge_main_result(result, main_count)
                else:
                    # more types need to be implemented in the future ..
                    raise NotImplementedError(f"no handler implemented to process the result of type {result}")

            else:
                result["engine"] = result.get("engine") or engine_name or ""
                result = LegacyResult(result)  # for backward compatibility, will be removed one day
                result.normalize_result_fields()

                if "suggestion" in result:
                    if self.on_result(result):
                        self.suggestions.add(result["suggestion"])
                    continue

                if "answer" in result:
                    if self.on_result(result):
                        warnings.warn(
                            f"answer results from engine {result.engine}"
                            " are without typification / migrate to Answer class.",
                            DeprecationWarning,
                        )
                        self.answers.add(result)  # type: ignore
                    continue

                if "correction" in result:
                    if self.on_result(result):
                        self.corrections.add(result["correction"])
                    continue

                if "infobox" in result:
                    if self.on_result(result):
                        self._merge_infobox(result)
                    continue

                if "number_of_results" in result:
                    if self.on_result(result):
                        self._number_of_results.append(result["number_of_results"])
                    continue

                if "engine_data" in result:
                    if self.on_result(result):
                        if result.engine:
                            self.engine_data[result.engine][result["key"]] = result["engine_data"]
                    continue

                if self.on_result(result):
                    main_count += 1
                    self._merge_main_result(result, main_count)
                continue

        if engine_name in searx.engines.engines:
            eng = searx.engines.engines[engine_name]
            histogram_observe(main_count, "engine", eng.name, "result", "count")
            if not self.paging and eng.paging:
                self.paging = True

    def _merge_infobox(self, new_infobox: LegacyResult):
        add_infobox = True

        new_id = getattr(new_infobox, "id", None)
        if new_id is not None:
            with self._lock:
                for existing_infobox in self.infoboxes:
                    if new_id == getattr(existing_infobox, "id", None):
                        merge_two_infoboxes(existing_infobox, new_infobox)
                        add_infobox = False
        if add_infobox:
            self.infoboxes.append(new_infobox)

    def _merge_main_result(self, result: MainResult | LegacyResult, position):
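        """Merge ``result`` into ``main_results_map``: ``hash(result)`` is the
        deduplication key, duplicates are merged into the already stored result
        and only the new position is appended."""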
        result_hash = hash(result)

        with self._lock:

            merged = self.main_results_map.get(result_hash)
            if not merged:
                # if there is no duplicate in the merged results, append result
                result.positions = [position]
                self.main_results_map[result_hash] = result
                return

            merge_two_main_results(merged, result)
            # add the new position
            merged.positions.append(position)

    def close(self):
        self._closed = True

        for result in self.main_results_map.values():
            result.score = calculate_score(result, result.priority)
            for eng_name in result.engines:
                counter_add(result.score, 'engine', eng_name, 'score')

    def get_ordered_results(self) -> list[MainResult | LegacyResult]:
        """Returns a sorted list of results to be displayed in the main result
        area (:ref:`result types`)."""
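        # Two passes: results are first sorted by score (descending), then
        # results sharing the same category/template are regrouped so that at
        # most max_count of them are placed together and a group is only joined
        # within max_distance positions of its start.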

        if not self._closed:
            self.close()

        if self._main_results_sorted:
            return self._main_results_sorted

        # first pass, sort results by "score" (descending)
        results = sorted(self.main_results_map.values(), key=lambda x: x.score, reverse=True)

        # pass 2 : group results by category and template
        gresults = []
        categoryPositions = {}
        max_count = 8
        max_distance = 20

        for res in results:
            # do we need to handle more than one category per engine?
            engine = searx.engines.engines.get(res.engine or "")
            if engine:
                res.category = engine.categories[0] if len(engine.categories) > 0 else ""

            # do we need to handle more than one category per engine?
            category = f"{res.category}:{res.template}:{'img_src' if (res.thumbnail or res.img_src) else ''}"
            grp = categoryPositions.get(category)

            # group with previous results using the same category, if the group
            # can accept more result and is not too far from the current
            # position

            if (grp is not None) and (grp["count"] > 0) and (len(gresults) - grp["index"] < max_distance):
                # group with the previous results using the same category with
                # this one
                index = grp["index"]
                gresults.insert(index, res)

                # update every index after the current one (including the
                # current one)
                for item in categoryPositions.values():
                    v = item["index"]
                    if v >= index:
                        item["index"] = v + 1

                # update this category
                grp["count"] -= 1

            else:
                gresults.append(res)
                # update categoryIndex
                categoryPositions[category] = {"index": len(gresults), "count": max_count}
                continue

        self._main_results_sorted = gresults
        return self._main_results_sorted

    @property
    def number_of_results(self) -> int:
250 """Returns the average of results number, returns zero if the average
251 result number is smaller than the actual result count."""

        if not self._closed:
            log.error("call to ResultContainer.number_of_results before ResultContainer.close")
            return 0

        with self._lock:
            resultnum_sum = sum(self._number_of_results)
            if not resultnum_sum or not self._number_of_results:
                return 0

            average = int(resultnum_sum / len(self._number_of_results))
            if average < len(self.get_ordered_results()):
                average = 0
            return average

    def add_unresponsive_engine(self, engine_name: str, error_type: str, suspended: bool = False):
        with self._lock:
            if self._closed:
                log.error("call to ResultContainer.add_unresponsive_engine after ResultContainer.close")
                return
            if searx.engines.engines[engine_name].display_error_messages:
                self.unresponsive_engines.add(UnresponsiveEngine(engine_name, error_type, suspended))

    def add_timing(self, engine_name: str, engine_time: float, page_load_time: float):
        with self._lock:
            if self._closed:
                log.error("call to ResultContainer.add_timing after ResultContainer.close")
                return
            self.timings.append(Timing(engine_name, total=engine_time, load=page_load_time))

    def get_timings(self):
        with self._lock:
            if not self._closed:
                log.error("call to ResultContainer.get_timings before ResultContainer.close")
                return []
            return self.timings


def merge_two_infoboxes(origin: LegacyResult, other: LegacyResult):
    """Merges the values from ``other`` into ``origin``."""
    # pylint: disable=too-many-branches
    weight1 = getattr(searx.engines.engines[origin.engine], "weight", 1)
    weight2 = getattr(searx.engines.engines[other.engine], "weight", 1)

    if weight2 > weight1:
        origin.engine = other.engine

    origin.engines |= other.engines

    if other.urls:
        url_items = origin.get("urls", [])

        for url2 in other.urls:
            unique_url = True
            entity_url2 = url2.get("entity")

            for url1 in origin.get("urls", []):
                if (entity_url2 is not None and entity_url2 == url1.get("entity")) or (
                    url1.get("url") == url2.get("url")
                ):
                    unique_url = False
                    break
            if unique_url:
                url_items.append(url2)

        origin.urls = url_items

    if other.img_src:
        if not origin.img_src:
            origin.img_src = other.img_src
        elif weight2 > weight1:
            origin.img_src = other.img_src

    if other.attributes:
        if not origin.attributes:
            origin.attributes = other.attributes
        else:
            attr_names_1 = set()
            for attr in origin.attributes:
                label = attr.get("label")
                if label:
                    attr_names_1.add(label)

                entity = attr.get("entity")
                if entity:
                    attr_names_1.add(entity)

            for attr in other.attributes:
                if attr.get("label") not in attr_names_1 and attr.get('entity') not in attr_names_1:
                    origin.attributes.append(attr)

    if other.content:
        if not origin.content:
            origin.content = other.content
        elif len(other.content) > len(origin.content):
            origin.content = other.content


def merge_two_main_results(origin: MainResult | LegacyResult, other: MainResult | LegacyResult):
    """Merges the values from ``other`` into ``origin``."""

    if len(other.content) > len(origin.content):
        # use content with more text
        origin.content = other.content

    # use title with more text
    if len(other.title) > len(origin.title):
        origin.title = other.title

    # merge all result's parameters not found in origin
    if isinstance(other, MainResult) and isinstance(origin, MainResult):
        origin.defaults_from(other)
    elif isinstance(other, LegacyResult) and isinstance(origin, LegacyResult):
        origin.defaults_from(other)

    # add engine to list of result-engines
    origin.engines.add(other.engine or "")

    # use https, ftps, .. if possible
    if origin.parsed_url and not origin.parsed_url.scheme.endswith("s"):
        if other.parsed_url and other.parsed_url.scheme.endswith("s"):
            origin.parsed_url = origin.parsed_url._replace(scheme=other.parsed_url.scheme)
            origin.url = origin.parsed_url.geturl()
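
A minimal usage sketch of ResultContainer (not part of results.py, and only a sketch): it assumes a SearXNG environment where the settings can be loaded, searx.search.initialize() has already been called so that engines and metrics are set up, and the engine name passed to extend() is enabled in the instance's settings (close() records per-engine score counters, which only exist for configured engines).

from searx import search, results

search.initialize()  # assumption: a usable settings.yml is in place

container = results.ResultContainer()
container.extend(
    "wikipedia",  # assumption: this engine is enabled in the instance's settings
    [{"url": "https://example.org/", "title": "Example", "content": "An example result."}],
)
container.close()

for res in container.get_ordered_results():
    print(res.score, res.url)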