.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
results.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2# pylint: disable=missing-module-docstring, missing-class-docstring
3from __future__ import annotations
4
5import warnings
6from collections import defaultdict
7from threading import RLock
8from typing import List, NamedTuple, Set
9
10from searx import logger as log
11import searx.engines
12from searx.metrics import histogram_observe, counter_add
13from searx.result_types import Result, LegacyResult, MainResult
14from searx.result_types.answer import AnswerSet, BaseAnswer
15
16
def calculate_score(result, priority) -> float:
    """Compute the ranking score of a merged result.

    The base ``weight`` is the product of the ``weight`` attribute of every
    engine that returned the result, multiplied by the number of positions
    the result appeared at.  The score then depends on ``priority``:

    - ``'low'``: the score is always 0 (result is ranked last)
    - ``'high'``: every position contributes the full weight
    - otherwise: every position contributes ``weight / position``
    """
    # priority is loop-invariant: decide once instead of re-testing it for
    # every position (behavior is unchanged)
    if priority == 'low':
        return 0

    weight = 1.0
    for result_engine in result['engines']:
        # engine weight is an optional, configurable attribute (default 1.0)
        if hasattr(searx.engines.engines.get(result_engine), 'weight'):
            weight *= float(searx.engines.engines[result_engine].weight)

    weight *= len(result['positions'])

    if priority == 'high':
        # each position adds the full weight
        return weight * len(result['positions'])
    # default: positions further down contribute proportionally less
    return sum(weight / position for position in result['positions'])
36
37
class Timing(NamedTuple):
    """Response timing of a single engine request."""

    engine: str  # name of the engine the timing belongs to
    total: float  # total processing time of the request — presumably seconds; verify at call site
    load: float  # page-load part of the request — same unit as ``total``
43
class UnresponsiveEngine(NamedTuple):
    """Engine that did not deliver results for a request."""

    engine: str  # name of the unresponsive engine
    error_type: str  # short identifier of the error that occurred
    suspended: bool  # True if the engine was suspended as a consequence
49
51 """In the result container, the results are collected, sorted and duplicates
52 will be merged."""
53
54 # pylint: disable=too-many-statements
55
56 main_results_map: dict[int, MainResult | LegacyResult]
57 infoboxes: list[LegacyResult]
58 suggestions: set[str]
59 answers: AnswerSet
60 corrections: set[str]
61
62 def __init__(self):
63 self.main_results_map = {}
64 self.infoboxes = []
65 self.suggestions = set()
67 self.corrections = set()
68
69 self._number_of_results: list[int] = []
70 self.engine_data: dict[str, dict[str, str]] = defaultdict(dict)
71 self._closed: bool = False
72 self.paging: bool = False
73 self.unresponsive_engines: Set[UnresponsiveEngine] = set()
74 self.timings: List[Timing] = []
75 self.redirect_url: str | None = None
76 self.on_result = lambda _: True
77 self._lock = RLock()
78 self._main_results_sorted: list[MainResult | LegacyResult] = None # type: ignore
79
    def extend(self, engine_name: str | None, results):  # pylint: disable=too-many-branches
        """Add the results of one engine response to the container.

        Typed results (:py:class:`Result` instances) are dispatched by their
        type; legacy ``dict`` results are wrapped in :py:class:`LegacyResult`
        and dispatched by their keys.  Results arriving after :py:meth:`close`
        are ignored.
        """
        if self._closed:
            log.debug("container is closed, ignoring results: %s", results)
            return
        main_count = 0

        for result in list(results):

            if isinstance(result, Result):
                result.engine = result.engine or engine_name
                result.normalize_result_fields()
                # plugin hook: a falsy return value drops the result
                if not self.on_result(result):
                    continue

                if isinstance(result, BaseAnswer):
                    self.answers.add(result)
                elif isinstance(result, MainResult):
                    main_count += 1
                    self._merge_main_result(result, main_count)
                else:
                    # more types need to be implemented in the future ..
                    raise NotImplementedError(f"no handler implemented to process the result of type {result}")

            else:
                # legacy result dict — dispatched by the keys it carries
                result["engine"] = result.get("engine") or engine_name or ""
                result = LegacyResult(result)  # for backward compatibility, will be removed one day
                result.normalize_result_fields()

                if "suggestion" in result:
                    if self.on_result(result):
                        self.suggestions.add(result["suggestion"])
                    continue

                if "answer" in result:
                    if self.on_result(result):
                        warnings.warn(
                            f"answer results from engine {result.engine}"
                            " are without typification / migrate to Answer class.",
                            DeprecationWarning,
                        )
                        self.answers.add(result)  # type: ignore
                    continue

                if "correction" in result:
                    if self.on_result(result):
                        self.corrections.add(result["correction"])
                    continue

                if "infobox" in result:
                    if self.on_result(result):
                        self._merge_infobox(result)
                    continue

                if "number_of_results" in result:
                    if self.on_result(result):
                        self._number_of_results.append(result["number_of_results"])
                    continue

                if "engine_data" in result:
                    if self.on_result(result):
                        # engine_data is only stored when the engine is known
                        if result.engine:
                            self.engine_data[result.engine][result["key"]] = result["engine_data"]
                    continue

                # no special key matched: treat it as a main result
                if self.on_result(result):
                    main_count += 1
                    self._merge_main_result(result, main_count)
                    continue

        # per-engine bookkeeping: result count metric and paging capability
        if engine_name in searx.engines.engines:
            eng = searx.engines.engines[engine_name]
            histogram_observe(main_count, "engine", eng.name, "result", "count")
            if not self.paging and eng.paging:
                self.paging = True
154
155 def _merge_infobox(self, new_infobox: LegacyResult):
156 add_infobox = True
157
158 new_id = getattr(new_infobox, "id", None)
159 if new_id is not None:
160 with self._lock:
161 for existing_infobox in self.infoboxes:
162 if new_id == getattr(existing_infobox, "id", None):
163 merge_two_infoboxes(existing_infobox, new_infobox)
164 add_infobox = False
165 if add_infobox:
166 self.infoboxes.append(new_infobox)
167
168 def _merge_main_result(self, result: MainResult | LegacyResult, position):
169 result_hash = hash(result)
170
171 with self._lock:
172
173 merged = self.main_results_map.get(result_hash)
174 if not merged:
175 # if there is no duplicate in the merged results, append result
176 result.positions = [position]
177 self.main_results_map[result_hash] = result
178 return
179
180 merge_two_main_results(merged, result)
181 # add the new position
182 merged.positions.append(position)
183
184 def close(self):
185 self._closed = True
186
187 for result in self.main_results_map.values():
188 result.score = calculate_score(result, result.priority)
189 for eng_name in result.engines:
190 counter_add(result.score, 'engine', eng_name, 'score')
191
    def get_ordered_results(self) -> list[MainResult | LegacyResult]:
        """Returns a sorted list of results to be displayed in the main result
        area (:ref:`result types`)."""

        # scores are computed on close(); closing here keeps lazy callers safe
        if not self._closed:
            self.close()

        # the grouped ordering is computed only once and then cached
        if self._main_results_sorted:
            return self._main_results_sorted

        # first pass, sort results by "score" (descending)
        results = sorted(self.main_results_map.values(), key=lambda x: x.score, reverse=True)

        # pass 2 : group results by category and template
        gresults = []
        categoryPositions = {}
        max_count = 8  # a group accepts at most 8 results
        max_distance = 20  # .. and only while the group is less than 20 positions away

        for res in results:
            # do we need to handle more than one category per engine?
            engine = searx.engines.engines.get(res.engine or "")
            if engine:
                res.category = engine.categories[0] if len(engine.categories) > 0 else ""

            # do we need to handle more than one category per engine?
            category = f"{res.category}:{res.template}:{'img_src' if (res.thumbnail or res.img_src) else ''}"
            grp = categoryPositions.get(category)

            # group with previous results using the same category, if the group
            # can accept more result and is not too far from the current
            # position

            if (grp is not None) and (grp["count"] > 0) and (len(gresults) - grp["index"] < max_distance):
                # group with the previous results using the same category with
                # this one
                index = grp["index"]
                gresults.insert(index, res)

                # update every index after the current one (including the
                # current one)
                for item in categoryPositions.values():
                    v = item["index"]
                    if v >= index:
                        item["index"] = v + 1

                # update this category
                grp["count"] -= 1

            else:
                gresults.append(res)
                # update categoryIndex
                categoryPositions[category] = {"index": len(gresults), "count": max_count}
                continue

        self._main_results_sorted = gresults
        return self._main_results_sorted
249
250 @property
251 def number_of_results(self) -> int:
252 """Returns the average of results number, returns zero if the average
253 result number is smaller than the actual result count."""
254
255 if not self._closed:
256 log.error("call to ResultContainer.number_of_results before ResultContainer.close")
257 return 0
258
259 with self._lock:
260 resultnum_sum = sum(self._number_of_results)
261 if not resultnum_sum or not self._number_of_results:
262 return 0
263
264 average = int(resultnum_sum / len(self._number_of_results))
265 if average < len(self.get_ordered_results()):
266 average = 0
267 return average
268
269 def add_unresponsive_engine(self, engine_name: str, error_type: str, suspended: bool = False):
270 with self._lock:
271 if self._closed:
272 log.error("call to ResultContainer.add_unresponsive_engine after ResultContainer.close")
273 return
274 if searx.engines.engines[engine_name].display_error_messages:
275 self.unresponsive_engines.add(UnresponsiveEngine(engine_name, error_type, suspended))
276
277 def add_timing(self, engine_name: str, engine_time: float, page_load_time: float):
278 with self._lock:
279 if self._closed:
280 log.error("call to ResultContainer.add_timing after ResultContainer.close")
281 return
282 self.timings.append(Timing(engine_name, total=engine_time, load=page_load_time))
283
284 def get_timings(self):
285 with self._lock:
286 if not self._closed:
287 log.error("call to ResultContainer.get_timings before ResultContainer.close")
288 return []
289 return self.timings
290
291
def merge_two_infoboxes(origin: LegacyResult, other: LegacyResult):
    """Merges the values from ``other`` into ``origin``."""
    # pylint: disable=too-many-branches

    # engine weights decide conflicts: the heavier engine wins
    weight1 = getattr(searx.engines.engines[origin.engine], "weight", 1)
    weight2 = getattr(searx.engines.engines[other.engine], "weight", 1)

    if weight2 > weight1:
        origin.engine = other.engine

    origin.engines |= other.engines

    if other.urls:
        # NOTE: url_items aliases origin's url list (if any), appends below
        # extend it in place
        url_items = origin.get("urls", [])

        for url2 in other.urls:
            unique_url = True
            entity_url2 = url2.get("entity")

            # a url is a duplicate if it matches by entity or by url value
            for url1 in origin.get("urls", []):
                if (entity_url2 is not None and entity_url2 == url1.get("entity")) or (
                    url1.get("url") == url2.get("url")
                ):
                    unique_url = False
                    break
            if unique_url:
                url_items.append(url2)

        origin.urls = url_items

    # keep origin's image unless it has none or the other engine is heavier
    if other.img_src:
        if not origin.img_src:
            origin.img_src = other.img_src
        elif weight2 > weight1:
            origin.img_src = other.img_src

    if other.attributes:
        if not origin.attributes:
            origin.attributes = other.attributes
        else:
            # collect the labels/entities already present in origin ..
            attr_names_1 = set()
            for attr in origin.attributes:
                label = attr.get("label")
                if label:
                    attr_names_1.add(label)

                entity = attr.get("entity")
                if entity:
                    attr_names_1.add(entity)

            # .. and append only attributes not known yet
            for attr in other.attributes:
                if attr.get("label") not in attr_names_1 and attr.get('entity') not in attr_names_1:
                    origin.attributes.append(attr)

    # prefer the longer content text
    if other.content:
        if not origin.content:
            origin.content = other.content
        elif len(other.content) > len(origin.content):
            origin.content = other.content
350
351
def merge_two_main_results(origin: MainResult | LegacyResult, other: MainResult | LegacyResult):
    """Merges the values from ``other`` into ``origin``."""

    # prefer the longer content and the longer title
    if len(other.content) > len(origin.content):
        origin.content = other.content
    if len(other.title) > len(origin.title):
        origin.title = other.title

    # copy parameters missing in origin — but only between results of the
    # same kind (typed/typed or legacy/legacy)
    same_kind = (isinstance(origin, MainResult) and isinstance(other, MainResult)) or (
        isinstance(origin, LegacyResult) and isinstance(other, LegacyResult)
    )
    if same_kind:
        origin.defaults_from(other)

    # remember every engine that delivered this result
    origin.engines.add(other.engine or "")

    # upgrade to a secure scheme (https, ftps, ..) when the other result
    # offers one and origin does not have it yet
    if origin.parsed_url and not origin.parsed_url.scheme.endswith("s"):
        if other.parsed_url and other.parsed_url.scheme.endswith("s"):
            origin.parsed_url = origin.parsed_url._replace(scheme=other.parsed_url.scheme)
            origin.url = origin.parsed_url.geturl()
add_unresponsive_engine(self, str engine_name, str error_type, bool suspended=False)
Definition results.py:269
dict[str, dict[str, str]] engine_data
Definition results.py:70
extend(self, str|None engine_name, results)
Definition results.py:80
list[MainResult|LegacyResult] _main_results_sorted
Definition results.py:78
Set[UnresponsiveEngine] unresponsive_engines
Definition results.py:73
_merge_main_result(self, MainResult|LegacyResult result, position)
Definition results.py:168
_merge_infobox(self, LegacyResult new_infobox)
Definition results.py:155
list[MainResult|LegacyResult] get_ordered_results(self)
Definition results.py:192
add_timing(self, str engine_name, float engine_time, float page_load_time)
Definition results.py:277
::1337x
Definition 1337x.py:1
merge_two_infoboxes(LegacyResult origin, LegacyResult other)
Definition results.py:292
merge_two_main_results(MainResult|LegacyResult origin, MainResult|LegacyResult other)
Definition results.py:352
float calculate_score(result, priority)
Definition results.py:17