.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
searx.results.ResultContainer Class Reference
+ Collaboration diagram for searx.results.ResultContainer:

Public Member Functions

 __init__ (self)
 
 extend (self, str|None engine_name, results)
 
 close (self)
 
 get_ordered_results (self)
 
 results_length (self)
 
int number_of_results (self)
 
 add_unresponsive_engine (self, str engine_name, str error_type, bool suspended=False)
 
 add_timing (self, str engine_name, float engine_time, float page_load_time)
 
 get_timings (self)
 

Public Attributes

list infoboxes = []
 
set[str] suggestions = set()
 
 answers = AnswerSet()
 
 corrections = set()
 
dict[str, str|dict] engine_data = defaultdict(dict)
 
bool paging = False
 
Set[UnresponsiveEngine] unresponsive_engines = set()
 
list timings = []
 
 redirect_url = None
 
bool on_result = lambda _: True
 

Protected Member Functions

 _merge_infobox (self, infobox)
 
 _is_valid_url_result (self, result, error_msgs)
 

Protected Attributes

list _merged_results = []
 
list _number_of_results = []
 
bool _closed = False
 
 _lock = RLock()
 

Private Member Functions

 __merge_url_result (self, result, position)
 
 __find_duplicated_http_result (self, result)
 
 __merge_duplicated_http_result (self, duplicated, result, position)
 
 __merge_result_no_url (self, result, position)
 

Static Private Attributes

tuple __slots__
 

Detailed Description

docstring for ResultContainer

Definition at line 168 of file results.py.

Constructor & Destructor Documentation

◆ __init__()

searx.results.ResultContainer.__init__ ( self)

Definition at line 188 of file results.py.

188 def __init__(self):
189 super().__init__()
190 self._merged_results: list[LegacyResult] = []
191 self.infoboxes: list[dict] = []
192 self.suggestions: set[str] = set()
193 self.answers = AnswerSet()
194 self.corrections = set()
195 self._number_of_results: list[int] = []
196 self.engine_data: dict[str, str | dict] = defaultdict(dict)
197 self._closed: bool = False
198 self.paging: bool = False
199 self.unresponsive_engines: Set[UnresponsiveEngine] = set()
200 self.timings: List[Timing] = []
201 self.redirect_url = None
202 self.on_result = lambda _: True
203 self._lock = RLock()
204

Member Function Documentation

◆ __find_duplicated_http_result()

searx.results.ResultContainer.__find_duplicated_http_result ( self,
result )
private

Definition at line 318 of file results.py.

318 def __find_duplicated_http_result(self, result):
319 result_template = result.get('template')
320 for merged_result in self._merged_results:
321 if not merged_result.get('parsed_url'):
322 continue
323
324 if compare_urls(result['parsed_url'], merged_result['parsed_url']) and result_template == merged_result.get(
325 'template'
326 ):
327 if result_template != 'images.html':
328 # not an image, same template, same url : it's a duplicate
329 return merged_result
330
331 # it's an image
332 # it's a duplicate if the parsed_url, template and img_src are different
333 if result.get('img_src', '') == merged_result.get('img_src', ''):
334 return merged_result
335 return None
336

◆ __merge_duplicated_http_result()

searx.results.ResultContainer.__merge_duplicated_http_result ( self,
duplicated,
result,
position )
private

Definition at line 337 of file results.py.

337 def __merge_duplicated_http_result(self, duplicated, result, position):
338 # use content with more text
339 if result_content_len(result.get('content', '')) > result_content_len(duplicated.get('content', '')):
340 duplicated['content'] = result['content']
341
342 # use title with more text
343 if result_content_len(result.get('title', '')) > len(duplicated.get('title', '')):
344 duplicated['title'] = result['title']
345
346 # merge all result's parameters not found in duplicate
347 for key in result.keys():
348 if not duplicated.get(key):
349 duplicated[key] = result.get(key)
350
351 # add the new position
352 duplicated['positions'].append(position)
353
354 # add engine to list of result-engines
355 duplicated['engines'].add(result['engine'])
356
357 # use https if possible
358 if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https':
359 duplicated['url'] = result['parsed_url'].geturl()
360 duplicated['parsed_url'] = result['parsed_url']
361

◆ __merge_result_no_url()

searx.results.ResultContainer.__merge_result_no_url ( self,
result,
position )
private

Definition at line 362 of file results.py.

362 def __merge_result_no_url(self, result, position):
363 result['engines'] = set([result['engine']])
364 result['positions'] = [position]
365 with self._lock:
366 self._merged_results.append(result)
367

◆ __merge_url_result()

searx.results.ResultContainer.__merge_url_result ( self,
result,
position )
private

Definition at line 306 of file results.py.

306 def __merge_url_result(self, result, position):
307 result['engines'] = set([result['engine']])
308 with self._lock:
309 duplicated = self.__find_duplicated_http_result(result)
310 if duplicated:
311 self.__merge_duplicated_http_result(duplicated, result, position)
312 return
313
314 # if there is no duplicate found, append result
315 result['positions'] = [position]
316 self._merged_results.append(result)
317

◆ _is_valid_url_result()

searx.results.ResultContainer._is_valid_url_result ( self,
result,
error_msgs )
protected

Definition at line 286 of file results.py.

286 def _is_valid_url_result(self, result, error_msgs):
287 if 'url' in result:
288 if not isinstance(result['url'], str):
289 logger.debug('result: invalid URL: %s', str(result))
290 error_msgs.add('invalid URL')
291 return False
292
293 if 'title' in result and not isinstance(result['title'], str):
294 logger.debug('result: invalid title: %s', str(result))
295 error_msgs.add('invalid title')
296 return False
297
298 if 'content' in result:
299 if not isinstance(result['content'], str):
300 logger.debug('result: invalid content: %s', str(result))
301 error_msgs.add('invalid content')
302 return False
303
304 return True
305

◆ _merge_infobox()

searx.results.ResultContainer._merge_infobox ( self,
infobox )
protected

Definition at line 271 of file results.py.

271 def _merge_infobox(self, infobox):
272 add_infobox = True
273 infobox_id = infobox.get('id', None)
274 infobox['engines'] = set([infobox['engine']])
275 if infobox_id is not None:
276 parsed_url_infobox_id = urlparse(infobox_id)
277 with self._lock:
278 for existingIndex in self.infoboxes:
279 if compare_urls(urlparse(existingIndex.get('id', '')), parsed_url_infobox_id):
280 merge_two_infoboxes(existingIndex, infobox)
281 add_infobox = False
282
283 if add_infobox:
284 self.infoboxes.append(infobox)
285

◆ add_timing()

searx.results.ResultContainer.add_timing ( self,
str engine_name,
float engine_time,
float page_load_time )

Definition at line 471 of file results.py.

471 def add_timing(self, engine_name: str, engine_time: float, page_load_time: float):
472 with self._lock:
473 if self._closed:
474 logger.error("call to ResultContainer.add_timing after ResultContainer.close")
475 return
476 self.timings.append(Timing(engine_name, total=engine_time, load=page_load_time))
477

◆ add_unresponsive_engine()

searx.results.ResultContainer.add_unresponsive_engine ( self,
str engine_name,
str error_type,
bool suspended = False )

Definition at line 463 of file results.py.

463 def add_unresponsive_engine(self, engine_name: str, error_type: str, suspended: bool = False):
464 with self._lock:
465 if self._closed:
466 logger.error("call to ResultContainer.add_unresponsive_engine after ResultContainer.close")
467 return
468 if engines[engine_name].display_error_messages:
469 self.unresponsive_engines.add(UnresponsiveEngine(engine_name, error_type, suspended))
470

◆ close()

searx.results.ResultContainer.close ( self)

Definition at line 368 of file results.py.

368 def close(self):
369 self._closed = True
370
371 for result in self._merged_results:
372 result['score'] = result_score(result, result.get('priority'))
373 # removing html content and whitespace duplications
374 if result.get('content'):
375 result['content'] = result['content'].strip()
376 if result.get('title'):
377 result['title'] = ' '.join(result['title'].strip().split())
378
379 for result_engine in result['engines']:
380 counter_add(result['score'], 'engine', result_engine, 'score')
381
382 results = sorted(self._merged_results, key=itemgetter('score'), reverse=True)
383
384 # pass 2 : group results by category and template
385 gresults = []
386 categoryPositions = {}
387
388 for res in results:
389 if not res.get('url'):
390 continue
391
392 # do we need to handle more than one category per engine?
393 engine = engines[res['engine']]
394 res['category'] = engine.categories[0] if len(engine.categories) > 0 else ''
395
396 # do we need to handle more than one category per engine?
397 category = (
398 res['category']
399 + ':'
400 + res.get('template', '')
401 + ':'
402 + ('img_src' if 'img_src' in res or 'thumbnail' in res else '')
403 )
404
405 current = None if category not in categoryPositions else categoryPositions[category]
406
407 # group with previous results using the same category
408 # if the group can accept more result and is not too far
409 # from the current position
410 if current is not None and (current['count'] > 0) and (len(gresults) - current['index'] < 20):
411 # group with the previous results using
412 # the same category with this one
413 index = current['index']
414 gresults.insert(index, res)
415
416 # update every index after the current one
417 # (including the current one)
418 for k in categoryPositions: # pylint: disable=consider-using-dict-items
419 v = categoryPositions[k]['index']
420 if v >= index:
421 categoryPositions[k]['index'] = v + 1
422
423 # update this category
424 current['count'] -= 1
425
426 else:
427 # same category
428 gresults.append(res)
429
430 # update categoryIndex
431 categoryPositions[category] = {'index': len(gresults), 'count': 8}
432
433 # update _merged_results
434 self._merged_results = gresults
435

◆ extend()

searx.results.ResultContainer.extend ( self,
str | None engine_name,
results )

Definition at line 205 of file results.py.

205 def extend(self, engine_name: str | None, results): # pylint: disable=too-many-branches
206 if self._closed:
207 return
208
209 standard_result_count = 0
210 error_msgs = set()
211
212 for result in list(results):
213
214 if isinstance(result, Result):
215 result.engine = result.engine or engine_name
216 result.normalize_result_fields()
217
218 if isinstance(result, BaseAnswer) and self.on_result(result):
219 self.answers.add(result)
220 else:
221 # more types need to be implemented in the future ..
222 raise NotImplementedError(f"no handler implemented to process the result of type {result}")
223
224 else:
225 result['engine'] = result.get('engine') or engine_name or ""
226 result = LegacyResult(result) # for backward compatibility, will be removed one day
227
228 if 'suggestion' in result and self.on_result(result):
229 self.suggestions.add(result['suggestion'])
230 elif 'answer' in result and self.on_result(result):
231 warnings.warn(
232 f"answer results from engine {result.engine}"
233 " are without typification / migrate to Answer class.",
234 DeprecationWarning,
235 )
236 self.answers.add(result)
237 elif 'correction' in result and self.on_result(result):
238 self.corrections.add(result['correction'])
239 elif 'infobox' in result and self.on_result(result):
240 self._merge_infobox(result)
241 elif 'number_of_results' in result and self.on_result(result):
242 self._number_of_results.append(result['number_of_results'])
243 elif 'engine_data' in result and self.on_result(result):
244 self.engine_data[result.engine][result['key']] = result['engine_data']
245 elif result.url:
246 # standard result (url, title, content)
247 if not self._is_valid_url_result(result, error_msgs):
248 continue
249 # normalize the result
250 result.normalize_result_fields()
251 # call on_result call searx.search.SearchWithPlugins._on_result
252 # which calls the plugins
253 if not self.on_result(result):
254 continue
255 self.__merge_url_result(result, standard_result_count + 1)
256 standard_result_count += 1
257 elif self.on_result(result):
258 self.__merge_result_no_url(result, standard_result_count + 1)
259 standard_result_count += 1
260
261 if len(error_msgs) > 0:
262 for msg in error_msgs:
263 count_error(engine_name, 'some results are invalids: ' + msg, secondary=True)
264
265 if engine_name in engines:
266 histogram_observe(standard_result_count, 'engine', engine_name, 'result', 'count')
267
268 if not self.paging and engine_name in engines and engines[engine_name].paging:
269 self.paging = True
270

◆ get_ordered_results()

searx.results.ResultContainer.get_ordered_results ( self)

Definition at line 436 of file results.py.

436 def get_ordered_results(self):
437 if not self._closed:
438 self.close()
439 return self._merged_results
440

◆ get_timings()

searx.results.ResultContainer.get_timings ( self)

Definition at line 478 of file results.py.

478 def get_timings(self):
479 with self._lock:
480 if not self._closed:
481 logger.error("call to ResultContainer.get_timings before ResultContainer.close")
482 return []
483 return self.timings

◆ number_of_results()

int searx.results.ResultContainer.number_of_results ( self)
Returns the average of the engine-reported result counts; returns zero if that
average is smaller than the actual number of merged results.

Definition at line 445 of file results.py.

445 def number_of_results(self) -> int:
446 """Returns the average of results number, returns zero if the average
447 result number is smaller than the actual result count."""
448
449 with self._lock:
450 if not self._closed:
451 logger.error("call to ResultContainer.number_of_results before ResultContainer.close")
452 return 0
453
454 resultnum_sum = sum(self._number_of_results)
455 if not resultnum_sum or not self._number_of_results:
456 return 0
457
458 average = int(resultnum_sum / len(self._number_of_results))
459 if average < self.results_length():
460 average = 0
461 return average
462

◆ results_length()

searx.results.ResultContainer.results_length ( self)

Definition at line 441 of file results.py.

441 def results_length(self):
442 return len(self._merged_results)
443

Member Data Documentation

◆ __slots__

tuple searx.results.ResultContainer.__slots__
staticprivate
Initial value:
= (
'_merged_results',
'infoboxes',
'suggestions',
'answers',
'corrections',
'_number_of_results',
'_closed',
'paging',
'unresponsive_engines',
'timings',
'redirect_url',
'engine_data',
'on_result',
'_lock',
)

Definition at line 171 of file results.py.

◆ _closed

bool searx.results.ResultContainer._closed = False
protected

Definition at line 197 of file results.py.

◆ _lock

searx.results.ResultContainer._lock = RLock()
protected

Definition at line 203 of file results.py.

◆ _merged_results

searx.results.ResultContainer._merged_results = []
protected

Definition at line 190 of file results.py.

◆ _number_of_results

list searx.results.ResultContainer._number_of_results = []
protected

Definition at line 195 of file results.py.

◆ answers

searx.results.ResultContainer.answers = AnswerSet()

Definition at line 193 of file results.py.

◆ corrections

searx.results.ResultContainer.corrections = set()

Definition at line 194 of file results.py.

◆ engine_data

dict[str, str | dict] searx.results.ResultContainer.engine_data = defaultdict(dict)

Definition at line 196 of file results.py.

Referenced by searx.search.models.SearchQuery.__copy__().

◆ infoboxes

list searx.results.ResultContainer.infoboxes = []

Definition at line 191 of file results.py.

◆ on_result

bool searx.results.ResultContainer.on_result = lambda _: True

Definition at line 202 of file results.py.

◆ paging

bool searx.results.ResultContainer.paging = False

Definition at line 198 of file results.py.

◆ redirect_url

searx.results.ResultContainer.redirect_url = None

Definition at line 201 of file results.py.

◆ suggestions

set[str] searx.results.ResultContainer.suggestions = set()

Definition at line 192 of file results.py.

◆ timings

list searx.results.ResultContainer.timings = []

Definition at line 200 of file results.py.

◆ unresponsive_engines

Set[UnresponsiveEngine] searx.results.ResultContainer.unresponsive_engines = set()

Definition at line 199 of file results.py.


The documentation for this class was generated from the following file: