.oO SearXNG Developer Documentation Oo.
searx.results.ResultContainer Class Reference
[Collaboration diagram for searx.results.ResultContainer]

Public Member Functions

 __init__ (self)
 
 extend (self, engine_name, results)
 
 close (self)
 
 get_ordered_results (self)
 
 results_length (self)
 
int number_of_results (self)
 
 add_unresponsive_engine (self, str engine_name, str error_type, bool suspended=False)
 
 add_timing (self, str engine_name, float engine_time, float page_load_time)
 
 get_timings (self)
 

Public Attributes

list infoboxes = []
 
 suggestions = set()
 
dict answers = {}
 
 corrections = set()
 
 engine_data = defaultdict(dict)
 
bool paging = False
 
Set[UnresponsiveEngine] unresponsive_engines = set()
 
list timings = []
 
 redirect_url = None
 
bool on_result = lambda _: True
 

Protected Member Functions

 _merge_infobox (self, infobox)
 
 _is_valid_url_result (self, result, error_msgs)
 
 _normalize_url_result (self, result)
 

Protected Attributes

list _merged_results = []
 
list _number_of_results = []
 
bool _closed = False
 
 _lock = RLock()
 

Private Member Functions

 __merge_url_result (self, result, position)
 
 __find_duplicated_http_result (self, result)
 
 __merge_duplicated_http_result (self, duplicated, result, position)
 
 __merge_result_no_url (self, result, position)
 

Static Private Attributes

tuple __slots__
 

Detailed Description

Container that collects the results of one search request from all engines: it merges and deduplicates URL results, gathers answers, suggestions, corrections and infoboxes, and exposes the scored, ordered result list once it has been closed.

Definition at line 165 of file results.py.
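
A minimal lifecycle sketch (assumptions: a fully initialized SearXNG application with engines loaded and metrics initialized; 'example engine' stands for any configured engine name, and the result dicts are hypothetical):

    from searx.results import ResultContainer

    container = ResultContainer()

    # feed the results produced by one engine request (hypothetical data)
    container.extend('example engine', [
        {'url': 'https://example.org/', 'title': 'Example', 'content': 'an example result'},
        {'suggestion': 'example query'},
    ])

    container.close()                         # score, deduplicate and group the merged results
    ordered = container.get_ordered_results()
    print(container.results_length(), container.number_of_results())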

Constructor & Destructor Documentation

◆ __init__()

searx.results.ResultContainer.__init__ ( self)

Definition at line 185 of file results.py.

185 def __init__(self):
186 super().__init__()
187 self._merged_results = []
188 self.infoboxes = []
189 self.suggestions = set()
190 self.answers = {}
191 self.corrections = set()
192 self._number_of_results = []
193 self.engine_data = defaultdict(dict)
194 self._closed = False
195 self.paging = False
196 self.unresponsive_engines: Set[UnresponsiveEngine] = set()
197 self.timings: List[Timing] = []
198 self.redirect_url = None
199 self.on_result = lambda _: True
200 self._lock = RLock()
201

Member Function Documentation

◆ __find_duplicated_http_result()

searx.results.ResultContainer.__find_duplicated_http_result ( self,
result )
private

Definition at line 316 of file results.py.

316 def __find_duplicated_http_result(self, result):
317 result_template = result.get('template')
318 for merged_result in self._merged_results:
319 if 'parsed_url' not in merged_result:
320 continue
321 if compare_urls(result['parsed_url'], merged_result['parsed_url']) and result_template == merged_result.get(
322 'template'
323 ):
324 if result_template != 'images.html':
325 # not an image, same template, same url : it's a duplicate
326 return merged_result
327
328 # it's an image
329 # it's a duplicate only if the img_src is also the same
330 if result.get('img_src', '') == merged_result.get('img_src', ''):
331 return merged_result
332 return None
333
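
The criterion: two URL results are duplicates when their URLs compare equal (via compare_urls()) and they use the same template; image results must additionally share the same img_src. A simplified, standalone re-implementation for illustration only (it uses exact parsed-URL equality where the original uses compare_urls()):

    from urllib.parse import urlparse

    def is_duplicate(result, merged_result):
        # same URL and same template -> duplicate ...
        if urlparse(result['url']) != urlparse(merged_result['url']):
            return False
        if result.get('template') != merged_result.get('template'):
            return False
        if result.get('template') != 'images.html':
            return True
        # ... except for images, which must also share the same img_src
        return result.get('img_src', '') == merged_result.get('img_src', '')

    print(is_duplicate({'url': 'https://example.org/a'}, {'url': 'https://example.org/a'}))  # True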

◆ __merge_duplicated_http_result()

searx.results.ResultContainer.__merge_duplicated_http_result ( self,
duplicated,
result,
position )
private

Definition at line 334 of file results.py.

334 def __merge_duplicated_http_result(self, duplicated, result, position):
335 # using content with more text
336 if result_content_len(result.get('content', '')) > result_content_len(duplicated.get('content', '')):
337 duplicated['content'] = result['content']
338
339 # merge all result's parameters not found in duplicate
340 for key in result.keys():
341 if not duplicated.get(key):
342 duplicated[key] = result.get(key)
343
344 # add the new position
345 duplicated['positions'].append(position)
346
347 # add engine to list of result-engines
348 duplicated['engines'].add(result['engine'])
349
350 # using https if possible
351 if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https':
352 duplicated['url'] = result['parsed_url'].geturl()
353 duplicated['parsed_url'] = result['parsed_url']
354
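
The effect of the merge is easiest to see on concrete data. In the sketch below (assuming the searx package is importable; the engine names are hypothetical, so the per-engine metric calls in extend() are simply skipped), the second result is detected as a duplicate of the first, the longer content and the https URL win, and both engines are recorded:

    from searx.results import ResultContainer

    container = ResultContainer()
    container.extend('engine a', [
        {'url': 'http://example.org/a', 'title': 'A', 'content': 'short'},
    ])
    container.extend('engine b', [
        {'url': 'https://example.org/a', 'title': 'A', 'content': 'a longer description'},
    ])
    print(container.results_length())   # 1 -- both results were merged into one entry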

◆ __merge_result_no_url()

searx.results.ResultContainer.__merge_result_no_url ( self,
result,
position )
private

Definition at line 355 of file results.py.

355 def __merge_result_no_url(self, result, position):
356 result['engines'] = set([result['engine']])
357 result['positions'] = [position]
358 with self._lock:
359 self._merged_results.append(result)
360

◆ __merge_url_result()

searx.results.ResultContainer.__merge_url_result ( self,
result,
position )
private

Definition at line 304 of file results.py.

304 def __merge_url_result(self, result, position):
305 result['engines'] = set([result['engine']])
306 with self._lock:
307 duplicated = self.__find_duplicated_http_result(result)
308 if duplicated:
309 self.__merge_duplicated_http_result(duplicated, result, position)
310 return
311
312 # if there is no duplicate found, append result
313 result['positions'] = [position]
314 self._merged_results.append(result)
315

◆ _is_valid_url_result()

searx.results.ResultContainer._is_valid_url_result ( self,
result,
error_msgs )
protected

Definition at line 263 of file results.py.

263 def _is_valid_url_result(self, result, error_msgs):
264 if 'url' in result:
265 if not isinstance(result['url'], str):
266 logger.debug('result: invalid URL: %s', str(result))
267 error_msgs.add('invalid URL')
268 return False
269
270 if 'title' in result and not isinstance(result['title'], str):
271 logger.debug('result: invalid title: %s', str(result))
272 error_msgs.add('invalid title')
273 return False
274
275 if 'content' in result:
276 if not isinstance(result['content'], str):
277 logger.debug('result: invalid content: %s', str(result))
278 error_msgs.add('invalid content')
279 return False
280
281 return True
282

◆ _merge_infobox()

searx.results.ResultContainer._merge_infobox ( self,
infobox )
protected

Definition at line 248 of file results.py.

248 def _merge_infobox(self, infobox):
249 add_infobox = True
250 infobox_id = infobox.get('id', None)
251 infobox['engines'] = set([infobox['engine']])
252 if infobox_id is not None:
253 parsed_url_infobox_id = urlparse(infobox_id)
254 with self._lock:
255 for existingIndex in self.infoboxes:
256 if compare_urls(urlparse(existingIndex.get('id', '')), parsed_url_infobox_id):
257 merge_two_infoboxes(existingIndex, infobox)
258 add_infobox = False
259
260 if add_infobox:
261 self.infoboxes.append(infobox)
262

◆ _normalize_url_result()

searx.results.ResultContainer._normalize_url_result ( self,
result )
protected
Return True if the result is valid

Definition at line 283 of file results.py.

283 def _normalize_url_result(self, result):
284 """Return True if the result is valid"""
285 result['parsed_url'] = urlparse(result['url'])
286
287 # if the result has no scheme, use http as default
288 if not result['parsed_url'].scheme:
289 result['parsed_url'] = result['parsed_url']._replace(scheme="http")
290 result['url'] = result['parsed_url'].geturl()
291
292 # avoid duplicate content between the content and title fields
293 if result.get('content') == result.get('title'):
294 del result['content']
295
296 # make sure there is a template
297 if 'template' not in result:
298 result['template'] = 'default.html'
299
300 # strip multiple spaces and carriage returns from content
301 if result.get('content'):
302 result['content'] = WHITESPACE_REGEX.sub(' ', result['content'])
303
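
For results that arrive with a protocol-relative URL (no scheme), the normalization falls back to http. A standalone illustration of the _replace() step used above:

    from urllib.parse import urlparse

    parsed = urlparse('//example.org/page')   # result URL without a scheme
    if not parsed.scheme:
        parsed = parsed._replace(scheme='http')
    print(parsed.geturl())                    # http://example.org/page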

◆ add_timing()

searx.results.ResultContainer.add_timing ( self,
str engine_name,
float engine_time,
float page_load_time )

Definition at line 461 of file results.py.

461 def add_timing(self, engine_name: str, engine_time: float, page_load_time: float):
462 with self._lock:
463 if self._closed:
464 logger.error("call to ResultContainer.add_timing after ResultContainer.close")
465 return
466 self.timings.append(Timing(engine_name, total=engine_time, load=page_load_time))
467

◆ add_unresponsive_engine()

searx.results.ResultContainer.add_unresponsive_engine ( self,
str engine_name,
str error_type,
bool suspended = False )

Definition at line 453 of file results.py.

453 def add_unresponsive_engine(self, engine_name: str, error_type: str, suspended: bool = False):
454 with self._lock:
455 if self._closed:
456 logger.error("call to ResultContainer.add_unresponsive_engine after ResultContainer.close")
457 return
458 if engines[engine_name].display_error_messages:
459 self.unresponsive_engines.add(UnresponsiveEngine(engine_name, error_type, suspended))
460

◆ close()

searx.results.ResultContainer.close ( self)

Definition at line 361 of file results.py.

361 def close(self):
362 self._closed = True
363
364 for result in self._merged_results:
365 result['score'] = result_score(result, result.get('priority'))
366 # removing html content and whitespace duplications
367 if result.get('content'):
368 result['content'] = result['content'].strip()
369 if result.get('title'):
370 result['title'] = ' '.join(result['title'].strip().split())
371
372 for result_engine in result['engines']:
373 counter_add(result['score'], 'engine', result_engine, 'score')
374
375 results = sorted(self._merged_results, key=itemgetter('score'), reverse=True)
376
377 # pass 2 : group results by category and template
378 gresults = []
379 categoryPositions = {}
380
381 for res in results:
382 # do we need to handle more than one category per engine?
383 engine = engines[res['engine']]
384 res['category'] = engine.categories[0] if len(engine.categories) > 0 else ''
385
386 # do we need to handle more than one category per engine?
387 category = (
388 res['category']
389 + ':'
390 + res.get('template', '')
391 + ':'
392 + ('img_src' if 'img_src' in res or 'thumbnail' in res else '')
393 )
394
395 current = None if category not in categoryPositions else categoryPositions[category]
396
397 # group with previous results using the same category
398 # if the group can accept more result and is not too far
399 # from the current position
400 if current is not None and (current['count'] > 0) and (len(gresults) - current['index'] < 20):
401 # group with the previous results using
402 # the same category with this one
403 index = current['index']
404 gresults.insert(index, res)
405
406 # update every index after the current one
407 # (including the current one)
408 for k in categoryPositions: # pylint: disable=consider-using-dict-items
409 v = categoryPositions[k]['index']
410 if v >= index:
411 categoryPositions[k]['index'] = v + 1
412
413 # update this category
414 current['count'] -= 1
415
416 else:
417 # same category
418 gresults.append(res)
419
420 # update categoryIndex
421 categoryPositions[category] = {'index': len(gresults), 'count': 8}
422
423 # update _merged_results
424 self._merged_results = gresults
425
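
In pass 2 the results are grouped by a key built from the result's category, its template and an image marker: a newly started group may take a limited number of follow-up results (the count starts at 8) and is only extended while it lies less than 20 positions behind the current insertion point. A standalone sketch of how the key is built, using a hypothetical result dict:

    res = {'category': 'images', 'template': 'images.html',
           'img_src': 'https://example.org/img.png'}

    category = (
        res['category']
        + ':' + res.get('template', '')
        + ':' + ('img_src' if 'img_src' in res or 'thumbnail' in res else '')
    )
    print(category)   # images:images.html:img_src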

◆ extend()

searx.results.ResultContainer.extend ( self,
engine_name,
results )

Definition at line 202 of file results.py.

202 def extend(self, engine_name, results): # pylint: disable=too-many-branches
203 if self._closed:
204 return
205
206 standard_result_count = 0
207 error_msgs = set()
208 for result in list(results):
209 result['engine'] = engine_name
210 if 'suggestion' in result and self.on_result(result):
211 self.suggestions.add(result['suggestion'])
212 elif 'answer' in result and self.on_result(result):
213 self.answers[result['answer']] = result
214 elif 'correction' in result and self.on_result(result):
215 self.corrections.add(result['correction'])
216 elif 'infobox' in result and self.on_result(result):
217 self._merge_infobox(result)
218 elif 'number_of_results' in result and self.on_result(result):
219 self._number_of_results.append(result['number_of_results'])
220 elif 'engine_data' in result and self.on_result(result):
221 self.engine_data[engine_name][result['key']] = result['engine_data']
222 elif 'url' in result:
223 # standard result (url, title, content)
224 if not self._is_valid_url_result(result, error_msgs):
225 continue
226 # normalize the result
227 self._normalize_url_result(result)
228 # call on_result call searx.search.SearchWithPlugins._on_result
229 # which calls the plugins
230 if not self.on_result(result):
231 continue
232 self.__merge_url_result(result, standard_result_count + 1)
233 standard_result_count += 1
234 elif self.on_result(result):
235 self.__merge_result_no_url(result, standard_result_count + 1)
236 standard_result_count += 1
237
238 if len(error_msgs) > 0:
239 for msg in error_msgs:
240 count_error(engine_name, 'some results are invalids: ' + msg, secondary=True)
241
242 if engine_name in engines:
243 histogram_observe(standard_result_count, 'engine', engine_name, 'result', 'count')
244
245 if not self.paging and engine_name in engines and engines[engine_name].paging:
246 self.paging = True
247
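
Every incoming result is passed through on_result before it is merged; in SearXNG this hook is set by searx.search.SearchWithPlugins so that plugins can modify or reject results (see the comment in the listing above). A standalone sketch of overriding the hook directly, with a hypothetical blocklist rule (assuming the searx package is importable; the engine name is hypothetical):

    from searx.results import ResultContainer

    container = ResultContainer()
    # a falsy return value drops the result before it is merged
    container.on_result = lambda result: 'blocked.example' not in result.get('url', '')
    container.extend('example engine', [
        {'url': 'https://blocked.example/page', 'title': 'dropped'},
        {'url': 'https://example.org/page', 'title': 'kept'},
    ])
    print(container.results_length())   # 1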

◆ get_ordered_results()

searx.results.ResultContainer.get_ordered_results ( self)

Definition at line 426 of file results.py.

426 def get_ordered_results(self):
427 if not self._closed:
428 self.close()
429 return self._merged_results
430

◆ get_timings()

searx.results.ResultContainer.get_timings ( self)

Definition at line 468 of file results.py.

468 def get_timings(self):
469 with self._lock:
470 if not self._closed:
471 logger.error("call to ResultContainer.get_timings before ResultContainer.close")
472 return []
473 return self.timings
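
Timings are recorded per engine while the search runs and can only be read back after the container has been closed; before close() the method logs an error and returns an empty list. A small sketch (assuming the searx package is importable; the engine name and timing values are hypothetical):

    from searx.results import ResultContainer

    container = ResultContainer()
    container.add_timing('example engine', engine_time=0.42, page_load_time=0.31)
    container.close()
    print(container.get_timings())   # one Timing entry for 'example engine'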

◆ number_of_results()

int searx.results.ResultContainer.number_of_results ( self)
Returns the average of the result counts reported by the engines; returns zero if that average is smaller than the number of merged results.

Definition at line 435 of file results.py.

435 def number_of_results(self) -> int:
436 """Returns the average of results number, returns zero if the average
437 result number is smaller than the actual result count."""
438
439 with self._lock:
440 if not self._closed:
441 logger.error("call to ResultContainer.number_of_results before ResultContainer.close")
442 return 0
443
444 resultnum_sum = sum(self._number_of_results)
445 if not resultnum_sum or not self._number_of_results:
446 return 0
447
448 average = int(resultnum_sum / len(self._number_of_results))
449 if average < self.results_length():
450 average = 0
451 return average
452
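
A worked sketch of the computation above with hypothetical numbers: two engines reporting totals of 1000 and 3000 for 25 merged results give an average of 2000, which is returned because it is not smaller than the merged count; an average below the merged count would be reported as 0.

    reported = [1000, 3000]                       # hypothetical 'number_of_results' values
    merged_count = 25                             # what results_length() would return
    average = int(sum(reported) / len(reported))  # 2000
    print(average if average >= merged_count else 0)   # 2000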

◆ results_length()

searx.results.ResultContainer.results_length ( self)

Definition at line 431 of file results.py.

431 def results_length(self):
432 return len(self._merged_results)
433

Member Data Documentation

◆ __slots__

tuple searx.results.ResultContainer.__slots__
static private
Initial value:
= (
'_merged_results',
'infoboxes',
'suggestions',
'answers',
'corrections',
'_number_of_results',
'_closed',
'paging',
'unresponsive_engines',
'timings',
'redirect_url',
'engine_data',
'on_result',
'_lock',
)

Definition at line 168 of file results.py.

◆ _closed

searx.results.ResultContainer._closed = False
protected

Definition at line 194 of file results.py.

◆ _lock

searx.results.ResultContainer._lock = RLock()
protected

Definition at line 200 of file results.py.

◆ _merged_results

searx.results.ResultContainer._merged_results = []
protected

Definition at line 187 of file results.py.

◆ _number_of_results

searx.results.ResultContainer._number_of_results = []
protected

Definition at line 192 of file results.py.

◆ answers

dict searx.results.ResultContainer.answers = {}

Definition at line 190 of file results.py.

◆ corrections

searx.results.ResultContainer.corrections = set()

Definition at line 191 of file results.py.

◆ engine_data

searx.results.ResultContainer.engine_data = defaultdict(dict)

Definition at line 193 of file results.py.

Referenced by searx.search.models.SearchQuery.__copy__().

◆ infoboxes

list searx.results.ResultContainer.infoboxes = []

Definition at line 188 of file results.py.

◆ on_result

bool searx.results.ResultContainer.on_result = lambda _: True

Definition at line 199 of file results.py.

◆ paging

bool searx.results.ResultContainer.paging = False

Definition at line 195 of file results.py.

◆ redirect_url

searx.results.ResultContainer.redirect_url = None

Definition at line 198 of file results.py.

◆ suggestions

searx.results.ResultContainer.suggestions = set()

Definition at line 189 of file results.py.

◆ timings

list searx.results.ResultContainer.timings = []

Definition at line 197 of file results.py.

◆ unresponsive_engines

Set[UnresponsiveEngine] searx.results.ResultContainer.unresponsive_engines = set()

Definition at line 196 of file results.py.


The documentation for this class was generated from the following file:

searx/results.py