.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
searx.results.ResultContainer Class Reference
+ Collaboration diagram for searx.results.ResultContainer:

Public Member Functions

 __init__ (self)
 
 extend (self, engine_name, results)
 
 close (self)
 
 get_ordered_results (self)
 
 results_length (self)
 
int number_of_results (self)
 
 add_unresponsive_engine (self, str engine_name, str error_type, bool suspended=False)
 
 add_timing (self, str engine_name, float engine_time, float page_load_time)
 
 get_timings (self)
 

Public Attributes

list infoboxes = []
 
 suggestions = set()
 
dict answers = {}
 
 corrections = set()
 
 engine_data = defaultdict(dict)
 
bool paging = False
 
Set[UnresponsiveEngine] unresponsive_engines = set()
 
list timings = []
 
 redirect_url = None
 
bool on_result = lambda _: True
 
int _closed = 0
 
bool infoboxes = False
 

Protected Member Functions

 _merge_infobox (self, infobox)
 
 _is_valid_url_result (self, result, error_msgs)
 
 _normalize_url_result (self, result)
 

Protected Attributes

list _merged_results = []
 
list _number_of_results = []
 
bool _closed = False
 
 _lock = RLock()
 

Private Member Functions

 __merge_url_result (self, result, position)
 
 __find_duplicated_http_result (self, result)
 
 __merge_duplicated_http_result (self, duplicated, result, position)
 
 __merge_result_no_url (self, result, position)
 

Static Private Attributes

tuple __slots__
 

Detailed Description

docstring for ResultContainer

Definition at line 164 of file results.py.

Constructor & Destructor Documentation

◆ __init__()

searx.results.ResultContainer.__init__ ( self)

Definition at line 184 of file results.py.

184 def __init__(self):
185 super().__init__()
186 self._merged_results = []
187 self.infoboxes = []
188 self.suggestions = set()
189 self.answers = {}
190 self.corrections = set()
191 self._number_of_results = []
192 self.engine_data = defaultdict(dict)
193 self._closed = False
194 self.paging = False
195 self.unresponsive_engines: Set[UnresponsiveEngine] = set()
196 self.timings: List[Timing] = []
197 self.redirect_url = None
198 self.on_result = lambda _: True
199 self._lock = RLock()
200

Member Function Documentation

◆ __find_duplicated_http_result()

searx.results.ResultContainer.__find_duplicated_http_result ( self,
result )
private

Definition at line 315 of file results.py.

315 def __find_duplicated_http_result(self, result):
316 result_template = result.get('template')
317 for merged_result in self._merged_results:
318 if 'parsed_url' not in merged_result:
319 continue
320 if compare_urls(result['parsed_url'], merged_result['parsed_url']) and result_template == merged_result.get(
321 'template'
322 ):
323 if result_template != 'images.html':
324 # not an image, same template, same url : it's a duplicate
325 return merged_result
326
327 # it's an image
328 # it's a duplicate if the parsed_url, template and img_src are different
329 if result.get('img_src', '') == merged_result.get('img_src', ''):
330 return merged_result
331 return None
332

◆ __merge_duplicated_http_result()

searx.results.ResultContainer.__merge_duplicated_http_result ( self,
duplicated,
result,
position )
private

Definition at line 333 of file results.py.

333 def __merge_duplicated_http_result(self, duplicated, result, position):
334 # use content with more text
335 if result_content_len(result.get('content', '')) > result_content_len(duplicated.get('content', '')):
336 duplicated['content'] = result['content']
337
338 # use title with more text
339 if result_content_len(result.get('title', '')) > len(duplicated.get('title', '')):
340 duplicated['title'] = result['title']
341
342 # merge all result's parameters not found in duplicate
343 for key in result.keys():
344 if not duplicated.get(key):
345 duplicated[key] = result.get(key)
346
347 # add the new position
348 duplicated['positions'].append(position)
349
350 # add engine to list of result-engines
351 duplicated['engines'].add(result['engine'])
352
353 # use https if possible
354 if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https':
355 duplicated['url'] = result['parsed_url'].geturl()
356 duplicated['parsed_url'] = result['parsed_url']
357

◆ __merge_result_no_url()

searx.results.ResultContainer.__merge_result_no_url ( self,
result,
position )
private

Definition at line 358 of file results.py.

358 def __merge_result_no_url(self, result, position):
359 result['engines'] = set([result['engine']])
360 result['positions'] = [position]
361 with self._lock:
362 self._merged_results.append(result)
363

◆ __merge_url_result()

searx.results.ResultContainer.__merge_url_result ( self,
result,
position )
private

Definition at line 303 of file results.py.

303 def __merge_url_result(self, result, position):
304 result['engines'] = set([result['engine']])
305 with self._lock:
306 duplicated = self.__find_duplicated_http_result(result)
307 if duplicated:
308 self.__merge_duplicated_http_result(duplicated, result, position)
309 return
310
311 # if there is no duplicate found, append result
312 result['positions'] = [position]
313 self._merged_results.append(result)
314

◆ _is_valid_url_result()

searx.results.ResultContainer._is_valid_url_result ( self,
result,
error_msgs )
protected

Definition at line 262 of file results.py.

262 def _is_valid_url_result(self, result, error_msgs):
263 if 'url' in result:
264 if not isinstance(result['url'], str):
265 logger.debug('result: invalid URL: %s', str(result))
266 error_msgs.add('invalid URL')
267 return False
268
269 if 'title' in result and not isinstance(result['title'], str):
270 logger.debug('result: invalid title: %s', str(result))
271 error_msgs.add('invalid title')
272 return False
273
274 if 'content' in result:
275 if not isinstance(result['content'], str):
276 logger.debug('result: invalid content: %s', str(result))
277 error_msgs.add('invalid content')
278 return False
279
280 return True
281

◆ _merge_infobox()

searx.results.ResultContainer._merge_infobox ( self,
infobox )
protected

Definition at line 247 of file results.py.

247 def _merge_infobox(self, infobox):
248 add_infobox = True
249 infobox_id = infobox.get('id', None)
250 infobox['engines'] = set([infobox['engine']])
251 if infobox_id is not None:
252 parsed_url_infobox_id = urlparse(infobox_id)
253 with self._lock:
254 for existingIndex in self.infoboxes:
255 if compare_urls(urlparse(existingIndex.get('id', '')), parsed_url_infobox_id):
256 merge_two_infoboxes(existingIndex, infobox)
257 add_infobox = False
258
259 if add_infobox:
260 self.infoboxes.append(infobox)
261

◆ _normalize_url_result()

searx.results.ResultContainer._normalize_url_result ( self,
result )
protected
Return True if the result is valid

Definition at line 282 of file results.py.

282 def _normalize_url_result(self, result):
283 """Return True if the result is valid"""
284 result['parsed_url'] = urlparse(result['url'])
285
286 # if the result has no scheme, use http as default
287 if not result['parsed_url'].scheme:
288 result['parsed_url'] = result['parsed_url']._replace(scheme="http")
289 result['url'] = result['parsed_url'].geturl()
290
291 # avoid duplicate content between the content and title fields
292 if result.get('content') == result.get('title'):
293 del result['content']
294
295 # make sure there is a template
296 if 'template' not in result:
297 result['template'] = 'default.html'
298
299 # strip multiple spaces and carriage returns from content
300 if result.get('content'):
301 result['content'] = WHITESPACE_REGEX.sub(' ', result['content'])
302

◆ add_timing()

searx.results.ResultContainer.add_timing ( self,
str engine_name,
float engine_time,
float page_load_time )

Definition at line 464 of file results.py.

464 def add_timing(self, engine_name: str, engine_time: float, page_load_time: float):
465 with self._lock:
466 if self._closed:
467 logger.error("call to ResultContainer.add_timing after ResultContainer.close")
468 return
469 self.timings.append(Timing(engine_name, total=engine_time, load=page_load_time))
470

◆ add_unresponsive_engine()

searx.results.ResultContainer.add_unresponsive_engine ( self,
str engine_name,
str error_type,
bool suspended = False )

Definition at line 456 of file results.py.

456 def add_unresponsive_engine(self, engine_name: str, error_type: str, suspended: bool = False):
457 with self._lock:
458 if self._closed:
459 logger.error("call to ResultContainer.add_unresponsive_engine after ResultContainer.close")
460 return
461 if engines[engine_name].display_error_messages:
462 self.unresponsive_engines.add(UnresponsiveEngine(engine_name, error_type, suspended))
463

◆ close()

searx.results.ResultContainer.close ( self)

Definition at line 364 of file results.py.

364 def close(self):
365 self._closed = True
366
367 for result in self._merged_results:
368 result['score'] = result_score(result, result.get('priority'))
369 # removing html content and whitespace duplications
370 if result.get('content'):
371 result['content'] = result['content'].strip()
372 if result.get('title'):
373 result['title'] = ' '.join(result['title'].strip().split())
374
375 for result_engine in result['engines']:
376 counter_add(result['score'], 'engine', result_engine, 'score')
377
378 results = sorted(self._merged_results, key=itemgetter('score'), reverse=True)
379
380 # pass 2 : group results by category and template
381 gresults = []
382 categoryPositions = {}
383
384 for res in results:
385 # do we need to handle more than one category per engine?
386 engine = engines[res['engine']]
387 res['category'] = engine.categories[0] if len(engine.categories) > 0 else ''
388
389 # do we need to handle more than one category per engine?
390 category = (
391 res['category']
392 + ':'
393 + res.get('template', '')
394 + ':'
395 + ('img_src' if 'img_src' in res or 'thumbnail' in res else '')
396 )
397
398 current = None if category not in categoryPositions else categoryPositions[category]
399
400 # group with previous results using the same category
401 # if the group can accept more result and is not too far
402 # from the current position
403 if current is not None and (current['count'] > 0) and (len(gresults) - current['index'] < 20):
404 # group with the previous results using
405 # the same category with this one
406 index = current['index']
407 gresults.insert(index, res)
408
409 # update every index after the current one
410 # (including the current one)
411 for k in categoryPositions: # pylint: disable=consider-using-dict-items
412 v = categoryPositions[k]['index']
413 if v >= index:
414 categoryPositions[k]['index'] = v + 1
415
416 # update this category
417 current['count'] -= 1
418
419 else:
420 # same category
421 gresults.append(res)
422
423 # update categoryIndex
424 categoryPositions[category] = {'index': len(gresults), 'count': 8}
425
426 # update _merged_results
427 self._merged_results = gresults
428

◆ extend()

searx.results.ResultContainer.extend ( self,
engine_name,
results )

Definition at line 201 of file results.py.

201 def extend(self, engine_name, results): # pylint: disable=too-many-branches
202 if self._closed:
203 return
204
205 standard_result_count = 0
206 error_msgs = set()
207 for result in list(results):
208 result['engine'] = engine_name
209 if 'suggestion' in result and self.on_result(result):
210 self.suggestions.add(result['suggestion'])
211 elif 'answer' in result and self.on_result(result):
212 self.answers[result['answer']] = result
213 elif 'correction' in result and self.on_result(result):
214 self.corrections.add(result['correction'])
215 elif 'infobox' in result and self.on_result(result):
216 self._merge_infobox(result)
217 elif 'number_of_results' in result and self.on_result(result):
218 self._number_of_results.append(result['number_of_results'])
219 elif 'engine_data' in result and self.on_result(result):
220 self.engine_data[engine_name][result['key']] = result['engine_data']
221 elif 'url' in result:
222 # standard result (url, title, content)
223 if not self._is_valid_url_result(result, error_msgs):
224 continue
225 # normalize the result
226 self._normalize_url_result(result)
227 # call on_result call searx.search.SearchWithPlugins._on_result
228 # which calls the plugins
229 if not self.on_result(result):
230 continue
231 self.__merge_url_result(result, standard_result_count + 1)
232 standard_result_count += 1
233 elif self.on_result(result):
234 self.__merge_result_no_url(result, standard_result_count + 1)
235 standard_result_count += 1
236
237 if len(error_msgs) > 0:
238 for msg in error_msgs:
239 count_error(engine_name, 'some results are invalids: ' + msg, secondary=True)
240
241 if engine_name in engines:
242 histogram_observe(standard_result_count, 'engine', engine_name, 'result', 'count')
243
244 if not self.paging and engine_name in engines and engines[engine_name].paging:
245 self.paging = True
246

◆ get_ordered_results()

searx.results.ResultContainer.get_ordered_results ( self)

Definition at line 429 of file results.py.

429 def get_ordered_results(self):
430 if not self._closed:
431 self.close()
432 return self._merged_results
433

◆ get_timings()

searx.results.ResultContainer.get_timings ( self)

Definition at line 471 of file results.py.

471 def get_timings(self):
472 with self._lock:
473 if not self._closed:
474 logger.error("call to ResultContainer.get_timings before ResultContainer.close")
475 return []
476 return self.timings

◆ number_of_results()

int searx.results.ResultContainer.number_of_results ( self)
Returns the average of the reported result counts; returns zero if that
average is smaller than the actual number of merged results.

Definition at line 438 of file results.py.

438 def number_of_results(self) -> int:
439 """Returns the average of results number, returns zero if the average
440 result number is smaller than the actual result count."""
441
442 with self._lock:
443 if not self._closed:
444 logger.error("call to ResultContainer.number_of_results before ResultContainer.close")
445 return 0
446
447 resultnum_sum = sum(self._number_of_results)
448 if not resultnum_sum or not self._number_of_results:
449 return 0
450
451 average = int(resultnum_sum / len(self._number_of_results))
452 if average < self.results_length():
453 average = 0
454 return average
455

◆ results_length()

searx.results.ResultContainer.results_length ( self)

Definition at line 434 of file results.py.

434 def results_length(self):
435 return len(self._merged_results)
436

Member Data Documentation

◆ __slots__

tuple searx.results.ResultContainer.__slots__
staticprivate
Initial value:
= (
'_merged_results',
'infoboxes',
'suggestions',
'answers',
'corrections',
'_number_of_results',
'_closed',
'paging',
'unresponsive_engines',
'timings',
'redirect_url',
'engine_data',
'on_result',
'_lock',
)

Definition at line 167 of file results.py.

◆ _closed [1/2]

searx.results.ResultContainer._closed = False
protected

Definition at line 193 of file results.py.

◆ _closed [2/2]

int searx.results.ResultContainer._closed = 0

Definition at line 202 of file results.py.

◆ _lock

◆ _merged_results

searx.results.ResultContainer._merged_results = []
protected

Definition at line 186 of file results.py.

◆ _number_of_results

searx.results.ResultContainer._number_of_results = []
protected

Definition at line 191 of file results.py.

◆ answers

dict searx.results.ResultContainer.answers = {}

Definition at line 189 of file results.py.

◆ corrections

searx.results.ResultContainer.corrections = set()

Definition at line 190 of file results.py.

◆ engine_data

searx.results.ResultContainer.engine_data = defaultdict(dict)

Definition at line 192 of file results.py.

Referenced by searx.search.models.SearchQuery.__copy__().

◆ infoboxes [1/2]

list searx.results.ResultContainer.infoboxes = []

Definition at line 187 of file results.py.

◆ infoboxes [2/2]

bool searx.results.ResultContainer.infoboxes = False

Definition at line 254 of file results.py.

◆ on_result

bool searx.results.ResultContainer.on_result = lambda _: True

Definition at line 198 of file results.py.

◆ paging

bool searx.results.ResultContainer.paging = False

Definition at line 194 of file results.py.

◆ redirect_url

searx.results.ResultContainer.redirect_url = None

Definition at line 197 of file results.py.

◆ suggestions

searx.results.ResultContainer.suggestions = set()

Definition at line 188 of file results.py.

◆ timings

list searx.results.ResultContainer.timings = []

Definition at line 196 of file results.py.

◆ unresponsive_engines

Set[UnresponsiveEngine] searx.results.ResultContainer.unresponsive_engines = set()

Definition at line 195 of file results.py.


The documentation for this class was generated from the following file: