.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
searx.results.ResultContainer Class Reference

Public Member Functions

 __init__ (self)
 
 extend (self, engine_name, results)
 
 close (self)
 
 get_ordered_results (self)
 
 results_length (self)
 
int number_of_results (self)
 
 add_unresponsive_engine (self, str engine_name, str error_type, bool suspended=False)
 
 add_timing (self, str engine_name, float engine_time, float page_load_time)
 
 get_timings (self)
 

Public Attributes

 infoboxes
 
 suggestions
 
 answers
 
 corrections
 
 engine_data
 
 paging
 
 redirect_url
 
 on_result
 

Protected Member Functions

 _merge_infobox (self, infobox)
 
 _is_valid_url_result (self, result, error_msgs)
 
 _normalize_url_result (self, result)
 

Protected Attributes

 _merged_results
 
 _number_of_results
 
 _closed
 
 _lock
 

Private Member Functions

 __merge_url_result (self, result, position)
 
 __find_duplicated_http_result (self, result)
 
 __merge_duplicated_http_result (self, duplicated, result, position)
 
 __merge_result_no_url (self, result, position)
 

Static Private Attributes

tuple __slots__
 

Detailed Description

Container that collects the results of all engines for one query, merges duplicates, and orders them for display.

Definition at line 157 of file results.py.

Constructor & Destructor Documentation

◆ __init__()

searx.results.ResultContainer.__init__ ( self)

Definition at line 177 of file results.py.

def __init__(self):
    """Initialize an empty result container."""
    super().__init__()
    # merged standard (url/title/content) results from all engines
    self._merged_results = []
    self.infoboxes = []
    self.suggestions = set()
    self.answers = {}
    self.corrections = set()
    # per-engine reported result totals; averaged by number_of_results()
    self._number_of_results = []
    # opaque per-engine data (e.g. paging tokens), keyed by engine name
    self.engine_data = defaultdict(dict)
    # once closed, extend() becomes a no-op
    self._closed = False
    self.paging = False
    self.unresponsive_engines: Set[UnresponsiveEngine] = set()
    self.timings: List[Timing] = []
    self.redirect_url = None
    # plugin hook called for every result; returning False drops it
    self.on_result = lambda _: True
    # serializes mutation of shared state by concurrent engine threads
    self._lock = RLock()

Member Function Documentation

◆ __find_duplicated_http_result()

searx.results.ResultContainer.__find_duplicated_http_result ( self,
result )
private

Definition at line 308 of file results.py.

def __find_duplicated_http_result(self, result):
    """Return an already merged result that duplicates *result*, or None.

    Two results are duplicates when their URLs compare equal and they use
    the same template; image results must additionally share ``img_src``.
    """
    result_template = result.get('template')
    for merged_result in self._merged_results:
        # results without a URL can never be URL-duplicates
        if 'parsed_url' not in merged_result:
            continue
        if compare_urls(result['parsed_url'], merged_result['parsed_url']) and result_template == merged_result.get(
            'template'
        ):
            if result_template != 'images.html':
                # not an image, same template, same url : it's a duplicate
                return merged_result

            # it's an image
            # it's a duplicate only if the img_src is identical as well
            if result.get('img_src', '') == merged_result.get('img_src', ''):
                return merged_result
    return None

◆ __merge_duplicated_http_result()

searx.results.ResultContainer.__merge_duplicated_http_result ( self,
duplicated,
result,
position )
private

Definition at line 326 of file results.py.

def __merge_duplicated_http_result(self, duplicated, result, position):
    """Fold *result* into the previously merged *duplicated* result in place."""
    # using content with more text
    if result_content_len(result.get('content', '')) > result_content_len(duplicated.get('content', '')):
        duplicated['content'] = result['content']

    # merge all result's parameters not found in duplicate
    # (note: keys whose existing value is falsy are overwritten too)
    for key in result.keys():
        if not duplicated.get(key):
            duplicated[key] = result.get(key)

    # add the new position
    duplicated['positions'].append(position)

    # add engine to list of result-engines
    duplicated['engines'].add(result['engine'])

    # using https if possible
    if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https':
        duplicated['url'] = result['parsed_url'].geturl()
        duplicated['parsed_url'] = result['parsed_url']

◆ __merge_result_no_url()

searx.results.ResultContainer.__merge_result_no_url ( self,
result,
position )
private

Definition at line 347 of file results.py.

def __merge_result_no_url(self, result, position):
    """Append a result that carries no URL (no duplicate detection possible)."""
    result.update(engines={result['engine']}, positions=[position])
    with self._lock:
        self._merged_results.append(result)

◆ __merge_url_result()

searx.results.ResultContainer.__merge_url_result ( self,
result,
position )
private

Definition at line 296 of file results.py.

def __merge_url_result(self, result, position):
    """Merge a URL result, folding it into an existing duplicate if one exists."""
    result['engines'] = set([result['engine']])
    with self._lock:
        duplicated = self.__find_duplicated_http_result(result)
        if duplicated:
            self.__merge_duplicated_http_result(duplicated, result, position)
            return

        # if there is no duplicate found, append result
        result['positions'] = [position]
        self._merged_results.append(result)

◆ _is_valid_url_result()

searx.results.ResultContainer._is_valid_url_result ( self,
result,
error_msgs )
protected

Definition at line 255 of file results.py.

def _is_valid_url_result(self, result, error_msgs):
    """Type-check the url/title/content fields of *result*.

    On the first invalid field: log the offending result, record a message
    in *error_msgs* and return False.  Return True when all present fields
    are strings.
    """
    if 'url' in result and not isinstance(result['url'], str):
        logger.debug('result: invalid URL: %s', str(result))
        error_msgs.add('invalid URL')
        return False

    if 'title' in result and not isinstance(result['title'], str):
        logger.debug('result: invalid title: %s', str(result))
        error_msgs.add('invalid title')
        return False

    if 'content' in result and not isinstance(result['content'], str):
        logger.debug('result: invalid content: %s', str(result))
        error_msgs.add('invalid content')
        return False

    return True

◆ _merge_infobox()

searx.results.ResultContainer._merge_infobox ( self,
infobox )
protected

Definition at line 240 of file results.py.

def _merge_infobox(self, infobox):
    """Add *infobox*, merging it into an existing infobox with the same id."""
    add_infobox = True
    infobox_id = infobox.get('id', None)
    infobox['engines'] = set([infobox['engine']])
    if infobox_id is not None:
        parsed_url_infobox_id = urlparse(infobox_id)
        with self._lock:
            for existingIndex in self.infoboxes:
                if compare_urls(urlparse(existingIndex.get('id', '')), parsed_url_infobox_id):
                    merge_two_infoboxes(existingIndex, infobox)
                    add_infobox = False
                    # NOTE(review): no break here — the new infobox is merged
                    # into every existing match; confirm this is intended

    if add_infobox:
        self.infoboxes.append(infobox)

◆ _normalize_url_result()

searx.results.ResultContainer._normalize_url_result ( self,
result )
protected
Normalize the result's URL, template and content fields in place.

Definition at line 275 of file results.py.

def _normalize_url_result(self, result):
    """Normalize *result* in place: parse the URL, default a missing scheme
    to http, drop content identical to the title, ensure a template is set
    and collapse whitespace in the content.  Returns nothing."""
    result['parsed_url'] = urlparse(result['url'])

    # if the result has no scheme, use http as default
    if not result['parsed_url'].scheme:
        result['parsed_url'] = result['parsed_url']._replace(scheme="http")
        result['url'] = result['parsed_url'].geturl()

    # avoid duplicate content between the content and title fields
    if result.get('content') == result.get('title'):
        del result['content']

    # make sure there is a template
    if 'template' not in result:
        result['template'] = 'default.html'

    # strip multiple spaces and carriage returns from content
    if result.get('content'):
        result['content'] = WHITESPACE_REGEX.sub(' ', result['content'])

◆ add_timing()

searx.results.ResultContainer.add_timing ( self,
str engine_name,
float engine_time,
float page_load_time )

Definition at line 446 of file results.py.

def add_timing(self, engine_name: str, engine_time: float, page_load_time: float):
    """Record the total and page-load time measured for *engine_name*."""
    timing = Timing(engine_name, total=engine_time, load=page_load_time)
    self.timings.append(timing)

◆ add_unresponsive_engine()

searx.results.ResultContainer.add_unresponsive_engine ( self,
str engine_name,
str error_type,
bool suspended = False )

Definition at line 442 of file results.py.

def add_unresponsive_engine(self, engine_name: str, error_type: str, suspended: bool = False):
    """Record that *engine_name* failed, unless it suppresses error messages."""
    if engines[engine_name].display_error_messages:
        self.unresponsive_engines.add(UnresponsiveEngine(engine_name, error_type, suspended))

◆ close()

searx.results.ResultContainer.close ( self)

Definition at line 353 of file results.py.

def close(self):
    """Score, clean and order the merged results; afterwards extend() is a no-op."""
    self._closed = True

    # pass 1 : score each result and strip HTML from content/title
    for result in self._merged_results:
        score = result_score(result)
        result['score'] = score

        # removing html content and whitespace duplications
        if result.get('content'):
            result['content'] = utils.html_to_text(result['content']).strip()
        if result.get('title'):
            result['title'] = ' '.join(utils.html_to_text(result['title']).strip().split())

        # feed the per-engine score metrics
        for result_engine in result['engines']:
            counter_add(score, 'engine', result_engine, 'score')

    # order by score, best first
    results = sorted(self._merged_results, key=itemgetter('score'), reverse=True)

    # pass 2 : group results by category and template
    gresults = []
    categoryPositions = {}

    for res in results:
        # do we need to handle more than one category per engine?
        engine = engines[res['engine']]
        res['category'] = engine.categories[0] if len(engine.categories) > 0 else ''

        # do we need to handle more than one category per engine?
        category = (
            res['category']
            + ':'
            + res.get('template', '')
            + ':'
            + ('img_src' if 'img_src' in res or 'thumbnail' in res else '')
        )

        current = None if category not in categoryPositions else categoryPositions[category]

        # group with previous results using the same category
        # if the group can accept more result and is not too far
        # from the current position
        if current is not None and (current['count'] > 0) and (len(gresults) - current['index'] < 20):
            # group with the previous results using
            # the same category with this one
            index = current['index']
            gresults.insert(index, res)

            # update every index after the current one
            # (including the current one)
            for k in categoryPositions:  # pylint: disable=consider-using-dict-items
                v = categoryPositions[k]['index']
                if v >= index:
                    categoryPositions[k]['index'] = v + 1

            # update this category
            current['count'] -= 1

        else:
            # start a new group (of at most 8 results) for this category
            gresults.append(res)

            # update categoryIndex
            categoryPositions[category] = {'index': len(gresults), 'count': 8}

    # update _merged_results
    self._merged_results = gresults

◆ extend()

searx.results.ResultContainer.extend ( self,
engine_name,
results )

Definition at line 194 of file results.py.

def extend(self, engine_name, results):  # pylint: disable=too-many-branches
    """Dispatch *results* coming from *engine_name* into the container.

    Special result types (suggestion, answer, correction, infobox,
    number_of_results, engine_data) go to their dedicated collections;
    everything else is validated, normalized and merged.  Every result is
    first offered to the ``on_result`` plugin hook, which may drop it.
    No-op when the container is already closed.
    """
    if self._closed:
        return

    standard_result_count = 0
    error_msgs = set()
    for result in list(results):
        result['engine'] = engine_name
        if 'suggestion' in result and self.on_result(result):
            self.suggestions.add(result['suggestion'])
        elif 'answer' in result and self.on_result(result):
            self.answers[result['answer']] = result
        elif 'correction' in result and self.on_result(result):
            self.corrections.add(result['correction'])
        elif 'infobox' in result and self.on_result(result):
            self._merge_infobox(result)
        elif 'number_of_results' in result and self.on_result(result):
            self._number_of_results.append(result['number_of_results'])
        elif 'engine_data' in result and self.on_result(result):
            self.engine_data[engine_name][result['key']] = result['engine_data']
        elif 'url' in result:
            # standard result (url, title, content)
            if not self._is_valid_url_result(result, error_msgs):
                continue
            # normalize the result
            self._normalize_url_result(result)
            # call on_result call searx.search.SearchWithPlugins._on_result
            # which calls the plugins
            if not self.on_result(result):
                continue
            self.__merge_url_result(result, standard_result_count + 1)
            standard_result_count += 1
        elif self.on_result(result):
            self.__merge_result_no_url(result, standard_result_count + 1)
            standard_result_count += 1

    # report validation failures to the error metrics
    if len(error_msgs) > 0:
        for msg in error_msgs:
            count_error(engine_name, 'some results are invalids: ' + msg, secondary=True)

    if engine_name in engines:
        histogram_observe(standard_result_count, 'engine', engine_name, 'result', 'count')

    # enable paging as soon as one contributing engine supports it
    if not self.paging and engine_name in engines and engines[engine_name].paging:
        self.paging = True

◆ get_ordered_results()

searx.results.ResultContainer.get_ordered_results ( self)

Definition at line 420 of file results.py.

def get_ordered_results(self):
    """Return the merged results, closing the container first if necessary."""
    if self._closed:
        return self._merged_results
    self.close()
    return self._merged_results

◆ get_timings()

searx.results.ResultContainer.get_timings ( self)

Definition at line 449 of file results.py.

def get_timings(self):
    """Return the list of per-engine Timing records collected so far."""
    return self.timings

◆ number_of_results()

int searx.results.ResultContainer.number_of_results ( self)
Return the average of the result counts reported by the engines; return zero
if that average is smaller than the number of results actually merged.

Definition at line 429 of file results.py.

def number_of_results(self) -> int:
    """Return the average of the result counts reported by the engines.

    Returns zero when no counts were reported, or when the average is
    smaller than the number of results actually merged.
    """
    counts = self._number_of_results
    if not counts:
        return 0

    average = int(sum(counts) / len(counts))
    return average if average >= self.results_length() else 0

◆ results_length()

searx.results.ResultContainer.results_length ( self)

Definition at line 425 of file results.py.

def results_length(self):
    """Return how many merged results the container currently holds."""
    return len(self._merged_results)

Member Data Documentation

◆ __slots__

tuple searx.results.ResultContainer.__slots__
staticprivate
Initial value:
= (
'_merged_results',
'infoboxes',
'suggestions',
'answers',
'corrections',
'_number_of_results',
'_closed',
'paging',
'unresponsive_engines',
'timings',
'redirect_url',
'engine_data',
'on_result',
'_lock',
)

Definition at line 160 of file results.py.

◆ _closed

searx.results.ResultContainer._closed
protected

Definition at line 186 of file results.py.

◆ _lock

◆ _merged_results

searx.results.ResultContainer._merged_results
protected

Definition at line 179 of file results.py.

◆ _number_of_results

searx.results.ResultContainer._number_of_results
protected

Definition at line 184 of file results.py.

◆ answers

searx.results.ResultContainer.answers

Definition at line 182 of file results.py.

◆ corrections

searx.results.ResultContainer.corrections

Definition at line 183 of file results.py.

◆ engine_data

searx.results.ResultContainer.engine_data

Definition at line 185 of file results.py.

Referenced by searx.search.models.SearchQuery.__copy__().

◆ infoboxes

searx.results.ResultContainer.infoboxes

Definition at line 180 of file results.py.

◆ on_result

searx.results.ResultContainer.on_result

Definition at line 191 of file results.py.

◆ paging

searx.results.ResultContainer.paging

Definition at line 187 of file results.py.

◆ redirect_url

searx.results.ResultContainer.redirect_url

Definition at line 190 of file results.py.

◆ suggestions

searx.results.ResultContainer.suggestions

Definition at line 181 of file results.py.


The documentation for this class was generated from the following file: