.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
searx.results Namespace Reference

Classes

class  ResultContainer
 
class  Timing
 
class  UnresponsiveEngine
 

Functions

 result_content_len (content)
 
 compare_urls (url_a, url_b)
 
 merge_two_infoboxes (infobox1, infobox2)
 
 result_score (result, priority)
 

Variables

 CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
 

Function Documentation

◆ compare_urls()

searx.results.compare_urls ( url_a,
url_b )
Lazy comparison between two URLs.
"www.example.com" and "example.com" are equal.
"www.example.com/path/" and "www.example.com/path" are equal.
"https://www.example.com/" and "http://www.example.com/" are equal.

Args:
    url_a (ParseResult): first URL
    url_b (ParseResult): second URL

Returns:
    bool: True if url_a and url_b are equal

Definition at line 30 of file results.py.

def compare_urls(url_a, url_b):
    """Loosely compare two parsed URLs.

    The comparison ignores the scheme, a leading ``www.`` in the host and a
    single trailing ``/`` in the path; paths are percent-decoded before being
    compared.  Query and fragment must match exactly.

    "www.example.com" and "example.com" are equal.
    "www.example.com/path/" and "www.example.com/path" are equal.
    "https://www.example.com/" and "http://www.example.com/" are equal.

    Args:
        url_a (ParseResult): first URL
        url_b (ParseResult): second URL

    Returns:
        bool: True if url_a and url_b are considered equal
    """

    def _strip_www(netloc):
        # drop exactly one leading "www." label, if present
        return netloc[len('www.'):] if netloc.startswith('www.') else netloc

    def _strip_trailing_slash(path):
        # drop exactly one trailing "/", if present
        return path[:-1] if path.endswith('/') else path

    if _strip_www(url_a.netloc) != _strip_www(url_b.netloc):
        return False
    if url_a.query != url_b.query:
        return False
    if url_a.fragment != url_b.fragment:
        return False

    decoded_a = unquote(_strip_trailing_slash(url_a.path))
    decoded_b = unquote(_strip_trailing_slash(url_b.path))
    return decoded_a == decoded_b

Referenced by merge_two_infoboxes().

+ Here is the caller graph for this function:

◆ merge_two_infoboxes()

searx.results.merge_two_infoboxes ( infobox1,
infobox2 )

Definition at line 63 of file results.py.

def merge_two_infoboxes(infobox1, infobox2):  # pylint: disable=too-many-branches, too-many-statements
    """Merge ``infobox2`` into ``infobox1`` in place.

    Nothing is returned; ``infobox1`` is mutated.  The merge rules are:

    - ``engine``: taken from ``infobox2`` when its engine has the strictly
      higher weight (an engine without a ``weight`` attribute counts as 1).
    - ``engines``: ``infobox2['engines']`` is merged in with ``|=``
      (presumably both are sets -- confirm against the caller).
    - ``urls``: an URL of ``infobox2`` is appended unless an URL already in
      the list has the same non-``None`` ``entity`` or compares equal via
      ``compare_urls``.
    - ``img_src``: taken from ``infobox2`` when ``infobox1`` has none or
      ``infobox2``'s engine has the higher weight.
    - ``attributes``: an attribute of ``infobox2`` is appended unless its
      ``label`` or ``entity`` is already used by an attribute of ``infobox1``.
    - ``content``: replaced when ``infobox1`` has none or ``infobox2``'s
      content is longer according to ``result_content_len``.

    Args:
        infobox1 (dict): infobox that receives the merged data
        infobox2 (dict): infobox whose data is merged into ``infobox1``
    """
    # get engines weights
    weight1 = getattr(engines[infobox1['engine']], 'weight', 1)
    weight2 = getattr(engines[infobox2['engine']], 'weight', 1)

    if weight2 > weight1:
        infobox1['engine'] = infobox2['engine']

    infobox1['engines'] |= infobox2['engines']

    if 'urls' in infobox2:
        urls1 = infobox1.get('urls', None)
        if urls1 is None:
            urls1 = []

        for url2 in infobox2.get('urls', []):
            parsed_url2 = urlparse(url2.get('url', ''))
            entity_url2 = url2.get('entity')
            # urls1 grows while iterating infobox2's URLs, so duplicates
            # inside infobox2 itself are filtered as well.
            is_duplicate = any(
                (entity_url2 is not None and url1.get('entity') == entity_url2)
                or compare_urls(urlparse(url1.get('url', '')), parsed_url2)
                for url1 in urls1
            )
            if not is_duplicate:
                urls1.append(url2)

        infobox1['urls'] = urls1

    if 'img_src' in infobox2:
        if infobox1.get('img_src', None) is None or weight2 > weight1:
            infobox1['img_src'] = infobox2.get('img_src')

    if 'attributes' in infobox2:
        attributes1 = infobox1.get('attributes')
        if attributes1 is None:
            infobox1['attributes'] = attributes1 = []

        # Labels and entities already used by infobox1.  A missing label or
        # entity (None) is deliberately NOT recorded: otherwise a single
        # attribute without an entity would make every entity-less attribute
        # of infobox2 look like a duplicate and silently drop it.
        known_keys = set()
        for attribute in attributes1:
            for key in ('label', 'entity'):
                value = attribute.get(key)
                if value is not None:
                    known_keys.add(value)

        for attribute in infobox2.get('attributes', []):
            if attribute.get('label') not in known_keys and attribute.get('entity') not in known_keys:
                attributes1.append(attribute)

    if 'content' in infobox2:
        content1 = infobox1.get('content', None)
        content2 = infobox2.get('content', '')
        # keep the longer of the two contents
        if content1 is None or result_content_len(content2) > result_content_len(content1):
            infobox1['content'] = content2

References compare_urls(), and result_content_len().

+ Here is the call graph for this function:

◆ result_content_len()

searx.results.result_content_len ( content)

Definition at line 24 of file results.py.

def result_content_len(content):
    """Return the length of ``content`` without the ignored characters.

    Every match of ``CONTENT_LEN_IGNORED_CHARS_REGEX`` is removed before the
    length is taken.  Anything that is not a ``str`` has length 0.

    Args:
        content: text to measure; non-string values are accepted

    Returns:
        int: number of remaining characters, or 0 for non-string input
    """
    # NOTE(review): in the pattern of CONTENT_LEN_IGNORED_CHARS_REGEX the
    # sequence ")-_" forms a character range, so digits and upper-case ASCII
    # letters are stripped as well -- confirm that this is intended.
    if not isinstance(content, str):
        return 0
    significant = CONTENT_LEN_IGNORED_CHARS_REGEX.sub('', content)
    return len(significant)

Referenced by merge_two_infoboxes().

+ Here is the caller graph for this function:

◆ result_score()

searx.results.result_score ( result,
priority )

Definition at line 135 of file results.py.

def result_score(result, priority):
    """Compute the ranking score of a merged result.

    The base weight is the product of the ``weight`` attributes of all engines
    listed in ``result['engines']`` (engines without such an attribute are
    skipped), multiplied by the number of entries in ``result['positions']``.

    Args:
        result (dict): result with the keys ``engines`` and ``positions``
        priority (str): ``'low'`` gives a score of 0, ``'high'`` adds the full
            weight once per position, any other value adds
            ``weight / position`` per position

    Returns:
        int | float: the accumulated score (0 when there is nothing to add)
    """
    positions = result['positions']

    weight = 1.0
    for engine_name in result['engines']:
        if hasattr(engines.get(engine_name), 'weight'):
            weight *= float(engines[engine_name].weight)
    weight *= len(positions)

    if priority == 'low':
        # low priority results never collect any score
        return 0
    if priority == 'high':
        # position is irrelevant: every occurrence counts with full weight
        return sum(weight for _ in positions)
    # presumably positions are 1-based ranks; a position of 0 would raise
    # ZeroDivisionError here -- verify against the caller
    return sum(weight / position for position in positions)

Variable Documentation

◆ CONTENT_LEN_IGNORED_CHARS_REGEX

searx.results.CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)

Definition at line 20 of file results.py.