.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
searx.results Namespace Reference

Classes

class  ResultContainer
 
class  Timing
 
class  UnresponsiveEngine
 

Functions

 result_content_len (content)
 
 compare_urls (url_a, url_b)
 
 merge_two_infoboxes (infobox1, infobox2)
 
 result_score (result, priority)
 

Variables

 CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
 
 WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
 

Function Documentation

◆ compare_urls()

searx.results.compare_urls ( url_a,
url_b )
Lazy comparison of two URLs.
"www.example.com" and "example.com" are equal.
"www.example.com/path/" and "www.example.com/path" are equal.
"https://www.example.com/" and "http://www.example.com/" are equal.

Args:
    url_a (ParseResult): first URL
    url_b (ParseResult): second URL

Returns:
    bool: True if url_a and url_b are equal

Definition at line 26 of file results.py.

def _comparable_host(netloc):
    """Return ``netloc`` normalised for the lazy host comparison.

    The host[:port] part is lower-cased and one leading ``www.`` is removed.
    Anything before the last ``@`` (userinfo) is kept verbatim because it is
    case-sensitive.
    """
    userinfo, at_sign, hostport = netloc.rpartition('@')
    hostport = hostport.lower()
    if hostport.startswith('www.'):
        hostport = hostport[len('www.'):]
    return userinfo + at_sign + hostport


def compare_urls(url_a, url_b):
    """Lazy comparison of two URLs.

    "www.example.com" and "example.com" are equal.
    "WWW.Example.com" and "www.example.com" are equal (host names are case-insensitive).
    "www.example.com/path/" and "www.example.com/path" are equal.
    "https://www.example.com/" and "http://www.example.com/" are equal.

    The scheme is never looked at.  Query and fragment must match exactly.
    Paths are compared after removing one trailing "/" and percent-decoding.

    Args:
        url_a (ParseResult): first URL
        url_b (ParseResult): second URL

    Returns:
        bool: True if url_a and url_b are equal
    """
    # ignore "www." and letter case of the host in the comparison
    if _comparable_host(url_a.netloc) != _comparable_host(url_b.netloc):
        return False

    if url_a.query != url_b.query or url_a.fragment != url_b.fragment:
        return False

    # remove / from the end of the path if required
    path_a = url_a.path[:-1] if url_a.path.endswith('/') else url_a.path
    path_b = url_b.path[:-1] if url_b.path.endswith('/') else url_b.path

    return unquote(path_a) == unquote(path_b)
57
58

Referenced by searx.results.merge_two_infoboxes().

+ Here is the caller graph for this function:

◆ merge_two_infoboxes()

searx.results.merge_two_infoboxes ( infobox1,
infobox2 )

Definition at line 59 of file results.py.

def merge_two_infoboxes(infobox1, infobox2):  # pylint: disable=too-many-branches, too-many-statements
    """Merge ``infobox2`` into ``infobox1`` in place.

    ``infobox1`` is mutated and nothing is returned.  Merge rules:

    - ``engine``: taken from the infobox whose engine has the higher weight
      (an engine without a ``weight`` attribute counts as 1).
    - ``engines``: updated in place with ``|=`` (presumably sets; verify
      against the caller).
    - ``urls``: a URL from ``infobox2`` is appended unless an existing URL has
      the same non-None ``entity`` or compares equal via ``compare_urls``.
    - ``img_src``: taken from ``infobox2`` when ``infobox1`` has none or when
      ``infobox2``'s engine has the higher weight.
    - ``attributes``: an attribute from ``infobox2`` is appended unless its
      ``label`` or ``entity`` is already used by an attribute of ``infobox1``.
    - ``content``: replaced when ``infobox1`` has none or when ``infobox2``'s
      content is longer according to ``result_content_len``.

    Args:
        infobox1 (dict): infobox that receives the merged data
        infobox2 (dict): infobox whose data is merged into ``infobox1``
    """
    # get engines weights
    weight1 = getattr(engines[infobox1['engine']], 'weight', 1)
    weight2 = getattr(engines[infobox2['engine']], 'weight', 1)

    if weight2 > weight1:
        infobox1['engine'] = infobox2['engine']

    infobox1['engines'] |= infobox2['engines']

    if 'urls' in infobox2:
        urls1 = infobox1.get('urls', None)
        if urls1 is None:
            urls1 = []

        for url2 in infobox2.get('urls', []):
            parsed_url2 = urlparse(url2.get('url', ''))
            entity_url2 = url2.get('entity')
            # urls1 grows while we iterate infobox2's URLs, so duplicates
            # inside infobox2 itself are filtered as well.
            already_known = any(
                (entity_url2 is not None and url1.get('entity') == entity_url2)
                or compare_urls(urlparse(url1.get('url', '')), parsed_url2)
                for url1 in urls1
            )
            if not already_known:
                urls1.append(url2)

        infobox1['urls'] = urls1

    if 'img_src' in infobox2:
        img1 = infobox1.get('img_src', None)
        if img1 is None or weight2 > weight1:
            infobox1['img_src'] = infobox2.get('img_src')

    if 'attributes' in infobox2:
        attributes1 = infobox1.get('attributes')
        if attributes1 is None:
            infobox1['attributes'] = attributes1 = []

        # Labels and entities already used by infobox1.  A missing key (None)
        # is deliberately NOT recorded: otherwise one attribute without an
        # 'entity' would make every incoming attribute without an 'entity'
        # look like a duplicate, even when its label is new.
        known_keys = set()
        for attribute in attributes1:
            for key in (attribute.get('label'), attribute.get('entity')):
                if key is not None:
                    known_keys.add(key)

        for attribute in infobox2.get('attributes', []):
            # None is never in known_keys, so a missing key cannot match.
            if attribute.get('label') not in known_keys and attribute.get('entity') not in known_keys:
                attributes1.append(attribute)

    if 'content' in infobox2:
        content1 = infobox1.get('content', None)
        content2 = infobox2.get('content', '')
        # keep the longer text; without existing content take infobox2's
        if content1 is None or result_content_len(content2) > result_content_len(content1):
            infobox1['content'] = content2
129
130

References searx.results.compare_urls(), and searx.results.result_content_len().

+ Here is the call graph for this function:

◆ result_content_len()

searx.results.result_content_len ( content)

Definition at line 20 of file results.py.

def result_content_len(content):
    """Return the length of ``content`` without the ignored characters.

    Every match of ``CONTENT_LEN_IGNORED_CHARS_REGEX`` is removed before the
    length is taken.  Anything that is not a ``str`` has length 0.

    NOTE(review): in that pattern ``)-_`` is parsed as a character range
    (from ')' to '_'), so digits and upper-case ASCII letters are ignored
    too -- confirm whether that is intended.
    """
    if not isinstance(content, str):
        return 0
    significant = CONTENT_LEN_IGNORED_CHARS_REGEX.sub('', content)
    return len(significant)
24
25

Referenced by searx.results.merge_two_infoboxes().

+ Here is the caller graph for this function:

◆ result_score()

searx.results.result_score ( result,
priority )

Definition at line 131 of file results.py.

def result_score(result, priority):
    """Compute the score of ``result`` from engine weights and positions.

    The weight is the product of the ``weight`` attributes of all engines
    listed in ``result['engines']`` (engines without such an attribute are
    skipped), multiplied by the number of entries in ``result['positions']``.

    Args:
        result (dict): needs the keys ``'engines'`` and ``'positions'``
        priority (str): ``'low'`` gives a score of 0, ``'high'`` adds the full
            weight once per position, any other value adds
            ``weight / position`` per position

    Returns:
        The accumulated score (0 when there are no positions).
    """
    weight = 1.0
    for engine_name in result['engines']:
        engine = engines.get(engine_name)
        if hasattr(engine, 'weight'):
            weight *= float(engine.weight)

    positions = result['positions']
    weight *= len(positions)

    # the priority does not change per position, so decide once
    if priority == 'low':
        return 0
    if priority == 'high':
        return sum(weight for _ in positions)
    return sum(weight / position for position in positions)
150
151

Variable Documentation

◆ CONTENT_LEN_IGNORED_CHARS_REGEX

searx.results.CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)

Definition at line 15 of file results.py.

◆ WHITESPACE_REGEX

searx.results.WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)

Definition at line 16 of file results.py.