.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
searx.results Namespace Reference

Classes

class  ResultContainer
 
class  Timing
 
class  UnresponsiveEngine
 

Functions

 result_content_len (content)
 
 compare_urls (url_a, url_b)
 
 merge_two_infoboxes (infobox1, infobox2)
 
 result_score (result, priority)
 

Variables

 CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
 
 WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
 

Function Documentation

◆ compare_urls()

searx.results.compare_urls ( url_a,
url_b )
Lazily compare two URLs.
"www.example.com" and "example.com" are equal.
"www.example.com/path/" and "www.example.com/path" are equal.
"https://www.example.com/" and "http://www.example.com/" are equal.

Args:
    url_a (ParseResult): first URL
    url_b (ParseResult): second URL

Returns:
    bool: True if url_a and url_b are equal

Definition at line 27 of file results.py.

def compare_urls(url_a, url_b):
    """Loosely compare two parsed URLs.

    Two URLs are treated as the same when they differ only by:

    - a leading ``www.`` in the network location,
    - the scheme (it is never inspected),
    - one trailing ``/`` at the end of the path,
    - percent-encoding of the path (both paths are unquoted first).

    Query strings and fragments must match exactly.

    Args:
        url_a (ParseResult): first URL
        url_b (ParseResult): second URL

    Returns:
        bool: True if url_a and url_b are considered equal
    """

    def _bare_host(netloc):
        # drop a single leading "www." so both host spellings compare equal
        return netloc[len('www.'):] if netloc.startswith('www.') else netloc

    def _trimmed_path(path):
        # drop at most one trailing slash
        return path[:-1] if path.endswith('/') else path

    same_location = (
        _bare_host(url_a.netloc) == _bare_host(url_b.netloc)
        and url_a.query == url_b.query
        and url_a.fragment == url_b.fragment
    )
    if not same_location:
        return False

    return unquote(_trimmed_path(url_a.path)) == unquote(_trimmed_path(url_b.path))
58
59

Referenced by searx.results.merge_two_infoboxes().

+ Here is the caller graph for this function:

◆ merge_two_infoboxes()

searx.results.merge_two_infoboxes ( infobox1,
infobox2 )

Definition at line 60 of file results.py.

def merge_two_infoboxes(infobox1, infobox2):  # pylint: disable=too-many-branches, too-many-statements
    """Merge ``infobox2`` into ``infobox1`` in place.

    The merge works field by field:

    - ``engine``: taken from ``infobox2`` when its engine has the strictly
      greater weight (an engine without a ``weight`` attribute counts as 1).
    - ``engines``: updated in place with ``|=``.
    - ``urls``: URLs of ``infobox2`` are appended unless an existing URL has
      the same ``entity`` or compares equal according to ``compare_urls``.
    - ``img_src``: taken from ``infobox2`` when ``infobox1`` has none or when
      ``infobox2``'s engine has the greater weight.
    - ``attributes``: attributes of ``infobox2`` are appended unless their
      ``label`` or ``entity`` is already used by an attribute of ``infobox1``.
    - ``content``: taken from ``infobox2`` when ``infobox1`` has none or when
      ``infobox2``'s content is longer according to ``result_content_len``.

    Args:
        infobox1 (dict): infobox that receives the merged data (mutated)
        infobox2 (dict): infobox whose data is merged into ``infobox1``

    Returns:
        None
    """
    # get engines weights
    weight1 = getattr(engines[infobox1['engine']], 'weight', 1)
    weight2 = getattr(engines[infobox2['engine']], 'weight', 1)

    if weight2 > weight1:
        infobox1['engine'] = infobox2['engine']

    infobox1['engines'] |= infobox2['engines']

    if 'urls' in infobox2:
        urls1 = infobox1.get('urls', None)
        if urls1 is None:
            urls1 = []

        for url2 in infobox2.get('urls', []):
            unique_url = True
            parsed_url2 = urlparse(url2.get('url', ''))
            entity_url2 = url2.get('entity')
            for url1 in urls1:
                # same entity, or same URL after the lazy comparison
                if (entity_url2 is not None and url1.get('entity') == entity_url2) or compare_urls(
                    urlparse(url1.get('url', '')), parsed_url2
                ):
                    unique_url = False
                    break
            if unique_url:
                urls1.append(url2)

        infobox1['urls'] = urls1

    if 'img_src' in infobox2:
        img1 = infobox1.get('img_src', None)
        img2 = infobox2.get('img_src')
        if img1 is None or weight2 > weight1:
            infobox1['img_src'] = img2

    if 'attributes' in infobox2:
        attributes1 = infobox1.get('attributes')
        if attributes1 is None:
            infobox1['attributes'] = attributes1 = []

        # Labels and entities already used by infobox1.  A missing key
        # (``None``) is deliberately NOT recorded: otherwise one attribute
        # without an ``entity`` in infobox1 would make every attribute without
        # an ``entity`` in infobox2 look like a duplicate and get dropped.
        known_keys = set()
        for attribute in attributes1:
            for key in (attribute.get('label'), attribute.get('entity')):
                if key is not None:
                    known_keys.add(key)

        for attribute in infobox2.get('attributes', []):
            # ``None`` is never in known_keys, so a missing key cannot match
            if attribute.get('label') not in known_keys and attribute.get('entity') not in known_keys:
                attributes1.append(attribute)

    if 'content' in infobox2:
        content1 = infobox1.get('content', None)
        content2 = infobox2.get('content', '')
        if content1 is not None:
            # keep the longer of the two contents
            if result_content_len(content2) > result_content_len(content1):
                infobox1['content'] = content2
        else:
            infobox1['content'] = content2
130
131

References searx.results.compare_urls(), and searx.results.result_content_len().

+ Here is the call graph for this function:

◆ result_content_len()

searx.results.result_content_len ( content)

Definition at line 21 of file results.py.

def result_content_len(content):
    """Return the length of ``content`` without the ignored characters.

    Every match of ``CONTENT_LEN_IGNORED_CHARS_REGEX`` is removed before the
    remaining characters are counted.  Anything that is not a ``str`` has a
    length of 0.

    Args:
        content: text to measure; non-string values are accepted

    Returns:
        int: number of characters left after stripping, or 0 for non-strings
    """
    # NOTE(review): the character class in CONTENT_LEN_IGNORED_CHARS_REGEX
    # contains ``)-_``, which is a range from ')' to '_' and therefore also
    # matches digits and uppercase ASCII letters -- confirm this is intended.
    if not isinstance(content, str):
        return 0
    stripped = CONTENT_LEN_IGNORED_CHARS_REGEX.sub('', content)
    return len(stripped)
25
26

Referenced by searx.results.merge_two_infoboxes().

+ Here is the caller graph for this function:

◆ result_score()

searx.results.result_score ( result,
priority )

Definition at line 132 of file results.py.

def result_score(result, priority):
    """Compute the score of a result from its engines and positions.

    The weight starts at 1.0, is multiplied by the ``weight`` of every engine
    listed in ``result['engines']`` that defines one, and finally by the
    number of entries in ``result['positions']``.

    Args:
        result (dict): result with the keys ``engines`` and ``positions``
        priority: ``'low'`` gives a score of 0, ``'high'`` adds the full
            weight for each position, any other value adds
            ``weight / position`` for each position

    Returns:
        The accumulated score (0 when there are no positions or the priority
        is ``'low'``)
    """
    weight = 1.0
    for engine_name in result['engines']:
        engine = engines[engine_name]
        if hasattr(engine, 'weight'):
            weight *= float(engine.weight)

    positions = result['positions']
    weight *= len(positions)

    score = 0
    # low priority results never collect any score
    if priority == 'low':
        return score

    is_high = priority == 'high'
    for position in positions:
        # high priority ignores the position, everything else is damped by it
        score += weight if is_high else weight / position

    return score
151
152

Variable Documentation

◆ CONTENT_LEN_IGNORED_CHARS_REGEX

searx.results.CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)

Definition at line 16 of file results.py.

◆ WHITESPACE_REGEX

searx.results.WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)

Definition at line 17 of file results.py.