.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
searx.webutils Namespace Reference

Classes

class  CSVWriter
 
class  JSONEncoder
 

Functions

 get_translated_errors (Iterable[UnresponsiveEngine] unresponsive_engines)
 
None write_csv_response (CSVWriter csv, ResultContainer rc)
 
str get_json_response (SearchQuery sq, ResultContainer rc)
 
 get_themes (templates_path)
 
str get_hash_for_file (pathlib.Path file)
 
Dict[str, str] get_static_files (str static_path)
 
 get_result_templates (templates_path)
 
 new_hmac (secret_key, url)
 
 is_hmac_of (secret_key, value, hmac_to_check)
 
 prettify_url (url, max_length=74)
 
bool contains_cjko (str s)
 
str regex_highlight_cjk (str word)
 
 highlight_content (content, query)
 
str searxng_l10n_timespan (datetime dt)
 
List[Tuple[str, Iterable[Engine]]] group_engines_in_tab (Iterable[Engine] engines)
 

Variables

 VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
 
 logger = logger.getChild('webutils')
 
 timeout_text = gettext('timeout')
 
 parsing_error_text = gettext('parsing error')
 
 http_protocol_error_text = gettext('HTTP protocol error')
 
 network_error_text = gettext('network error')
 
 ssl_cert_error_text = gettext("SSL error: certificate validation has failed")
 
dict exception_classname_to_text
 
str NO_SUBGROUPING = 'without further subgrouping'
 

Function Documentation

◆ contains_cjko()

bool searx.webutils.contains_cjko ( str s)
This function checks whether or not a string contains Chinese, Japanese,
or Korean characters. It employs a regex and uses \u escape sequences to
match any character in a set of Unicode ranges.

Args:
    s (str): string to be checked.

Returns:
    bool: True if the input s contains the characters and False otherwise.

Definition at line 233 of file webutils.py.

def contains_cjko(s: str) -> bool:
    """Report whether a string holds any Chinese, Japanese or Korean character.

    The check is a single regex search against one character class that is
    assembled from Unicode ranges written as escape sequences.

    Args:
        s (str): text to inspect.

    Returns:
        bool: True when at least one character of ``s`` lies in one of the
        ranges, False otherwise (an empty string yields False).
    """
    cjk_ranges = ''.join(
        [
            '\u4e00-\u9fff',  # Chinese characters
            '\u3040-\u309f',  # Japanese hiragana
            '\u30a0-\u30ff',  # Japanese katakana
            '\u4e00-\u9faf',  # Japanese kanji
            '\uac00-\ud7af',  # Korean hangul syllables
            '\u1100-\u11ff',  # Korean hangul jamo
        ]
    )
    return re.search('[' + cjk_ranges + ']', s) is not None
253
254

Referenced by regex_highlight_cjk().

+ Here is the caller graph for this function:

◆ get_hash_for_file()

str searx.webutils.get_hash_for_file ( pathlib.Path file)

Definition at line 180 of file webutils.py.

def get_hash_for_file(file: pathlib.Path) -> str:
    """Return the SHA-1 hex digest of the content of ``file``.

    Args:
        file (pathlib.Path): path of the file to hash; it is opened in binary mode.

    Returns:
        str: hexadecimal SHA-1 digest of the file's bytes.
    """
    digest = hashlib.sha1()
    with file.open('rb') as f:
        # Feed the hash chunk by chunk so that a large file is never held in
        # memory as a whole; the resulting digest is the same.
        for chunk in iter(lambda: f.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()
185
186

Referenced by get_static_files().

+ Here is the caller graph for this function:

◆ get_json_response()

str searx.webutils.get_json_response ( SearchQuery sq,
ResultContainer rc )
Returns the JSON string of the results to a query (``application/json``)

Definition at line 159 of file webutils.py.

def get_json_response(sq: SearchQuery, rc: ResultContainer) -> str:
    """Build the JSON document (``application/json``) for the results of a query.

    The document holds the query string, the number of results, the ordered
    results and the answers (both converted with ``as_dict()``), the
    corrections, the infoboxes, the suggestions and the translated errors of
    the unresponsive engines.  Serialization is done with :py:obj:`JSONEncoder`.
    """
    payload = dict(
        query=sq.query,
        number_of_results=rc.number_of_results,
        results=[result.as_dict() for result in rc.get_ordered_results()],
        answers=[answer.as_dict() for answer in rc.answers],
        corrections=list(rc.corrections),
        infoboxes=rc.infoboxes,
        suggestions=list(rc.suggestions),
        unresponsive_engines=get_translated_errors(rc.unresponsive_engines),
    )
    return json.dumps(payload, cls=JSONEncoder)
173
174

References get_translated_errors().

+ Here is the call graph for this function:

◆ get_result_templates()

searx.webutils.get_result_templates ( templates_path)

Definition at line 206 of file webutils.py.

def get_result_templates(templates_path):
    """Collect the result templates found below ``templates_path``.

    Every file located in a directory whose path ends with ``result_templates``
    is reported by its path relative to ``templates_path``.

    Args:
        templates_path: root folder of the templates (with or without a
            trailing path separator).

    Returns:
        set: relative paths of the result template files.
    """
    result_templates = set()
    for directory, _, files in os.walk(templates_path):
        if not directory.endswith('result_templates'):
            continue
        # relpath() instead of slicing by len(templates_path) + 1: the slice
        # dropped the first character of the relative path whenever
        # templates_path was given with a trailing separator.
        relative_dir = os.path.relpath(directory, templates_path)
        if relative_dir == os.curdir:
            # templates_path itself is a result_templates folder
            relative_dir = ''
        result_templates.update(os.path.join(relative_dir, filename) for filename in files)
    return result_templates
215
216

◆ get_static_files()

Dict[str, str] searx.webutils.get_static_files ( str static_path)

Definition at line 187 of file webutils.py.

def get_static_files(static_path: str) -> Dict[str, str]:
    """Map every static file below ``static_path`` to the hash of its content.

    Keys are the file paths relative to ``static_path``, values come from
    ``get_hash_for_file``.  Entries whose name starts with a dot are skipped,
    and the directories ``src`` and ``node_modules`` are not descended into.
    """
    root = pathlib.Path(static_path)
    hashes: Dict[str, str] = {}
    skipped_dirs = ('node_modules', 'src')

    def collect(folder: pathlib.Path):
        for entry in folder.iterdir():
            if entry.name.startswith('.'):
                # hidden file or folder
                continue
            if entry.is_file():
                hashes[str(entry.relative_to(root))] = get_hash_for_file(entry)
            elif entry.is_dir() and entry.name not in skipped_dirs:
                collect(entry)

    collect(root)
    return hashes
204
205

References get_hash_for_file().

+ Here is the call graph for this function:

◆ get_themes()

searx.webutils.get_themes ( templates_path)
Returns available themes list.

Definition at line 175 of file webutils.py.

def get_themes(templates_path):
    """Returns the list of available themes, i.e. the names of the entries
    found directly in ``templates_path``."""
    themes = os.listdir(templates_path)
    return themes
178
179

◆ get_translated_errors()

searx.webutils.get_translated_errors ( Iterable[UnresponsiveEngine] unresponsive_engines)

Definition at line 69 of file webutils.py.

def get_translated_errors(unresponsive_engines: Iterable[UnresponsiveEngine]):
    """Return ``(engine, message)`` tuples for the unresponsive engines.

    The message is the translated text registered for the engine's
    ``error_type`` in ``exception_classname_to_text``; the entry stored under
    ``None`` is used when there is no (non-empty) text for that type.  For a
    suspended engine the message is prefixed with the translation of
    "Suspended".  The list is sorted by the engine field.
    """
    messages = []
    for engine in unresponsive_engines:
        text = exception_classname_to_text.get(engine.error_type) or exception_classname_to_text[None]
        message = gettext(text)
        if engine.suspended:
            message = gettext('Suspended') + ': ' + message
        messages.append((engine.engine, message))

    messages.sort(key=lambda pair: pair[0])
    return messages
82
83

Referenced by get_json_response().

+ Here is the caller graph for this function:

◆ group_engines_in_tab()

List[Tuple[str, Iterable[Engine]]] searx.webutils.group_engines_in_tab ( Iterable[Engine] engines)
Groups an Iterable of engines by their first non-tab category (first subgroup). Each returned item is a tuple (group name, group bang, sorted engines).

Definition at line 321 of file webutils.py.

def group_engines_in_tab(engines: Iterable[Engine]) -> List[Tuple[str, str, List[Engine]]]:
    """Groups an Iterable of engines by their first non-tab category (first subgroup).

    An engine's subgroup is the first of its categories that is neither one of
    the ``categories_as_tabs`` from the settings nor ``DEFAULT_CATEGORY``;
    engines without such a category fall into ``NO_SUBGROUPING``.

    Returns:
        A list of ``(group name, group bang, engines)`` tuples.  Groups are
        sorted case-insensitively by name with ``NO_SUBGROUPING`` last.  The
        bang is ``'!'`` followed by the group name with spaces replaced by
        underscores (empty for ``NO_SUBGROUPING``).  The engines of a group
        are sorted by the ``language`` entry of ``about`` and then by name.
    """
    # Built once here instead of once per category of every engine.
    excluded_categories = list(settings['categories_as_tabs'].keys()) + [DEFAULT_CATEGORY]

    def get_subgroup(eng):
        for category in eng.categories:
            if category not in excluded_categories:
                return category
        return NO_SUBGROUPING

    def group_sort_key(group):
        return (group[0] == NO_SUBGROUPING, group[0].lower())

    def engine_sort_key(engine):
        return (engine.about.get('language', ''), engine.name)

    # groupby() only merges adjacent items, hence the sort by the same key first
    subgroups = itertools.groupby(sorted(engines, key=get_subgroup), get_subgroup)
    sorted_groups = sorted(((name, list(members)) for name, members in subgroups), key=group_sort_key)

    ret_val = []
    for groupname, _engines in sorted_groups:
        group_bang = ('!' + groupname.replace(' ', '_')) if groupname != NO_SUBGROUPING else ''
        ret_val.append((groupname, group_bang, sorted(_engines, key=engine_sort_key)))

    return ret_val

◆ highlight_content()

searx.webutils.highlight_content ( content,
query )

Definition at line 275 of file webutils.py.

def highlight_content(content, query):
    """Wrap the occurrences of the query words in ``content`` into
    ``<span class="highlight">`` elements.

    Args:
        content: text to highlight.
        query: search query; it is split on whitespace and single / double
            quotes are removed from every word.

    Returns:
        ``None`` when ``content`` is empty, the unchanged ``content`` when it
        contains a ``<`` (treated as HTML) or when no query word matches,
        otherwise ``content`` with the matches wrapped.
    """
    if not content:
        return None

    # ignoring html contents
    if content.find('<') != -1:
        return content

    queries = []
    for qs in query.split():
        qs = qs.replace("'", "").replace('"', '').replace(" ", "")
        if len(qs) > 0:
            # collect the matches as they are written in content (the search is case-insensitive)
            queries.extend(re.findall(regex_highlight_cjk(qs), content, flags=re.I | re.U))
    if len(queries) == 0:
        return content

    regex = re.compile("|".join(map(regex_highlight_cjk, queries)))
    # The value returned by a replacement *function* is inserted verbatim by
    # re.sub(), no backslash escapes are processed.  Escaping the backslashes
    # here would therefore double them in the output.
    return regex.sub(lambda match: f'<span class="highlight">{match.group(0)}</span>', content)
294
295

References regex_highlight_cjk().

+ Here is the call graph for this function:

◆ is_hmac_of()

searx.webutils.is_hmac_of ( secret_key,
value,
hmac_to_check )

Definition at line 221 of file webutils.py.

def is_hmac_of(secret_key, value, hmac_to_check):
    """Tell whether ``hmac_to_check`` equals the HMAC that ``new_hmac`` computes
    for ``value`` with ``secret_key``.

    Digests of different length are rejected right away, equal-length digests
    are compared with ``hmac.compare_digest``.
    """
    expected = new_hmac(secret_key, value)
    if len(expected) != len(hmac_to_check):
        return False
    return hmac.compare_digest(expected, hmac_to_check)
224
225

References new_hmac().

+ Here is the call graph for this function:

◆ new_hmac()

searx.webutils.new_hmac ( secret_key,
url )

Definition at line 217 of file webutils.py.

def new_hmac(secret_key, url):
    """Return the hexadecimal HMAC-SHA256 of ``url`` keyed with ``secret_key``.

    ``secret_key`` is a string (it is encoded before use), ``url`` is handed
    to ``hmac.new`` as the message without any conversion.
    """
    key_bytes = secret_key.encode()
    mac = hmac.new(key_bytes, msg=url, digestmod=hashlib.sha256)
    return mac.hexdigest()
219
220

Referenced by is_hmac_of().

+ Here is the caller graph for this function:

◆ prettify_url()

searx.webutils.prettify_url ( url,
max_length = 74 )

Definition at line 226 of file webutils.py.

def prettify_url(url, max_length=74):
    """Shorten ``url`` for display.

    A URL of at most ``max_length`` characters is returned unchanged.  A longer
    one is reduced to its first and last ``int(max_length / 2 + 1)`` characters
    joined by ``[...]``.
    """
    if len(url) <= max_length:
        return url
    chunk_len = int(max_length / 2 + 1)
    head, tail = url[:chunk_len], url[-chunk_len:]
    return f'{head}[...]{tail}'
231
232

◆ regex_highlight_cjk()

str searx.webutils.regex_highlight_cjk ( str word)
Generate the regex pattern to match a given word, depending on
whether or not the word contains CJK characters.
If the word contains a CJK character, the regex pattern matches
every occurrence of the word in the text, regardless of what
surrounds it; if not, it only matches the word standing alone, i.e. starting
at a word boundary and not directly followed by another word character.

Args:
    word (str): the word to be matched with regex pattern.

Returns:
    str: the regex pattern for the word.

Definition at line 255 of file webutils.py.

def regex_highlight_cjk(word: str) -> str:
    """Build the regex pattern used to find ``word`` in a text.

    The word is escaped first.  When it contains CJK characters the pattern
    matches the word wherever it occurs; otherwise the pattern only matches
    the word when it starts at a word boundary and is not directly followed
    by another word character.

    Args:
        word (str): the word to be matched with regex pattern.

    Returns:
        str: the regex pattern for the word (the word is the only group).
    """
    escaped = re.escape(word)
    pattern = fr'({escaped})' if contains_cjko(escaped) else fr'\b({escaped})(?!\w)'
    return pattern
273
274

References contains_cjko().

Referenced by highlight_content().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ searxng_l10n_timespan()

str searx.webutils.searxng_l10n_timespan ( datetime dt)
Returns a human-readable and translated string indicating how long ago
a date was in the past / the time span of the date to the present.

If the date is January 1st at midnight (hour, minute and second are zero),
only the year of the date is returned.

Definition at line 296 of file webutils.py.

def searxng_l10n_timespan(dt: datetime) -> str:  # pylint: disable=invalid-name
    """Returns a human-readable and translated string indicating how long ago
    a date was in the past / the time span of the date to the present.

    - January 1st at midnight: only the year of the date is returned.
    - Not older than one day (and not in the future): the translated
      "... hour(s), ... minute(s) ago" text is returned.
    - Anything else: the date formatted by ``format_date``.

    The time zone of ``dt`` is dropped (not converted) before it is compared
    with the naive local time.
    """
    # TODO, check if timezone is calculated right  # pylint: disable=fixme
    d = dt.date()
    t = dt.time()
    if d.month == 1 and d.day == 1 and t.hour == 0 and t.minute == 0 and t.second == 0:
        return str(d.year)

    # Read the clock once so that the range check and the printed values are
    # based on the very same time span.
    age = datetime.now() - dt.replace(tzinfo=None)

    # A negative span (date in the future) is excluded: its ``seconds`` field
    # would otherwise be reported as a bogus "23 hour(s) ... ago".
    if timedelta(0) <= age <= timedelta(days=1):
        # total_seconds() instead of ``.seconds``: the latter is 0 for a span
        # of exactly one day.
        total_minutes = int(age.total_seconds()) // 60
        hours, minutes = divmod(total_minutes, 60)
        if hours == 0:
            return gettext('{minutes} minute(s) ago').format(minutes=minutes)
        return gettext('{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)
    return format_date(dt)
316
317

◆ write_csv_response()

None searx.webutils.write_csv_response ( CSVWriter csv,
ResultContainer rc )
Write the rows of the results to a query (``application/csv``) into a CSV
table (:py:obj:`CSVWriter`).  The first line in the table contains the column
names.  The column "type" specifies the type of a row; the following types are
included in the table:

- result
- answer
- suggestion
- correction

Definition at line 112 of file webutils.py.

def write_csv_response(csv: CSVWriter, rc: ResultContainer) -> None:  # pylint: disable=redefined-outer-name
    """Write the rows of the results to a query (``application/csv``) into a CSV
    table (:py:obj:`CSVWriter`).  The first row holds the column names, the
    rows that follow are written in this order:

    - result: ordered results, the column "type" is set to ``result``
    - answer: answers, written as returned by ``as_dict()`` plus the "host" column
    - suggestion: the suggestion is stored in the column "title"
    - correction: the correction is stored in the column "title"

    Columns a row has no value for are left empty.
    """

    keys = ('title', 'url', 'content', 'host', 'engine', 'score', 'type')

    def write(row):
        # emit the row restricted to / ordered by the table columns
        csv.writerow([row.get(key, '') for key in keys])

    csv.writerow(keys)

    for res in rc.get_ordered_results():
        row = res.as_dict()
        row['host'] = row['parsed_url'].netloc
        row['type'] = 'result'
        write(row)

    for answer in rc.answers:
        row = answer.as_dict()
        row['host'] = row['parsed_url'].netloc
        write(row)

    for suggestion in rc.suggestions:
        write({'title': suggestion, 'type': 'suggestion'})

    for correction in rc.corrections:
        write({'title': correction, 'type': 'correction'})
146
147

Variable Documentation

◆ exception_classname_to_text

dict searx.webutils.exception_classname_to_text
Initial value:
1= {
2 None: gettext('unexpected crash'),
3 'timeout': timeout_text,
4 'asyncio.TimeoutError': timeout_text,
5 'httpx.TimeoutException': timeout_text,
6 'httpx.ConnectTimeout': timeout_text,
7 'httpx.ReadTimeout': timeout_text,
8 'httpx.WriteTimeout': timeout_text,
9 'httpx.HTTPStatusError': gettext('HTTP error'),
10 'httpx.ConnectError': gettext("HTTP connection error"),
11 'httpx.RemoteProtocolError': http_protocol_error_text,
12 'httpx.LocalProtocolError': http_protocol_error_text,
13 'httpx.ProtocolError': http_protocol_error_text,
14 'httpx.ReadError': network_error_text,
15 'httpx.WriteError': network_error_text,
16 'httpx.ProxyError': gettext("proxy error"),
17 'searx.exceptions.SearxEngineCaptchaException': gettext("CAPTCHA"),
18 'searx.exceptions.SearxEngineTooManyRequestsException': gettext("too many requests"),
19 'searx.exceptions.SearxEngineAccessDeniedException': gettext("access denied"),
20 'searx.exceptions.SearxEngineAPIException': gettext("server API error"),
21 'searx.exceptions.SearxEngineXPathException': parsing_error_text,
22 'KeyError': parsing_error_text,
23 'json.decoder.JSONDecodeError': parsing_error_text,
24 'lxml.etree.ParserError': parsing_error_text,
25 'ssl.SSLCertVerificationError': ssl_cert_error_text, # for Python > 3.7
26 'ssl.CertificateError': ssl_cert_error_text, # for Python 3.7
27}

Definition at line 40 of file webutils.py.

◆ http_protocol_error_text

searx.webutils.http_protocol_error_text = gettext('HTTP protocol error')

Definition at line 37 of file webutils.py.

◆ logger

searx.webutils.logger = logger.getChild('webutils')

Definition at line 33 of file webutils.py.

◆ network_error_text

searx.webutils.network_error_text = gettext('network error')

Definition at line 38 of file webutils.py.

◆ NO_SUBGROUPING

str searx.webutils.NO_SUBGROUPING = 'without further subgrouping'

Definition at line 318 of file webutils.py.

◆ parsing_error_text

searx.webutils.parsing_error_text = gettext('parsing error')

Definition at line 36 of file webutils.py.

◆ ssl_cert_error_text

searx.webutils.ssl_cert_error_text = gettext("SSL error: certificate validation has failed")

Definition at line 39 of file webutils.py.

◆ timeout_text

searx.webutils.timeout_text = gettext('timeout')

Definition at line 35 of file webutils.py.

◆ VALID_LANGUAGE_CODE

searx.webutils.VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')

Definition at line 31 of file webutils.py.