.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
webapp.py
Go to the documentation of this file.
#!/usr/bin/env python
# SPDX-License-Identifier: AGPL-3.0-or-later
"""WebApp

"""
# pylint: disable=use-dict-literal
7
8import hashlib
9import hmac
10import json
11import os
12import sys
13import base64
14
15from timeit import default_timer
16from html import escape
17from io import StringIO
18import typing
19from typing import List, Dict, Iterable
20
21import urllib
22import urllib.parse
23from urllib.parse import urlencode, urlparse, unquote
24
25import httpx
26
27from pygments import highlight
28from pygments.lexers import get_lexer_by_name
29from pygments.formatters import HtmlFormatter # pylint: disable=no-name-in-module
30
31import flask
32
33from flask import (
34 Flask,
35 render_template,
36 url_for,
37 make_response,
38 redirect,
39 send_from_directory,
40)
41from flask.wrappers import Response
42from flask.json import jsonify
43
44from flask_babel import (
45 Babel,
46 gettext,
47 format_decimal,
48)
49
50from searx import (
51 logger,
52 get_setting,
53 settings,
54 searx_debug,
55)
56
57from searx import infopage
58from searx import limiter
59from searx.botdetection import link_token
60
61from searx.data import ENGINE_DESCRIPTIONS
62from searx.results import Timing
63from searx.settings_defaults import OUTPUT_FORMATS
64from searx.settings_loader import get_default_settings_path
65from searx.exceptions import SearxParameterException
66from searx.engines import (
67 DEFAULT_CATEGORY,
68 categories,
69 engines,
70 engine_shortcuts,
71)
72
73from searx import webutils
74from searx.webutils import (
75 highlight_content,
76 get_static_files,
77 get_result_templates,
78 get_themes,
79 exception_classname_to_text,
80 new_hmac,
81 is_hmac_of,
82 is_flask_run_cmdline,
83 group_engines_in_tab,
84)
85from searx.webadapter import (
86 get_search_query_from_webapp,
87 get_selected_categories,
88 parse_lang,
89)
90from searx.utils import (
91 gen_useragent,
92 dict_subset,
93)
94from searx.version import VERSION_STRING, GIT_URL, GIT_BRANCH
95from searx.query import RawTextQuery
96from searx.plugins import Plugin, plugins, initialize as plugin_initialize
97from searx.plugins.oa_doi_rewrite import get_doi_resolver
98from searx.preferences import (
99 Preferences,
100 ClientPref,
101 ValidationException,
102)
103from searx.answerers import (
104 answerers,
105 ask,
106)
107from searx.metrics import (
108 get_engines_stats,
109 get_engine_errors,
110 get_reliabilities,
111 histogram,
112 counter,
113)
114from searx.flaskfix import patch_application
115
116from searx.locales import (
117 LOCALE_NAMES,
118 RTL_LOCALES,
119 localeselector,
120 locales_initialize,
121 match_locale,
122)
123
124# renaming names from searx imports ...
125from searx.autocomplete import search_autocomplete, backends as autocomplete_backends
126from searx.redisdb import initialize as redis_initialize
127from searx.sxng_locales import sxng_locales
128from searx.search import SearchWithPlugins, initialize as search_initialize
129from searx.network import stream as http_stream, set_context_network_name
130from searx.search.checker import get_result as checker_get_result
131
logger = logger.getChild('webapp')

# check secret_key: outside of debug mode the process exits when the secret
# key still has the placeholder value 'ultrasecretkey'.
if not searx_debug and settings['server']['secret_key'] == 'ultrasecretkey':
    logger.error('server.secret_key is not changed. Please use something else instead of ultrasecretkey.')
    sys.exit(1)

# about static
logger.debug('static directory is %s', settings['ui']['static_path'])
static_files = get_static_files(settings['ui']['static_path'])

# about templates
logger.debug('templates directory is %s', settings['ui']['templates_path'])
default_theme = settings['ui']['default_theme']
templates_path = settings['ui']['templates_path']
themes = get_themes(templates_path)
result_templates = get_result_templates(templates_path)

# NOTE(review): the consumer of this table is outside this part of the file;
# each value presumably is (reverse order, stats key, default value) -- confirm
# against the code that sorts the engine statistics.
STATS_SORT_PARAMETERS = {
    'name': (False, 'name', ''),
    'score': (True, 'score_per_result', 0),
    'result_count': (True, 'result_count', 0),
    'time': (False, 'total', 0),
    'reliability': (False, 'reliability', 100),
}

# Flask app
app = Flask(__name__, static_folder=settings['ui']['static_path'], template_folder=templates_path)

app.jinja_env.trim_blocks = True
app.jinja_env.lstrip_blocks = True
app.jinja_env.add_extension('jinja2.ext.loopcontrols')  # pylint: disable=no-member
app.jinja_env.filters['group_engines_in_tab'] = group_engines_in_tab  # pylint: disable=no-member
app.secret_key = settings['server']['secret_key']
166
167
class ExtendedRequest(flask.Request):
    """This class is never initialized and only used for type checking."""

    # The attributes below are assigned on the request object by the
    # ``before_request`` hook of this module.
    preferences: Preferences
    errors: List[str]
    user_plugins: List[Plugin]
    form: Dict[str, str]
    start_time: float
    render_time: float
    timings: List[Timing]


# ``flask.request`` seen through the extended type, so that type checkers know
# about the extra attributes.
request = typing.cast(ExtendedRequest, flask.request)
181
182
def get_locale():
    """Return the locale picked by ``localeselector()`` for the current request.

    Registered as the ``locale_selector`` of the Babel extension below.
    """
    locale = localeselector()
    logger.debug("%s uses locale `%s`", urllib.parse.quote(request.url), locale)
    return locale


babel = Babel(app, locale_selector=get_locale)
190
191
def _get_browser_language(req, lang_list):
    """Match the locale tag of the HTTP client against ``lang_list``.

    :param req: HTTP request the client preferences are built from.
    :param lang_list: candidate locales handed to ``match_locale``.
    :return: the result of ``match_locale`` (called with ``fallback='en'``).
    """
    client = ClientPref.from_http_request(req)
    return match_locale(client.locale_tag, lang_list, fallback='en')
196
197
199 """Get locale name for <html lang="...">
200 Chrom* browsers don't detect the language when there is a subtag (ie a territory).
201 For example "zh-TW" is detected but not "zh-Hant-TW".
202 This function returns a locale without the subtag.
203 """
204 parts = locale.split('-')
205 return parts[0].lower() + '-' + parts[-1].upper()
206
207
# code-highlighter
def _highlight_code_block(code, lexer, first_line):
    """Highlight one contiguous run of lines, numbering starts at ``first_line``."""
    formatter = HtmlFormatter(linenos='inline', linenostart=first_line, cssclass="code-highlight")
    return highlight(code, lexer, formatter)


@app.template_filter('code_highlighter')
def code_highlighter(codelines, language=None):
    """Template filter: render ``(line number, code)`` pairs as highlighted HTML.

    Consecutive line numbers are grouped into one code block; whenever the
    line number does not directly follow the previous one, the collected block
    is rendered and a new block is started.

    :param codelines: iterable of ``(line, code)`` pairs.
    :param language: name of the pygments lexer, ``'text'`` when not given or
        when no lexer can be found for the name.
    :return: the concatenated HTML of all code blocks, an empty string when
        ``codelines`` is empty.
    """
    if not language:
        language = 'text'

    try:
        # find lexer by programming language
        lexer = get_lexer_by_name(language, stripall=True)

    except Exception as e:  # pylint: disable=broad-except
        logger.warning("pygments lexer: %s ", e)
        # if lexer is not found, using default one
        lexer = get_lexer_by_name('text', stripall=True)

    html_parts = []
    block_lines = []
    last_line = None
    line_code_start = None

    # parse lines
    for line, code in codelines:
        if last_line is None:
            # very first line: compare with None, a line number of 0 is a
            # valid previous line and must not restart the numbering
            line_code_start = line
        elif last_line + 1 != line:
            # new codeblock is detected: highlight last codepart ..
            html_parts.append(_highlight_code_block(''.join(block_lines), lexer, line_code_start))
            # .. and reset conditions for next codepart
            block_lines = []
            line_code_start = line

        # add codepart
        block_lines.append(code + '\n')

        # update line
        last_line = line

    if last_line is None:
        # nothing to highlight
        return ''

    # highlight last codepart
    html_parts.append(_highlight_code_block(''.join(block_lines), lexer, line_code_start))

    return ''.join(html_parts)
255
256
def get_result_template(theme_name: str, template_name: str):
    """Return the path of a result template, the themed one takes precedence.

    ``<theme_name>/result_templates/<template_name>`` is returned when it is
    listed in ``result_templates``, otherwise the path
    ``result_templates/<template_name>`` without theme.
    """
    themed_path = theme_name + '/result_templates/' + template_name
    if themed_path in result_templates:
        return themed_path
    return 'result_templates/' + template_name
262
263
def custom_url_for(endpoint: str, **values):
    """Wrapper around Flask's ``url_for`` with extras for two endpoints.

    ``static``
      When ``filename`` is not listed in ``static_files``, the file is looked
      up in the folder of the user's theme (``themes/<theme>/<filename>``).
      With setting ``ui.static_use_hash`` enabled and a known file hash, the
      hash is appended to the URL as query string.

    ``info``
      When no ``locale`` is given, the locale from the user's preferences is
      used, or the default locale of the info pages if the page does not
      exist in the user's locale.
    """
    suffix = ""
    if endpoint == 'static' and values.get('filename'):
        file_hash = static_files.get(values['filename'])
        if not file_hash:
            # try file in the current theme
            theme_name = request.preferences.get_value('theme')
            filename_with_theme = "themes/{}/{}".format(theme_name, values['filename'])
            file_hash = static_files.get(filename_with_theme)
            if file_hash:
                values['filename'] = filename_with_theme
        if get_setting('ui.static_use_hash') and file_hash:
            suffix = "?" + file_hash

    if endpoint == 'info' and 'locale' not in values:
        locale = request.preferences.get_value('locale')
        if _INFO_PAGES.get_page(values['pagename'], locale) is None:
            locale = _INFO_PAGES.locale_default
        values['locale'] = locale

    return url_for(endpoint, **values) + suffix
283
284
def morty_proxify(url: str):
    """Return ``url`` rewritten to go through the configured result proxy.

    Protocol-relative URLs (``//host/..``) are prefixed with ``https:``.  When
    setting ``result_proxy.url`` is empty the (prefixed) URL is returned as
    is.  With ``result_proxy.key`` set, a HMAC-SHA256 hex digest of the URL is
    passed to the proxy in the ``mortyhash`` argument.
    """
    if url.startswith('//'):
        url = 'https:' + url

    proxy_settings = settings['result_proxy']
    if not proxy_settings['url']:
        return url

    url_params = dict(mortyurl=url)

    if proxy_settings['key']:
        url_params['mortyhash'] = hmac.new(proxy_settings['key'], url.encode(), hashlib.sha256).hexdigest()

    return '{0}?{1}'.format(proxy_settings['url'], urlencode(url_params))
298
299
def image_proxify(url: str) -> typing.Optional[str]:
    """Return the URL under which an image is delivered to the client.

    - Protocol-relative URLs (``//host/..``) are prefixed with ``https:``.
    - With preference ``image_proxy`` disabled, the URL is returned unchanged.
    - ``data:image/`` URLs are returned unchanged when they are base64 encoded
      and of a known image type, otherwise ``None`` is returned.
    - With setting ``result_proxy.url`` the URL is passed to
      ``morty_proxify``, otherwise it points to the ``image_proxy`` endpoint
      of this application and is signed (argument ``h``) with the secret key.
    """
    if url.startswith('//'):
        url = 'https:' + url

    if not request.preferences.get_value('image_proxy'):
        return url

    if url.startswith('data:image/'):
        # 50 is an arbitrary number to get only the beginning of the image.
        partial_base64 = url[len('data:image/') : 50].split(';')
        if (
            len(partial_base64) == 2
            and partial_base64[0] in ['gif', 'png', 'jpeg', 'pjpeg', 'webp', 'tiff', 'bmp']
            and partial_base64[1].startswith('base64,')
        ):
            return url
        return None

    if settings['result_proxy']['url']:
        return morty_proxify(url)

    h = new_hmac(settings['server']['secret_key'], url.encode())

    return '{0}?{1}'.format(url_for('image_proxy'), urlencode(dict(url=url.encode(), h=h)))
325
326
def get_translations():
    """Return the translated messages that are part of the client settings."""
    return {
        # when there is autocompletion
        'no_item_found': gettext('No item found'),
        # /preferences: the source of the engine description (wikipedata, wikidata, website)
        'Source': gettext('Source'),
        # infinite scroll
        'error_loading_next_page': gettext('Error loading the next page'),
    }
336
337
def get_enabled_categories(category_names: Iterable[str]):
    """The categories in ``category_names`` for which there is no active engine
    are filtered out and a reduced list is returned.  The order of
    ``category_names`` is kept."""

    enabled_categories = set()
    for item in request.preferences.engines.get_enabled():
        # first element of the item is the name of the engine
        enabled_categories.update(engines[item[0]].categories)
    return [x for x in category_names if x in enabled_categories]
347
348
def get_pretty_url(parsed_url: urllib.parse.ParseResult):
    """Split a parsed URL into two display strings.

    :return: list of two items: ``scheme://netloc`` and the path, in which one
        trailing slash is removed, each remaining ``/`` is replaced by
        ``" › "`` and percent-encodings are decoded.
    """
    path = parsed_url.path
    if path.endswith('/'):
        path = path[:-1]
    path = unquote(path.replace("/", " › "))
    return [parsed_url.scheme + "://" + parsed_url.netloc, path]
354
355
def get_client_settings():
    """Return the settings handed over to the client.

    The values come from the user's preferences, from setting
    ``search.autocomplete_min`` and from ``get_translations()``.
    """
    req_pref = request.preferences
    return {
        'autocomplete_provider': req_pref.get_value('autocomplete'),
        'autocomplete_min': get_setting('search.autocomplete_min'),
        'http_method': req_pref.get_value('method'),
        'infinite_scroll': req_pref.get_value('infinite_scroll'),
        'translations': get_translations(),
        'search_on_category_select': req_pref.get_value('search_on_category_select'),
        'hotkeys': req_pref.get_value('hotkeys'),
        'theme_static_path': custom_url_for('static', filename='themes/simple'),
    }
368
369
def render(template_name: str, **kwargs):
    """Render template ``<theme>/<template_name>`` of the user's theme.

    The template context ``kwargs`` is completed by values from the HTTP
    request, the user's preferences and the settings, and by helper functions
    used in the templates.  Apart from ``rtl`` and ``current_language``, which
    are only set when missing, the values set here replace values of the same
    name passed in by the caller.

    The time spent in ``render_template`` is added to ``request.render_time``.

    :param template_name: file name of the template inside the theme folder.
    :return: the rendered template.
    """
    # pylint: disable=too-many-statements
    preferences = request.preferences

    # client settings: JSON, base64 encoded
    client_settings = json.dumps(get_client_settings())
    kwargs['client_settings'] = base64.b64encode(client_settings.encode('utf-8')).decode('utf-8')

    # values from the HTTP requests
    kwargs['endpoint'] = 'results' if 'q' in kwargs else request.endpoint
    kwargs['cookies'] = request.cookies
    kwargs['errors'] = request.errors
    kwargs['link_token'] = link_token.get_token()

    # values from the preferences
    kwargs['preferences'] = preferences
    kwargs['autocomplete'] = preferences.get_value('autocomplete')
    kwargs['infinite_scroll'] = preferences.get_value('infinite_scroll')
    kwargs['search_on_category_select'] = preferences.get_value('search_on_category_select')
    kwargs['hotkeys'] = preferences.get_value('hotkeys')
    kwargs['results_on_new_tab'] = preferences.get_value('results_on_new_tab')
    kwargs['advanced_search'] = preferences.get_value('advanced_search')
    kwargs['query_in_title'] = preferences.get_value('query_in_title')
    kwargs['safesearch'] = str(preferences.get_value('safesearch'))
    kwargs['theme'] = preferences.get_value('theme')
    kwargs['method'] = preferences.get_value('method')
    kwargs['categories_as_tabs'] = list(settings['categories_as_tabs'].keys())
    kwargs['categories'] = get_enabled_categories(settings['categories_as_tabs'].keys())
    kwargs['DEFAULT_CATEGORY'] = DEFAULT_CATEGORY

    # i18n
    kwargs['sxng_locales'] = [item for item in sxng_locales if item[0] in settings['search']['languages']]

    locale = preferences.get_value('locale')
    kwargs['locale_rfc5646'] = _get_locale_rfc5646(locale)

    if locale in RTL_LOCALES and 'rtl' not in kwargs:
        kwargs['rtl'] = True

    if 'current_language' not in kwargs:
        kwargs['current_language'] = parse_lang(preferences, {}, RawTextQuery('', []))

    # values from settings
    kwargs['search_formats'] = [x for x in settings['search']['formats'] if x != 'html']
    kwargs['instance_name'] = get_setting('general.instance_name')
    kwargs['searx_version'] = VERSION_STRING
    kwargs['searx_git_url'] = GIT_URL
    kwargs['enable_metrics'] = get_setting('general.enable_metrics')
    kwargs['get_setting'] = get_setting
    kwargs['get_pretty_url'] = get_pretty_url

    # values from settings: donation_url
    donation_url = get_setting('general.donation_url')
    if donation_url is True:
        # True stands for the 'donate' info page of this instance
        donation_url = custom_url_for('info', pagename='donate')
    kwargs['donation_url'] = donation_url

    # helpers to create links to other pages
    kwargs['url_for'] = custom_url_for  # override url_for function in templates
    kwargs['image_proxify'] = image_proxify
    kwargs['proxify'] = morty_proxify if settings['result_proxy']['url'] is not None else None
    kwargs['proxify_results'] = settings['result_proxy']['proxify_results']
    kwargs['cache_url'] = settings['ui']['cache_url']
    kwargs['get_result_template'] = get_result_template
    kwargs['doi_resolver'] = get_doi_resolver(preferences)
    opensearch_args = {
        'method': preferences.get_value('method'),
        'autocomplete': preferences.get_value('autocomplete'),
    }
    kwargs['opensearch_url'] = url_for('opensearch') + '?' + urlencode(opensearch_args)
    kwargs['urlparse'] = urlparse

    # scripts and styles from plugins
    kwargs['scripts'] = set()
    kwargs['styles'] = set()
    for plugin in request.user_plugins:
        kwargs['scripts'].update(plugin.js_dependencies)
        kwargs['styles'].update(plugin.css_dependencies)

    start_time = default_timer()
    result = render_template('{}/{}'.format(kwargs['theme'], template_name), **kwargs)
    request.render_time += default_timer() - start_time  # pylint: disable=assigning-non-slot

    return result
468
469
@app.before_request
def pre_request():
    """Prepare the request object before the request is dispatched.

    - initialize ``start_time``, ``render_time``, ``timings`` and ``errors``
    - build the user's preferences from the cookies and the request arguments
    - merge the URL arguments into ``request.form`` (form values win)
    - take ``language`` and ``locale`` from the browser when they are not set
    - collect the plugins of the user in ``request.user_plugins``
    """
    request.start_time = default_timer()  # pylint: disable=assigning-non-slot
    request.render_time = 0  # pylint: disable=assigning-non-slot
    request.timings = []  # pylint: disable=assigning-non-slot
    request.errors = []  # pylint: disable=assigning-non-slot

    client_pref = ClientPref.from_http_request(request)
    # pylint: disable=redefined-outer-name
    preferences = Preferences(themes, list(categories.keys()), engines, plugins, client_pref)

    # user agents containing 'webkit' and 'android': preset method GET
    user_agent = request.headers.get('User-Agent', '').lower()
    if 'webkit' in user_agent and 'android' in user_agent:
        preferences.key_value_settings['method'].value = 'GET'
    request.preferences = preferences  # pylint: disable=assigning-non-slot

    try:
        preferences.parse_dict(request.cookies)

    except Exception as e:  # pylint: disable=broad-except
        logger.exception(e, exc_info=True)
        request.errors.append(gettext('Invalid settings, please edit your preferences'))

    # merge GET, POST vars
    # request.form
    request.form = dict(request.form.items())  # pylint: disable=assigning-non-slot
    for k, v in request.args.items():
        # a value from the form is not overwritten by an URL argument
        request.form.setdefault(k, v)

    if request.form.get('preferences'):
        preferences.parse_encoded_data(request.form['preferences'])
    else:
        try:
            preferences.parse_dict(request.form)
        except Exception as e:  # pylint: disable=broad-except
            logger.exception(e, exc_info=True)
            request.errors.append(gettext('Invalid settings'))

    # language is defined neither in settings nor in preferences
    # use browser headers
    if not preferences.get_value("language"):
        language = _get_browser_language(request, settings['search']['languages'])
        preferences.parse_dict({"language": language})
        logger.debug('set language %s (from browser)', preferences.get_value("language"))

    # locale is defined neither in settings nor in preferences
    # use browser headers
    if not preferences.get_value("locale"):
        locale = _get_browser_language(request, LOCALE_NAMES.keys())
        preferences.parse_dict({"locale": locale})
        logger.debug('set locale %s (from browser)', preferences.get_value("locale"))

    # request.user_plugins: plugins enabled by the user plus the plugins that
    # are on by default and not disabled by the user
    request.user_plugins = []  # pylint: disable=assigning-non-slot
    allowed_plugins = preferences.plugins.get_enabled()
    disabled_plugins = preferences.plugins.get_disabled()
    for plugin in plugins:
        if (plugin.default_on and plugin.id not in disabled_plugins) or plugin.id in allowed_plugins:
            request.user_plugins.append(plugin)
530
531
@app.after_request
def add_default_headers(response: flask.Response):
    """Add the HTTP headers from setting ``server.default_http_headers``.

    A header that already exists in the response is not modified.
    """
    # set default http headers
    for header, value in settings['server']['default_http_headers'].items():
        if header not in response.headers:
            response.headers[header] = value
    return response
540
541
@app.after_request
def post_request(response: flask.Response):
    """Add the ``Server-Timing`` header to the response.

    The header contains the total time of the request, the time spent to
    render templates and, when ``request.timings`` is not empty, the total
    time and the load time (if any) of each entry, sorted by total time.
    Durations are given in milliseconds.
    """
    total_time = default_timer() - request.start_time
    timings_all = [
        'total;dur=' + str(round(total_time * 1000, 3)),
        'render;dur=' + str(round(request.render_time * 1000, 3)),
    ]
    if len(request.timings) > 0:
        timings = sorted(request.timings, key=lambda t: t.total)
        for i, t in enumerate(timings):
            timings_all.append('total_' + str(i) + '_' + t.engine + ';dur=' + str(round(t.total * 1000, 3)))
        # load times follow the total times, the index is the same as above
        for i, t in enumerate(timings):
            if t.load:
                timings_all.append('load_' + str(i) + '_' + t.engine + ';dur=' + str(round(t.load * 1000, 3)))
    response.headers.add('Server-Timing', ', '.join(timings_all))
    return response
562
563
def index_error(output_format: str, error_message: str):
    """Build the response of a failed search in the requested output format.

    - ``json``: JSON object with key ``error``
    - ``csv``: empty CSV attachment
    - ``rss``: the RSS template without results, with the ``error_message``
    - everything else (HTML): the index page; the generic message
      'search error' is added to ``request.errors``, ``error_message`` is not
      used.
    """
    if output_format == 'json':
        return Response(json.dumps({'error': error_message}), mimetype='application/json')

    if output_format == 'csv':
        response = Response('', mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx.csv'
        response.headers.add('Content-Disposition', cont_disp)
        return response

    if output_format == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=[],
            q=request.form['q'] if 'q' in request.form else '',
            number_of_results=0,
            error_message=error_message,
        )
        return Response(response_rss, mimetype='text/xml')

    # html
    request.errors.append(gettext('search error'))
    return render(
        'index.html',
        selected_categories=get_selected_categories(request.preferences, request.form),
    )
591
592
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render index page."""

    # redirect to search if there's a query in the request
    if request.form.get('q'):
        query = ('?' + request.query_string.decode()) if request.query_string else ''
        return redirect(url_for('search') + query, 308)

    return render(
        'index.html',
        selected_categories=get_selected_categories(request.preferences, request.form),
        current_locale=request.preferences.get_value("locale"),
    )
609
610
@app.route('/healthz', methods=['GET'])
def health():
    """Health check: always answers with the plain text ``OK``."""
    return Response('OK', mimetype='text/plain')
614
615
@app.route('/client<token>.css', methods=['GET', 'POST'])
def client_token(token=None):
    """Pass the ``token`` of the URL to ``link_token.ping`` and answer with an
    empty CSS document."""
    link_token.ping(request, token)
    return Response('', mimetype='text/css')
620
621
@app.route('/search', methods=['GET', 'POST'])
def search():
    """Search query in q and return results.

    Supported outputs: html, json, csv, rss.

    - An output format that is not enabled in setting ``search.formats`` is
      answered with HTTP 403.
    - Without a query the index page is rendered for HTML, all other formats
      get an error response with HTTP 400.
    - A ``SearxParameterException`` is answered with HTTP 400, any other
      exception raised by the search with HTTP 500.
    """
    # pylint: disable=too-many-locals, too-many-return-statements, too-many-branches
    # pylint: disable=too-many-statements

    # output_format
    output_format = request.form.get('format', 'html')
    if output_format not in OUTPUT_FORMATS:
        output_format = 'html'

    if output_format not in settings['search']['formats']:
        flask.abort(403)

    # check if there is query (not None and not an empty string)
    if not request.form.get('q'):
        if output_format == 'html':
            return render(
                'index.html',
                selected_categories=get_selected_categories(request.preferences, request.form),
            )
        return index_error(output_format, 'No query'), 400

    # search
    try:
        search_query, raw_text_query, _, _, selected_locale = get_search_query_from_webapp(
            request.preferences, request.form
        )
        search_with_plugins = SearchWithPlugins(search_query, request.user_plugins, request)
        result_container = search_with_plugins.search()

    except SearxParameterException as e:
        logger.exception('search error: SearxParameterException')
        return index_error(output_format, e.message), 400
    except Exception as e:  # pylint: disable=broad-except
        logger.exception(e, exc_info=True)
        return index_error(output_format, gettext('search error')), 500

    # 1. check if the result is a redirect for an external bang
    if result_container.redirect_url:
        return redirect(result_container.redirect_url)

    # 2. add Server-Timing header for measuring performance characteristics of
    # web applications
    request.timings = result_container.get_timings()  # pylint: disable=assigning-non-slot

    # 3. formats without a template

    if output_format == 'json':
        response = webutils.get_json_response(search_query, result_container)
        return Response(response, mimetype='application/json')

    if output_format == 'csv':
        csv_writer = webutils.CSVWriter(StringIO())
        webutils.write_csv_response(csv_writer, result_container)
        csv_writer.stream.seek(0)

        response = Response(csv_writer.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query)
        response.headers.add('Content-Disposition', cont_disp)
        return response

    # 4. formats rendered by a template / RSS & HTML

    current_template = None
    previous_result = None

    results = result_container.get_ordered_results()

    if search_query.redirect_to_first_result and results:
        return redirect(results[0]['url'], 302)

    for result in results:
        if output_format == 'html':
            # content (truncated to 1024 characters) and title are HTML
            # escaped before the query is highlighted in them
            if 'content' in result and result['content']:
                result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
            if 'title' in result and result['title']:
                result['title'] = highlight_content(escape(result['title'] or ''), search_query.query)

        if 'url' in result:
            result['pretty_url'] = webutils.prettify_url(result['url'])
        if result.get('publishedDate'):  # do not try to get a date from an empty string or a None type
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
            except ValueError:
                result['publishedDate'] = None
            else:
                result['publishedDate'] = webutils.searxng_l10n_timespan(result['publishedDate'])

        # set result['open_group'] = True when the template changes from the previous result
        # set result['close_group'] = True when the template changes on the next result
        if current_template != result.get('template'):
            result['open_group'] = True
            if previous_result:
                previous_result['close_group'] = True  # pylint: disable=unsupported-assignment-operation
        current_template = result.get('template')
        previous_result = result

    if previous_result:
        previous_result['close_group'] = True

    # 4.a RSS

    if output_format == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=results,
            answers=result_container.answers,
            corrections=result_container.corrections,
            suggestions=result_container.suggestions,
            q=request.form['q'],
            number_of_results=result_container.number_of_results,
        )
        return Response(response_rss, mimetype='text/xml')

    # 4.b HTML

    # suggestions: use RawTextQuery to get the suggestion URLs with the same bang
    suggestion_urls = [
        {'url': raw_text_query.changeQuery(suggestion).getFullQuery(), 'title': suggestion}
        for suggestion in result_container.suggestions
    ]

    correction_urls = [
        {'url': raw_text_query.changeQuery(correction).getFullQuery(), 'title': correction}
        for correction in result_container.corrections
    ]

    # search_query.lang contains the user choice (all, auto, en, ...)
    # when the user choice is "auto", search_with_plugins.search_query.lang contains the detected language
    # otherwise it is equals to search_query.lang
    return render(
        'results.html',
        results=results,
        q=request.form['q'],
        selected_categories=search_query.categories,
        pageno=search_query.pageno,
        time_range=search_query.time_range or '',
        number_of_results=format_decimal(result_container.number_of_results),
        suggestions=suggestion_urls,
        answers=result_container.answers,
        corrections=correction_urls,
        infoboxes=result_container.infoboxes,
        engine_data=result_container.engine_data,
        paging=result_container.paging,
        unresponsive_engines=webutils.get_translated_errors(result_container.unresponsive_engines),
        current_locale=request.preferences.get_value("locale"),
        current_language=selected_locale,
        search_language=match_locale(
            search_with_plugins.search_query.lang,
            settings['search']['languages'],
            fallback=request.preferences.get_value("language"),
        ),
        timeout_limit=request.form.get('timeout_limit', None),
    )
795
796
@app.route('/about', methods=['GET'])
def about():
    """Redirect to about page"""
    # custom_url_for is going to add the locale
    return redirect(custom_url_for('info', pagename='about'))
802
803
@app.route('/info/<locale>/<pagename>', methods=['GET'])
def info(pagename, locale):
    """Render page of online user documentation

    A page that does not exist in ``locale`` is answered with HTTP 404.  The
    list of all pages is built for the locale from the user's preferences.
    """
    page = _INFO_PAGES.get_page(pagename, locale)
    if page is None:
        flask.abort(404)

    user_locale = request.preferences.get_value('locale')
    return render(
        'info.html',
        all_pages=_INFO_PAGES.iter_pages(user_locale, fallback_to_default=True),
        active_page=page,
        active_pagename=pagename,
    )
818
819
@app.route('/autocompleter', methods=['GET', 'POST'])
def autocompleter():
    """Return autocompleter results

    The answer is a JSON list of suggestions when the request has the header
    ``X-Requested-With: XMLHttpRequest``, otherwise a JSON list of the query
    and the list of suggestions (mimetype ``application/x-suggestions+json``).
    """

    # run autocompleter
    results = []

    # set blocked engines
    disabled_engines = request.preferences.engines.get_disabled()

    # parse query
    raw_text_query = RawTextQuery(request.form.get('q', ''), disabled_engines)
    sug_prefix = raw_text_query.getQuery()

    # normal autocompletion results only appear if no inner results returned
    # and there is a query part
    if len(raw_text_query.autocomplete_list) == 0 and len(sug_prefix) > 0:

        # get SearXNG's locale and autocomplete backend from cookie
        sxng_locale = request.preferences.get_value('language')
        backend_name = request.preferences.get_value('autocomplete')

        for result in search_autocomplete(backend_name, sug_prefix, sxng_locale):
            # attention: this loop will change raw_text_query object and this is
            # the reason why the sug_prefix was stored before (see above)
            if result != sug_prefix:
                results.append(raw_text_query.changeQuery(result).getFullQuery())

    for autocomplete_text in raw_text_query.autocomplete_list:
        results.append(raw_text_query.get_autocomplete_full_query(autocomplete_text))

    for answers in ask(raw_text_query):
        for answer in answers:
            results.append(str(answer['answer']))

    if request.headers.get('X-Requested-With') == 'XMLHttpRequest':
        # the suggestion request comes from the searx search form
        suggestions = json.dumps(results)
        mimetype = 'application/json'
    else:
        # the suggestion request comes from browser's URL bar
        suggestions = json.dumps([sug_prefix, results])
        mimetype = 'application/x-suggestions+json'

    # escape <, > and & of the JSON document, quotes are kept (quote=False)
    suggestions = escape(suggestions, False)
    return Response(suggestions, mimetype=mimetype)
867
868
869@app.route('/preferences', methods=['GET', 'POST'])
871 """Render preferences page && save user preferences"""
872
873 # pylint: disable=too-many-locals, too-many-return-statements, too-many-branches
874 # pylint: disable=too-many-statements
875
876 # save preferences using the link the /preferences?preferences=...
877 if request.args.get('preferences') or request.form.get('preferences'):
878 resp = make_response(redirect(url_for('index', _external=True)))
879 return request.preferences.save(resp)
880
881 # save preferences
882 if request.method == 'POST':
883 resp = make_response(redirect(url_for('index', _external=True)))
884 try:
885 request.preferences.parse_form(request.form)
886 except ValidationException:
887 request.errors.append(gettext('Invalid settings, please edit your preferences'))
888 return resp
889 return request.preferences.save(resp)
890
891 # render preferences
892 image_proxy = request.preferences.get_value('image_proxy') # pylint: disable=redefined-outer-name
893 disabled_engines = request.preferences.engines.get_disabled()
894 allowed_plugins = request.preferences.plugins.get_enabled()
895
896 # stats for preferences page
897 filtered_engines = dict(filter(lambda kv: request.preferences.validate_token(kv[1]), engines.items()))
898
899 engines_by_category = {}
900
901 for c in categories: # pylint: disable=consider-using-dict-items
902 engines_by_category[c] = [e for e in categories[c] if e.name in filtered_engines]
903 # sort the engines alphabetically since the order in settings.yml is meaningless.
904 list.sort(engines_by_category[c], key=lambda e: e.name)
905
906 # get first element [0], the engine time,
907 # and then the second element [1] : the time (the first one is the label)
908 stats = {} # pylint: disable=redefined-outer-name
909 max_rate95 = 0
910 for _, e in filtered_engines.items():
911 h = histogram('engine', e.name, 'time', 'total')
912 median = round(h.percentage(50), 1) if h.count > 0 else None
913 rate80 = round(h.percentage(80), 1) if h.count > 0 else None
914 rate95 = round(h.percentage(95), 1) if h.count > 0 else None
915
916 max_rate95 = max(max_rate95, rate95 or 0)
917
918 result_count_sum = histogram('engine', e.name, 'result', 'count').sum
919 successful_count = counter('engine', e.name, 'search', 'count', 'successful')
920 result_count = int(result_count_sum / float(successful_count)) if successful_count else 0
921
922 stats[e.name] = {
923 'time': median,
924 'rate80': rate80,
925 'rate95': rate95,
926 'warn_timeout': e.timeout > settings['outgoing']['request_timeout'],
927 'supports_selected_language': e.traits.is_locale_supported(
928 str(request.preferences.get_value('language') or 'all')
929 ),
930 'result_count': result_count,
931 }
932 # end of stats
933
934 # reliabilities
935 reliabilities = {}
936 engine_errors = get_engine_errors(filtered_engines)
937 checker_results = checker_get_result()
938 checker_results = (
939 checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in checker_results else {}
940 )
941 for _, e in filtered_engines.items():
942 checker_result = checker_results.get(e.name, {})
943 checker_success = checker_result.get('success', True)
944 errors = engine_errors.get(e.name) or []
945 if counter('engine', e.name, 'search', 'count', 'sent') == 0:
946 # no request
947 reliability = None
948 elif checker_success and not errors:
949 reliability = 100
950 elif 'simple' in checker_result.get('errors', {}):
951 # the basic (simple) test doesn't work: the engine is broken according to the checker
952 # even if there is no exception
953 reliability = 0
954 else:
955 # pylint: disable=consider-using-generator
956 reliability = 100 - sum([error['percentage'] for error in errors if not error.get('secondary')])
957
958 reliabilities[e.name] = {
959 'reliability': reliability,
960 'errors': [],
961 'checker': checker_results.get(e.name, {}).get('errors', {}).keys(),
962 }
963 # keep the order of the list checker_results[e.name]['errors'] and deduplicate.
964 # the first element has the highest percentage rate.
965 reliabilities_errors = []
966 for error in errors:
967 error_user_text = None
968 if error.get('secondary') or 'exception_classname' not in error:
969 continue
970 error_user_text = exception_classname_to_text.get(error.get('exception_classname'))
971 if not error:
972 error_user_text = exception_classname_to_text[None]
973 if error_user_text not in reliabilities_errors:
974 reliabilities_errors.append(error_user_text)
975 reliabilities[e.name]['errors'] = reliabilities_errors
976
977 # supports
978 supports = {}
979 for _, e in filtered_engines.items():
980 supports_selected_language = e.traits.is_locale_supported(
981 str(request.preferences.get_value('language') or 'all')
982 )
983 safesearch = e.safesearch
984 time_range_support = e.time_range_support
985 for checker_test_name in checker_results.get(e.name, {}).get('errors', {}):
986 if supports_selected_language and checker_test_name.startswith('lang_'):
987 supports_selected_language = '?'
988 elif safesearch and checker_test_name == 'safesearch':
989 safesearch = '?'
990 elif time_range_support and checker_test_name == 'time_range':
991 time_range_support = '?'
992 supports[e.name] = {
993 'supports_selected_language': supports_selected_language,
994 'safesearch': safesearch,
995 'time_range_support': time_range_support,
996 }
997
998 return render(
999 # fmt: off
1000 'preferences.html',
1001 selected_categories = get_selected_categories(request.preferences, request.form),
1002 locales = LOCALE_NAMES,
1003 current_locale = request.preferences.get_value("locale"),
1004 image_proxy = image_proxy,
1005 engines_by_category = engines_by_category,
1006 stats = stats,
1007 max_rate95 = max_rate95,
1008 reliabilities = reliabilities,
1009 supports = supports,
1010 answerers = [
1011 {'info': a.self_info(), 'keywords': a.keywords}
1012 for a in answerers
1013 ],
1014 disabled_engines = disabled_engines,
1015 autocomplete_backends = autocomplete_backends,
1016 shortcuts = {y: x for x, y in engine_shortcuts.items()},
1017 themes = themes,
1018 plugins = plugins,
1019 doi_resolvers = settings['doi_resolvers'],
1020 current_doi_resolver = get_doi_resolver(request.preferences),
1021 allowed_plugins = allowed_plugins,
1022 preferences_url_params = request.preferences.get_as_url_params(),
1023 locked_preferences = settings['preferences']['lock'],
1024 preferences = True
1025 # fmt: on
1026 )
1027
1028
@app.route('/image_proxy', methods=['GET'])
def image_proxy():
    """Stream a remote image to the client through this instance.

    The request must carry the image address in the ``url`` query argument and
    a matching HMAC in ``h`` (verified with the server's secret key); otherwise
    an empty HTTP 400 is returned.

    The upstream response is refused, and closed, when:

    - its declared ``Content-Length`` exceeds 5 MiB (``'Max size'``, 400),
    - its status is not 200 (the upstream status is forwarded when it is >= 400,
      else 400),
    - its ``Content-Type`` starts with neither ``image/`` nor
      ``binary/octet-stream`` (400),
    - an ``httpx.HTTPError`` occurs (400).

    Otherwise the upstream body is passed through together with a subset of
    the upstream headers, and the upstream response is closed once the Flask
    response is closed.
    """
    # pylint: disable=too-many-return-statements, too-many-branches

    url = request.args.get('url')
    if not url:
        return '', 400

    if not is_hmac_of(settings['server']['secret_key'], url.encode(), request.args.get('h', '')):
        return '', 400

    maximum_size = 5 * 1024 * 1024
    forward_resp = False
    resp = None
    try:
        request_headers = {
            'User-Agent': gen_useragent(),
            'Accept': 'image/webp,*/*',
            'Accept-Encoding': 'gzip, deflate',
            'Sec-GPC': '1',
            'DNT': '1',
        }
        set_context_network_name('image_proxy')
        resp, stream = http_stream(method='GET', url=url, headers=request_headers, allow_redirects=True)
        content_length = resp.headers.get('Content-Length')
        if content_length and content_length.isdigit() and int(content_length) > maximum_size:
            return 'Max size', 400

        if resp.status_code != 200:
            logger.debug('image-proxy: wrong response code: %i', resp.status_code)
            if resp.status_code >= 400:
                return '', resp.status_code
            return '', 400

        content_type = resp.headers.get('Content-Type', '')
        if not content_type.startswith(('image/', 'binary/octet-stream')):
            logger.debug('image-proxy: wrong content-type: %s', content_type)
            return '', 400

        forward_resp = True
    except httpx.HTTPError:
        logger.exception('HTTP error')
        return '', 400
    finally:
        if resp is not None and not forward_resp:
            # the code is about to return an HTTP 400 error to the browser
            # we make sure to close the response between searxng and the HTTP server
            try:
                resp.close()
            except httpx.HTTPError:
                logger.exception('HTTP error on closing')

    def close_stream():
        """Close the upstream response and drop the references to it."""
        nonlocal resp, stream
        try:
            if resp is not None:
                resp.close()
        except httpx.HTTPError as e:
            logger.debug('Exception while closing response: %s', e)
        finally:
            # rebinding to None (rather than ``del``) keeps a second call harmless
            resp = None
            stream = None

    try:
        headers = dict_subset(resp.headers, {'Content-Type', 'Content-Encoding', 'Content-Length', 'Length'})
        response = Response(stream, mimetype=resp.headers['Content-Type'], headers=headers, direct_passthrough=True)
        response.call_on_close(close_stream)
        return response
    except httpx.HTTPError:
        close_stream()
        return '', 400
1100
1101
@app.route('/engine_descriptions.json', methods=['GET'])
def engine_descriptions():
    """Return the engine descriptions for the current locale as JSON.

    The English descriptions are the base; entries of the current language
    (the part of the locale before ``_``) replace them.  An entry of the form
    ``['<engine>:<lang>', 'ref']`` is resolved to the entry it points to, and a
    plain string entry becomes ``[<text>, 'wikipedia']``.  Finally an engine's
    own ``about['description']`` takes precedence, tagged ``"SearXNG config"``.
    """
    lang = get_locale().split('_')[0]

    descriptions = ENGINE_DESCRIPTIONS['en'].copy()
    if lang != 'en':
        descriptions.update(ENGINE_DESCRIPTIONS.get(lang, {}))

    # iterate over a snapshot: every entry is rewritten in place
    for name, entry in list(descriptions.items()):
        if len(entry) == 2 and entry[1] == 'ref':
            ref_engine, ref_lang = entry[0].split(':')
            entry = ENGINE_DESCRIPTIONS[ref_lang][ref_engine]
        if isinstance(entry, str):
            entry = [entry, 'wikipedia']
        descriptions[name] = entry

    # overwrite by about:description (from settings)
    for name, module in engines.items():
        about_text = getattr(module, 'about', {}).get('description')
        if about_text is not None:
            descriptions[name] = [about_text, "SearXNG config"]

    return jsonify(descriptions)
1124
1125
@app.route('/stats', methods=['GET'])
def stats():
    """Render engine statistics page.

    Query arguments:

    - ``sort``: a key of ``STATS_SORT_PARAMETERS``; any other value falls back
      to ``'name'``.
    - ``engine``: optional engine name.  It is honoured only if the engine
      passes ``request.preferences.validate_token()``; the statistics are then
      limited to this engine and a technical report of its errors is built.
    """
    sort_order = request.args.get('sort', default='name', type=str)
    selected_engine_name = request.args.get('engine', default=None, type=str)

    filtered_engines = dict(filter(lambda kv: request.preferences.validate_token(kv[1]), engines.items()))
    if selected_engine_name:
        if selected_engine_name not in filtered_engines:
            selected_engine_name = None
        else:
            filtered_engines = [selected_engine_name]

    checker_results = checker_get_result()
    checker_results = (
        checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in checker_results else {}
    )

    engine_stats = get_engines_stats(filtered_engines)
    engine_reliabilities = get_reliabilities(filtered_engines, checker_results)

    if sort_order not in STATS_SORT_PARAMETERS:
        sort_order = 'name'

    reverse, key_name, default_value = STATS_SORT_PARAMETERS[sort_order]

    def get_key(engine_stat):
        """Sort key: (reliability group, selected value, engine name)."""
        reliability = engine_reliabilities.get(engine_stat['name'], {}).get('reliability', 0)
        reliability_order = 0 if reliability else 1
        if key_name == 'reliability':
            # a stored ``None`` is returned as-is by .get() and cannot be
            # ordered against numbers: use the default of the sort parameter
            key = default_value if reliability is None else reliability
            reliability_order = 0
        else:
            key = engine_stat.get(key_name) or default_value
            if reverse:
                reliability_order = 1 - reliability_order
        return (reliability_order, key, engine_stat['name'])

    # One single-line entry per error of the selected engine.  The text is
    # assembled from explicit pieces so that it neither depends on the
    # indentation of this source file nor alters whitespace inside the values.
    technical_report = ' '.join(
        (
            f"Error: {error['exception_classname'] or error['log_message']} "
            f"Parameters: {error['log_parameters']} "
            f"File name: {error['filename']}:{error['line_no']} "
            f"Error Function: {error['function']} "
            f"Code: {error['code']}"
        ).strip()
        for error in engine_reliabilities.get(selected_engine_name, {}).get('errors', [])
    )

    engine_stats['time'] = sorted(engine_stats['time'], reverse=reverse, key=get_key)
    return render(
        # fmt: off
        'stats.html',
        sort_order = sort_order,
        engine_stats = engine_stats,
        engine_reliabilities = engine_reliabilities,
        selected_engine_name = selected_engine_name,
        searx_git_branch = GIT_BRANCH,
        technical_report = technical_report,
        # fmt: on
    )
1191
1192
@app.route('/stats/errors', methods=['GET'])
def stats_errors():
    """Return, as JSON, the errors of the engines that pass the token check."""
    visible_engines = {
        name: engine for name, engine in engines.items() if request.preferences.validate_token(engine)
    }
    return jsonify(get_engine_errors(visible_engines))
1198
1199
@app.route('/stats/checker', methods=['GET'])
def stats_checker():
    """Return the value of ``checker_get_result()`` as JSON."""
    return jsonify(checker_get_result())
1204
1205
@app.route('/robots.txt', methods=['GET'])
def robots():
    """Serve the static robots.txt rules as ``text/plain``."""
    rules = (
        "User-agent: *\n"
        "Allow: /info/en/about\n"
        "Disallow: /stats\n"
        "Disallow: /image_proxy\n"
        "Disallow: /preferences\n"
        "Disallow: /*?*q=*\n"
    )
    return Response(rules, mimetype='text/plain')
1218
1219
@app.route('/opensearch.xml', methods=['GET'])
def opensearch():
    """Render the OpenSearch description document.

    The HTTP method comes from the ``method`` preference; it is forced to
    ``GET`` when the User-Agent contains "webkit" and falls back to ``POST``
    when the preference holds anything other than ``POST`` or ``GET``.
    """
    search_method = request.preferences.get_value('method')
    autocomplete = request.preferences.get_value('autocomplete')

    user_agent = request.headers.get('User-Agent', '').lower()
    if 'webkit' in user_agent:
        # chrome/chromium only supports HTTP GET....
        search_method = 'GET'
    elif search_method not in ('POST', 'GET'):
        search_method = 'POST'

    document = render('opensearch.xml', opensearch_method=search_method, autocomplete=autocomplete)
    return Response(response=document, status=200, mimetype="application/opensearchdescription+xml")
1235
1236
@app.route('/favicon.ico')
def favicon():
    """Serve ``favicon.png`` from the ``img`` folder of the theme set in the preferences."""
    theme_name = request.preferences.get_value("theme")
    img_dir = os.path.join(  # pyright: ignore
        app.root_path, settings['ui']['static_path'], 'themes', theme_name, 'img'
    )
    return send_from_directory(img_dir, 'favicon.png', mimetype='image/vnd.microsoft.icon')
1245
1246
@app.route('/clear_cookies')
def clear_cookies():
    """Delete every cookie sent with the request and redirect to the index page."""
    index_url = url_for('index', _external=True)
    response = make_response(redirect(index_url))
    for name in request.cookies:
        response.delete_cookie(name)
    return response
1253
1254
@app.route('/config')
def config():
    """Return configuration in JSON format.

    Only engines that pass ``request.preferences.validate_token()`` are listed.
    """
    engine_entries = [
        {
            'name': name,
            'categories': engine.categories,
            'shortcut': engine.shortcut,
            'enabled': not engine.disabled,
            'paging': engine.paging,
            'language_support': engine.language_support,
            'languages': list(engine.traits.languages.keys()),
            'regions': list(engine.traits.regions.keys()),
            'safesearch': engine.safesearch,
            'time_range_support': engine.time_range_support,
            'timeout': engine.timeout,
        }
        for name, engine in engines.items()
        if request.preferences.validate_token(engine)
    ]

    plugin_entries = [{'name': plugin.name, 'enabled': plugin.default_on} for plugin in plugins]

    limiter_cfg = limiter.get_cfg()

    payload = {
        'categories': list(categories.keys()),
        'engines': engine_entries,
        'plugins': plugin_entries,
        'instance_name': settings['general']['instance_name'],
        'locales': LOCALE_NAMES,
        'default_locale': settings['ui']['default_locale'],
        'autocomplete': settings['search']['autocomplete'],
        'safe_search': settings['search']['safe_search'],
        'default_theme': settings['ui']['default_theme'],
        'version': VERSION_STRING,
        'brand': {
            'PRIVACYPOLICY_URL': get_setting('general.privacypolicy_url'),
            'CONTACT_URL': get_setting('general.contact_url'),
            'GIT_URL': GIT_URL,
            'GIT_BRANCH': GIT_BRANCH,
            'DOCS_URL': get_setting('brand.docs_url'),
        },
        'limiter': {
            'enabled': limiter.is_installed(),
            'botdetection.ip_limit.link_token': limiter_cfg.get('botdetection.ip_limit.link_token'),
            'botdetection.ip_lists.pass_searxng_org': limiter_cfg.get('botdetection.ip_lists.pass_searxng_org'),
        },
        'doi_resolvers': list(settings['doi_resolvers'].keys()),
        'default_doi_resolver': settings['default_doi_resolver'],
        'public_instance': settings['server']['public_instance'],
    }
    return jsonify(payload)
1315
1316
@app.errorhandler(404)
def page_not_found(_e):
    """Render the ``404.html`` template with HTTP status 404; the error object is unused."""
    body = render('404.html')
    return body, 404
1320
1321
# see https://flask.palletsprojects.com/en/1.1.x/cli/
# True if "FLASK_APP=searx/webapp.py FLASK_ENV=development flask run"
flask_run_development = (
    os.environ.get("FLASK_APP") is not None
    and os.environ.get("FLASK_ENV") == 'development'
    and is_flask_run_cmdline()
)

# True if reload feature is activated of werkzeug, False otherwise (including uwsgi, etc..)
# __name__ != "__main__" if searx.webapp is imported (make test, make docs, uwsgi...)
# see run() at the end of this file : searx_debug activates the reload feature.
werkzeug_reloader = flask_run_development or (searx_debug and __name__ == "__main__")

# initialize the engines except on the first run of the werkzeug server:
# without the reloader always initialize, with the reloader only when
# WERKZEUG_RUN_MAIN is "true".
if not werkzeug_reloader or os.environ.get("WERKZEUG_RUN_MAIN") == "true":
    locales_initialize()
    _INFO_PAGES = infopage.InfoPageSet()
    redis_initialize()
    plugin_initialize(app)
    search_initialize(
        enable_checker=True,
        check_network=True,
        enable_metrics=settings['general']['enable_metrics'],
    )
    limiter.initialize(app, settings)
1341
1342
def run():
    """Start the Flask web server on the configured bind address and port.

    ``searx_debug`` switches on both Flask's debug mode and its debugger; the
    default settings file is passed as an extra file to ``app.run``.
    """
    server_cfg = settings['server']
    host = server_cfg['bind_address']
    port = server_cfg['port']

    logger.debug('starting webserver on %s:%s', host, port)
    app.run(
        host=host,
        port=port,
        debug=searx_debug,
        use_debugger=searx_debug,
        threaded=True,
        extra_files=[get_default_settings_path()],
    )
1353
1354
# Expose the Flask app under the module-level name ``application`` as well
# (presumably the name looked up by WSGI servers such as uwsgi -- confirm
# against the deployment configuration).
application = app
patch_application(app)

# Executed as a script: start the web server through run().
if __name__ == "__main__":
    run()
::1337x
Definition 1337x.py:1
_get_browser_language(req, lang_list)
Definition webapp.py:192
engine_descriptions()
Definition webapp.py:1103
morty_proxify(str url)
Definition webapp.py:285
info(pagename, locale)
Definition webapp.py:805
autocompleter()
Definition webapp.py:821
index_error(str output_format, str error_message)
Definition webapp.py:564
image_proxify(str url)
Definition webapp.py:300
get_client_settings()
Definition webapp.py:356
client_token(token=None)
Definition webapp.py:617
code_highlighter(codelines, language=None)
Definition webapp.py:210
get_translations()
Definition webapp.py:327
_get_locale_rfc5646(locale)
Definition webapp.py:198
get_enabled_categories(Iterable[str] category_names)
Definition webapp.py:338
render(str template_name, **kwargs)
Definition webapp.py:370
get_result_template(str theme_name, str template_name)
Definition webapp.py:257
add_default_headers(flask.Response response)
Definition webapp.py:533
post_request(flask.Response response)
Definition webapp.py:543
get_pretty_url(urllib.parse.ParseResult parsed_url)
Definition webapp.py:349
page_not_found(_e)
Definition webapp.py:1318
custom_url_for(str endpoint, **values)
Definition webapp.py:264