json_engine.py
# SPDX-License-Identifier: AGPL-3.0-or-later
"""The JSON engine is a *generic* engine with which it is possible to configure
engines in the settings.

.. todo::

   - The JSON engine needs documentation!!

   - The parameters of the JSON engine should be adapted to those of the XPath
     engine.

"""

from collections.abc import Iterable
from json import loads
from urllib.parse import urlencode

from searx.utils import to_string, html_to_text


# The engine is configured by setting these module-level variables from the
# engine's entry in settings.yml.

search_url = None  # search URL template; {query} and optionally {pageno} are substituted
url_query = None  # JSON query (a '/'-separated key path) locating the result URL
url_prefix = ""  # prefix prepended to each result URL
content_query = None  # JSON query locating the result content
title_query = None  # JSON query locating the result title
content_html_to_text = False  # convert HTML content to plain text
title_html_to_text = False  # convert HTML titles to plain text
paging = False  # enable paging support
suggestion_query = ''  # JSON query locating suggestions
results_query = ''  # JSON query locating the list of result objects
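
# A minimal sketch of a settings.yml entry using this engine (the name and
# URL below are hypothetical; the remaining keys mirror the variables above):
#
#   - name: example json engine
#     engine: json_engine
#     search_url: https://example.org/api/search?q={query}
#     url_query: url
#     title_query: title
#     content_query: description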

cookies = {}
headers = {}
'''Some engines might return different results depending on cookies or headers.
Possible use case: setting a safesearch cookie or header to "moderate".'''

# parameters for engines with paging support
#
# number of results on each page
# (only needed if the site expects an offset rather than a page number)
page_size = 1
# number of the first page (usually 0 or 1)
first_page_num = 1
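
# For example, with page_size = 10 and first_page_num = 1, page 1 is requested
# as offset 1 and page 2 as offset 11; see the (pageno - 1) * page_size +
# first_page_num computation in request() below.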


def iterate(iterable):
    """Iterate over a dict or a list, yielding ``(key, value)`` pairs; list
    indices are converted to strings so both can be matched uniformly."""
    if isinstance(iterable, dict):
        items = iterable.items()
    else:
        items = enumerate(iterable)
    for index, value in items:
        yield str(index), value
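
# e.g. iterate({'a': 1}) yields ('a', 1), while iterate(['x', 'y']) yields
# ('0', 'x') and ('1', 'y')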


def is_iterable(obj):
    """Return True for containers worth descending into; strings are treated
    as scalar values, not as iterables."""
    if isinstance(obj, str):
        return False
    return isinstance(obj, Iterable)


def parse(query):  # pylint: disable=redefined-outer-name
    """Split a ``/``-separated query string into its non-empty path segments."""
    q = []  # pylint: disable=invalid-name
    for part in query.split('/'):
        if part == '':
            continue
        q.append(part)
    return q
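
# e.g. parse('a/b/c') == ['a', 'b', 'c']; empty segments are dropped, so
# parse('/a//b/') == ['a', 'b']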


def do_query(data, q):  # pylint: disable=invalid-name
    """Recursively collect every value in ``data`` whose key path matches
    ``q``.  A key that matches the current segment consumes it; containers
    under non-matching keys are searched with the full path, so a path may
    match at any nesting depth."""
    ret = []
    if not q:
        return ret

    qkey = q[0]

    for key, value in iterate(data):

        if len(q) == 1:
            # last path segment: collect matches, keep searching deeper otherwise
            if key == qkey:
                ret.append(value)
            elif is_iterable(value):
                ret.extend(do_query(value, q))
        else:
            if not is_iterable(value):
                continue
            if key == qkey:
                # segment matched: continue with the remaining path
                ret.extend(do_query(value, q[1:]))
            else:
                # no match: retry the full path one level deeper
                ret.extend(do_query(value, q))
    return ret
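
# e.g. do_query({'data': {'items': [{'url': 'u'}]}}, ['url']) == ['u'] -- the
# single-segment path matches regardless of how deeply the key is nested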


def query(data, query_string):
    """Apply a ``/``-separated JSON query string to parsed JSON data and
    return the list of matching values."""
    q = parse(query_string)
    return do_query(data, q)
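
# e.g. query(loads('{"results": [{"title": "t1"}, {"title": "t2"}]}'),
#            'results/title') == ['t1', 't2']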


def request(query, params):  # pylint: disable=redefined-outer-name
    """Build the outgoing request from the configured ``search_url``."""
    query = urlencode({'q': query})[2:]  # url-encode, then strip the leading 'q='

    fp = {'query': query}  # pylint: disable=invalid-name
    if paging and search_url.find('{pageno}') >= 0:
        fp['pageno'] = (params['pageno'] - 1) * page_size + first_page_num

    params['cookies'].update(cookies)
    params['headers'].update(headers)

    params['url'] = search_url.format(**fp)
    params['query'] = query

    return params
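
# e.g. with the hypothetical search_url 'https://example.org/api?q={query}&p={pageno}',
# paging enabled and page_size = first_page_num = 1, a search for "foo bar" on
# page 2 yields the URL https://example.org/api?q=foo+bar&p=2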


def identity(arg):
    """Return the argument unchanged (no-op filter when no HTML stripping is
    configured)."""
    return arg


def response(resp):
    """Parse the JSON response into a list of SearXNG results."""
    results = []
    json = loads(resp.text)

    title_filter = html_to_text if title_html_to_text else identity
    content_filter = html_to_text if content_html_to_text else identity

    if results_query:
        # a result container is configured: read url / title / content from
        # each item of the first match
        rs = query(json, results_query)  # pylint: disable=invalid-name
        if not rs:
            return results
        for result in rs[0]:
            try:
                url = query(result, url_query)[0]
                title = query(result, title_query)[0]
            except:  # pylint: disable=bare-except
                continue
            try:
                content = query(result, content_query)[0]
            except:  # pylint: disable=bare-except
                content = ""
            results.append(
                {
                    'url': url_prefix + to_string(url),
                    'title': title_filter(to_string(title)),
                    'content': content_filter(to_string(content)),
                }
            )
    else:
        # no container: the url / title / content queries each return a flat
        # list and results are zipped together by position
        for url, title, content in zip(query(json, url_query), query(json, title_query), query(json, content_query)):
            results.append(
                {
                    'url': url_prefix + to_string(url),
                    'title': title_filter(to_string(title)),
                    'content': content_filter(to_string(content)),
                }
            )

    if not suggestion_query:
        return results
    for suggestion in query(json, suggestion_query):
        results.append({'suggestion': suggestion})
    return results
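
# End-to-end sketch (hypothetical API): with results_query = 'items',
# url_query = 'link', title_query = 'name' and content_query = 'snippet',
# the response body
#   {"items": [{"link": "https://example.org", "name": "t", "snippet": "c"}]}
# yields one result with url 'https://example.org', title 't' and content 'c'.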