.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
command.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""With *command engines* administrators can run engines to integrate arbitrary
3shell commands.
4
5.. attention::
6
7 When creating and enabling a ``command`` engine on a public instance, you
8 must be careful to avoid leaking private data.
9
10The easiest solution is to limit the access by setting ``tokens`` as described
11in section :ref:`private engines`. The engine base is flexible. Only your
12imagination can limit the power of this engine (and maybe security concerns).
13
14Configuration
15=============
16
17The following options are available:
18
19``command``:
20 A comma separated list of the elements of the command. A special token
21 ``{{QUERY}}`` tells where to put the search terms of the user. Example:
22
23 .. code:: yaml
24
25 ['ls', '-l', '-h', '{{QUERY}}']
26
27``delimiter``:
28 A mapping containing the delimiter string ``chars`` and ``keys``, the list of
29 *titles* assigned to the elements a result is split into.
30
31``parse_regex``:
32 A dict containing the regular expressions for each result key.
33
34``query_type``:
35
36 The expected type of user search terms. Possible values: ``path`` and
37 ``enum``.
38
39 ``path``:
40 Checks if the user provided path is inside the working directory. If not,
41 the query is not executed.
42
43 ``enum``:
44 Is a list of allowed search terms. If the user submits something which is
45 not included in the list, the query returns an error.
46
47``query_enum``:
48 A list containing allowed search terms if ``query_type`` is set to ``enum``.
49
50``working_dir``:
51 The directory where the command has to be executed. Default: ``./``.
52
53``result_separator``:
54 The character that separates results. Default: ``\\n``.
55
56Example
57=======
58
59The example engine below can be used to find files with a specific name in the
60configured working directory:
61
62.. code:: yaml
63
64 - name: find
65 engine: command
66 command: ['find', '.', '-name', '{{QUERY}}']
67 query_type: path
68 shortcut: fnd
69 delimiter:
70 chars: ' '
71 keys: ['line']
72
73Implementations
74===============
75"""
76
import re
from os.path import expanduser, isabs, realpath, commonprefix, commonpath
from shlex import split as shlex_split
from subprocess import Popen, PIPE
from threading import Thread

from searx import logger
from searx.result_types import EngineResults
85
86
# Engine attributes.  NOTE(review): presumably evaluated by the SearXNG engine
# loader -- confirm against the loader.
engine_type = 'offline'
paging = True

# Defaults of the options that init() overrides from the engine settings.
command = []
delimiter = {}
parse_regex = {}
environment_variables = {}
working_dir = realpath('.')

# Options that init() does not assign.  NOTE(review): presumably they are set
# on the module from the engine settings by the loader -- confirm.
query_type = ''
query_enum = []
result_separator = '\n'
# Seconds; used to join the reader thread and to wait for the process to exit.
timeout = 4.0

_command_logger = logger.getChild('command')
# Filled by init(): result key -> compiled regular expression from ``parse_regex``.
_compiled_parse_regex = {}
101
102
def init(engine_settings):
    """Validate ``engine_settings`` and copy the engine options into the module.

    The parsing options are checked first (see :py:obj:`check_parsing_options`),
    then ``command`` (mandatory) and the optional ``working_dir``,
    ``parse_regex``, ``delimiter`` and ``environment_variables`` are stored in
    the module globals of the same name.

    :raises ValueError: if the parsing options are invalid or ``command`` is
        missing.
    """
    global command, working_dir, delimiter, parse_regex, environment_variables  # pylint: disable=global-statement

    check_parsing_options(engine_settings)

    if 'command' not in engine_settings:
        raise ValueError('engine command : missing configuration key: command')
    command = engine_settings['command']

    if 'working_dir' in engine_settings:
        configured_dir = engine_settings['working_dir']
        # Absolute paths are taken verbatim, relative ones are resolved.
        working_dir = configured_dir if isabs(configured_dir) else realpath(configured_dir)

    if 'parse_regex' in engine_settings:
        parse_regex = engine_settings['parse_regex']
        # Compile once here so that parsing a result does not recompile.
        _compiled_parse_regex.update(
            (key, re.compile(pattern, flags=re.MULTILINE)) for key, pattern in parse_regex.items()
        )

    if 'delimiter' in engine_settings:
        delimiter = engine_settings['delimiter']

    if 'environment_variables' in engine_settings:
        environment_variables = engine_settings['environment_variables']
127
128
def search(query, params) -> EngineResults:
    """Run the configured command for ``query`` and return the parsed results.

    The command is executed in a separate thread which fills the result
    container.  This function waits at most ``timeout`` seconds for that
    thread and then returns whatever has been collected so far.  If no command
    line could be built the empty container is returned.
    """
    results = EngineResults()
    command_line = _get_command_to_run(query)

    if command_line:
        worker = Thread(target=_get_results_from_process, args=(results, command_line, params['pageno']))
        worker.start()
        worker.join(timeout=timeout)

    return results
140
141
def _get_command_to_run(query):
    """Build the argument list of the command for the user's ``query``.

    The query is split with shell-like quoting rules and validated according
    to ``query_type``.  Each ``{{QUERY}}`` token of the configured ``command``
    is then replaced by the search terms; all other elements are copied as is.

    :raises ValueError: if the search terms are rejected by the ``query_type``
        check or cannot be split (e.g. unbalanced quotes).
    """
    params = shlex_split(query)
    # Reject search terms that violate ``query_type`` before anything is run.
    __check_query_params(params)

    cmd = []
    for element in command:
        if element == '{{QUERY}}':
            cmd.extend(params)
        else:
            cmd.append(element)

    return cmd
154
155
def _iter_raw_results(stream):
    """Yield the raw results, separated by ``result_separator``, of the binary ``stream``."""
    leftover = ''
    for line in stream:
        # The last element is '' when the buffer ends on a separator, otherwise
        # it is an incomplete result that is carried over to the next line.
        *complete, leftover = (leftover + line.decode('utf-8')).split(result_separator)
        yield from complete
    # A final result without trailing separator; a whitespace-only tail (e.g.
    # the newline that follows the last separator) is not a result.
    if leftover.strip():
        yield leftover


def _get_results_from_process(res: EngineResults, cmd, pageno):
    """Run ``cmd`` and add the parsed results of page ``pageno`` to ``res``.

    The standard output of the process is read line by line, split into raw
    results and parsed.  Raw results that cannot be parsed are skipped and do
    not count for the paging.

    :returns: ``res`` as soon as the requested page is complete, ``None`` if
        the output ended before.
    :raises RuntimeError: if the process exits with a non-zero return code.
    :raises subprocess.TimeoutExpired: if the process does not exit within
        ``timeout`` seconds after its output has been consumed.
    """
    count = 0
    start, end = __get_results_limits(pageno)
    # NOTE(review): stderr is piped but never read (a process writing a lot to
    # stderr may block), and ``working_dir`` is not passed as ``cwd`` although
    # it is documented as the directory the command is executed in -- confirm.
    with Popen(cmd, stdout=PIPE, stderr=PIPE, env=environment_variables) as process:
        for raw_result in _iter_raw_results(process.stdout):
            result = __parse_single_result(raw_result)
            # The parser signals a failure by an empty mapping.
            if not result:
                _command_logger.debug('skipped result: %s', raw_result)
                continue

            if start <= count <= end:
                res.add(res.types.KeyValue(kvmap=result))

            count += 1
            if end < count:
                # Page is complete, the remaining output is of no interest.
                return res

        return_code = process.wait(timeout=timeout)
        if return_code != 0:
            raise RuntimeError('non-zero return code when running command', cmd, return_code)
        return None
188
189
def __get_results_limits(pageno):
    """Return the zero-based, inclusive ``(start, end)`` result indexes of page
    ``pageno`` (pages are counted from 1 and hold ten results each)."""
    start = (pageno - 1) * 10
    end = start + 9
    return start, end
194
195
def __check_query_params(params):
    """Validate the search terms ``params`` according to ``query_type``.

    ``path``
      The last search term is taken as a path that has to be located inside
      of ``working_dir``.

    ``enum``
      Each search term has to be listed in ``query_enum`` (no check is done if
      the list is empty).

    Nothing is checked if ``query_type`` is unset.

    :raises ValueError: if the search terms are not allowed.
    """
    if not query_type:
        return

    if query_type == 'path':
        if not params:
            raise ValueError('missing path in query')
        query_path = realpath(expanduser(params[-1]))
        base_dir = realpath(working_dir)
        # Compare whole path components: a plain character prefix would accept
        # e.g. '/srv/data-secret' for the working directory '/srv/data'.
        if commonpath([query_path, base_dir]) != base_dir:
            raise ValueError('requested path is outside of configured working directory')
    elif query_type == 'enum' and query_enum:
        for param in params:
            if param not in query_enum:
                raise ValueError('submitted query params is not allowed', param, 'allowed params:', query_enum)
209
210
def check_parsing_options(engine_settings):
    """Checks if delimiter based parsing or regex parsing is configured correctly

    Exactly one of ``delimiter`` and ``parse_regex`` has to be configured, and
    a ``delimiter`` mapping has to contain both ``chars`` and ``keys``.

    :raises ValueError: if the parsing options are missing, ambiguous or
        incomplete.
    """
    has_delimiter = 'delimiter' in engine_settings
    has_parse_regex = 'parse_regex' in engine_settings

    if not has_delimiter and not has_parse_regex:
        raise ValueError('failed to init settings for parsing lines: missing delimiter or parse_regex')
    if has_delimiter and has_parse_regex:
        raise ValueError('failed to init settings for parsing lines: too many settings')

    if has_delimiter:
        missing = [key for key in ('chars', 'keys') if key not in engine_settings['delimiter']]
        if missing:
            # Name the missing keys, a bare ValueError is hard to track down.
            raise ValueError(
                'failed to init settings for parsing lines: delimiter is missing ' + ', '.join(missing)
            )
222
223
def __parse_single_result(raw_result):
    """Turn one raw result of the command output into a mapping.

    With a ``delimiter`` configured the raw result is split at
    ``delimiter['chars']`` and the elements are assigned to the titles in
    ``delimiter['keys']``.  With ``parse_regex`` configured each result key is
    set to the text matched by its regular expression.

    An empty mapping is returned if the raw result does not split into the
    expected number of elements or if one of the expressions does not match.
    """
    parsed = {}

    if delimiter:
        separator = delimiter['chars']
        titles = delimiter['keys']
        # Split at most into as many elements as there are titles.
        fields = raw_result.split(separator, maxsplit=len(titles) - 1)
        if len(fields) != len(titles):
            return {}
        parsed.update(zip(titles, fields))

    if parse_regex:
        for title, pattern in _compiled_parse_regex.items():
            match = pattern.search(raw_result)
            if match is None:
                return {}
            parsed[title] = match.group(0)

    return parsed
__parse_single_result(raw_result)
Definition command.py:224
__get_results_limits(pageno)
Definition command.py:190
check_parsing_options(engine_settings)
Definition command.py:211
_get_results_from_process(EngineResults res, cmd, pageno)
Definition command.py:156
_get_command_to_run(query)
Definition command.py:142
init(engine_settings)
Definition command.py:103
__check_query_params(params)
Definition command.py:196