.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
client.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2# pylint: disable=missing-module-docstring, global-statement
3
4import asyncio
5import logging
6import random
7from ssl import SSLContext
8import threading
9from typing import Any, Dict
10
11import httpx
12from httpx_socks import AsyncProxyTransport
13from python_socks import parse_proxy_url, ProxyConnectionError, ProxyTimeoutError, ProxyError
14
15from searx import logger
16
17# Optional uvloop (support Python 3.6)
18try:
19 import uvloop
20except ImportError:
21 pass
22else:
23 uvloop.install()
24
25
26logger = logger.getChild('searx.network.client')
27LOOP = None
28SSLCONTEXTS: Dict[Any, SSLContext] = {}
29
30
def shuffle_ciphers(ssl_context):
    """Install httpx's default ciphers on ``ssl_context`` in a partly random order.

    The first three entries of httpx's default cipher list keep their
    position; the remaining entries are shuffled with :py:func:`random.shuffle`
    and the combined list is applied through ``ssl_context.set_ciphers``.

    Rationale, from `What Is TLS Fingerprint and How to Bypass It`_

    > When implementing TLS fingerprinting, servers can't operate based on a
    > locked-in whitelist database of fingerprints.  New fingerprints appear
    > when web clients or TLS libraries release new versions.  So, they have to
    > live off a blocklist database instead.
    > ...
    > It's safe to leave the first three as is but shuffle the remaining ciphers
    > and you can bypass the TLS fingerprint check.

    .. _What Is TLS Fingerprint and How to Bypass It:
       https://www.zenrows.com/blog/what-is-tls-fingerprint#how-to-bypass-tls-fingerprinting

    """
    default_ciphers = httpx._config.DEFAULT_CIPHERS.split(':')  # pylint: disable=protected-access
    fixed_head = default_ciphers[:3]
    shuffled_tail = default_ciphers[3:]
    random.shuffle(shuffled_tail)
    ssl_context.set_ciphers(":".join([*fixed_head, *shuffled_tail]))
52
53
def get_sslcontexts(proxy_url=None, cert=None, verify=True, trust_env=True, http2=False):
    """Return the cached SSL context for this argument combination.

    The module-level ``SSLCONTEXTS`` dict is keyed by the tuple of all five
    arguments.  On a cache miss a context is created with
    ``httpx.create_ssl_context(cert, verify, trust_env, http2)``, stored, and
    its ciphers are shuffled by ``shuffle_ciphers``.  ``proxy_url`` is only part
    of the cache key; it is not passed to httpx.
    """
    cache_key = (proxy_url, cert, verify, trust_env, http2)
    try:
        return SSLCONTEXTS[cache_key]
    except KeyError:
        pass
    context = httpx.create_ssl_context(cert, verify, trust_env, http2)
    # Store first, then shuffle: the cached object is the one being modified.
    SSLCONTEXTS[cache_key] = context
    shuffle_ciphers(context)
    return context
60
61
class AsyncHTTPTransportNoHttp(httpx.AsyncHTTPTransport):
    """Transport that refuses every request (used to block plain HTTP).

    The constructor does nothing because httpx.AsyncHTTPTransport.__init__ creates an SSLContext unconditionally:
    https://github.com/encode/httpx/blob/0f61aa58d66680c239ce43c8cdd453e7dc532bfc/httpx/_transports/default.py#L271

    Each SSLContext consumes more than 500kb of memory, and there is about one network per engine.

    Since the base class is never initialized, this class overrides all public methods.

    For reference: https://github.com/encode/httpx/issues/2298
    """

    def __init__(self, *args, **kwargs):
        """Accept and ignore any arguments; the base constructor is skipped on purpose."""
        # pylint: disable=super-init-not-called

    async def handle_async_request(self, request):
        """Reject the request by raising ``httpx.UnsupportedProtocol``."""
        raise httpx.UnsupportedProtocol('HTTP protocol is disabled')

    async def aclose(self) -> None:
        """Do nothing: this transport holds no resources."""

    async def __aenter__(self):
        """Return the transport itself."""
        return self

    async def __aexit__(self, exc_type=None, exc_value=None, traceback=None) -> None:
        """Do nothing on exit; exceptions are not suppressed."""
96
97
class AsyncProxyTransportFixed(AsyncProxyTransport):
    """Fix httpx_socks.AsyncProxyTransport

    Map python_socks exceptions to httpx.ProxyError exceptions
    """

    @staticmethod
    def _proxy_error_message(prefix, exc, detail=None):
        """Build the ``"<prefix>: <detail>"`` text for a re-raised proxy error.

        ``detail`` is used when it is not ``None``; otherwise the first element
        of ``exc.args`` is used, and if ``args`` is empty, ``exc`` itself.
        Formatting (instead of ``+`` concatenation) keeps a missing or
        non-string detail from raising a secondary ``TypeError`` / ``IndexError``
        that would hide the proxy failure.
        """
        if detail is None:
            detail = exc.args[0] if exc.args else exc
        return f"{prefix}: {detail}"

    async def handle_async_request(self, request):
        """Forward the request and translate python_socks errors.

        Raises:
            httpx.ProxyError: chained from the ``ProxyConnectionError``,
                ``ProxyTimeoutError`` or ``ProxyError`` raised by the parent
                transport, with ``request`` attached.
        """
        try:
            return await super().handle_async_request(request)
        except ProxyConnectionError as e:
            # ``strerror`` may be None; the helper then falls back to ``args``.
            message = self._proxy_error_message("ProxyConnectionError", e, e.strerror)
            raise httpx.ProxyError(message, request=request) from e
        except ProxyTimeoutError as e:
            message = self._proxy_error_message("ProxyTimeoutError", e)
            raise httpx.ProxyError(message, request=request) from e
        except ProxyError as e:
            message = self._proxy_error_message("ProxyError", e)
            raise httpx.ProxyError(message, request=request) from e
113
114
def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit, retries):
    """Build an ``AsyncProxyTransportFixed`` for a SOCKS proxy URL.

    A ``socks5h://`` URL is rewritten to ``socks5://`` with ``rdns=True``.
    When ``verify`` is exactly ``True`` it is replaced by the cached SSL
    context from ``get_sslcontexts`` (keyed by the rewritten proxy URL); any
    other value is passed through unchanged.  The transport is bound to the
    loop returned by ``get_loop()``.
    """
    # support socks5h (requests compatibility):
    # https://requests.readthedocs.io/en/master/user/advanced/#socks
    # socks5://   hostname is resolved on client side
    # socks5h://  hostname is resolved on proxy side
    rdns = False
    socks5h = 'socks5h://'
    if proxy_url.startswith(socks5h):
        proxy_url = 'socks5://' + proxy_url[len(socks5h) :]
        rdns = True

    proxy_type, proxy_host, proxy_port, proxy_username, proxy_password = parse_proxy_url(proxy_url)
    verify = get_sslcontexts(proxy_url, None, verify, True, http2) if verify is True else verify
    # NOTE(review): the call head was missing from the listing; restored as the
    # SOCKS transport class defined in this module -- confirm against upstream.
    return AsyncProxyTransportFixed(
        proxy_type=proxy_type,
        proxy_host=proxy_host,
        proxy_port=proxy_port,
        username=proxy_username,
        password=proxy_password,
        rdns=rdns,
        loop=get_loop(),
        verify=verify,
        http2=http2,
        local_address=local_address,
        limits=limit,
        retries=retries,
    )
142
143
def get_transport(verify, http2, local_address, proxy_url, limit, retries):
    """Build an ``httpx.AsyncHTTPTransport``, optionally routed through a proxy.

    When ``verify`` is exactly ``True`` it is replaced by the cached SSL
    context from ``get_sslcontexts``; any other value is passed through
    unchanged.  A falsy ``proxy_url`` means no proxy.
    """
    if verify is True:
        verify = get_sslcontexts(None, None, verify, True, http2)

    proxy = None
    if proxy_url:
        proxy = httpx._config.Proxy(proxy_url)  # pylint: disable=protected-access

    return httpx.AsyncHTTPTransport(
        verify=verify,
        http2=http2,
        limits=limit,
        proxy=proxy,
        local_address=local_address,
        retries=retries,
    )
155
156
def new_client(
    # pylint: disable=too-many-arguments
    enable_http,
    verify,
    enable_http2,
    max_connections,
    max_keepalive_connections,
    keepalive_expiry,
    proxies,
    local_address,
    retries,
    max_redirects,
    hook_log_response,
):
    """Create an ``httpx.AsyncClient`` with one mounted transport per proxy pattern.

    ``proxies`` maps httpx mount patterns to proxy URLs.  ``socks4://``,
    ``socks5://`` and ``socks5h://`` URLs get a SOCKS transport, every other
    URL a regular proxied transport.  When ``enable_http`` is false, patterns
    starting with ``http://`` are skipped and ``http://`` is mounted on a
    transport that rejects all requests.  Requests that match no mount use a
    transport without proxy.  A truthy ``hook_log_response`` is registered as
    the only ``response`` event hook.
    """
    limit = httpx.Limits(
        max_connections=max_connections,
        max_keepalive_connections=max_keepalive_connections,
        keepalive_expiry=keepalive_expiry,
    )
    # See https://www.python-httpx.org/advanced/#routing
    mounts = {}
    socks_schemes = ('socks4://', 'socks5://', 'socks5h://')
    for pattern, proxy_url in proxies.items():
        if not enable_http and pattern.startswith('http://'):
            continue
        if proxy_url.startswith(socks_schemes):
            mounts[pattern] = get_transport_for_socks_proxy(
                verify, enable_http2, local_address, proxy_url, limit, retries
            )
        else:
            mounts[pattern] = get_transport(verify, enable_http2, local_address, proxy_url, limit, retries)

    if not enable_http:
        mounts['http://'] = AsyncHTTPTransportNoHttp()

    # default transport for everything that is not matched by a mount
    transport = get_transport(verify, enable_http2, local_address, None, limit, retries)

    event_hooks = None
    if hook_log_response:
        event_hooks = {'response': [hook_log_response]}

    return httpx.AsyncClient(
        transport=transport,
        mounts=mounts,
        max_redirects=max_redirects,
        event_hooks=event_hooks,
    )
203
204
def get_loop():
    """Return the module-level event loop ``LOOP``.

    ``LOOP`` starts out as ``None`` and is assigned by the thread that
    ``init()`` starts, so ``None`` is returned until that thread has created
    the loop.
    """
    return LOOP
207
208
def init():
    """Quiet the HTTP libraries' loggers and start the background event loop.

    The loggers of httpx, httpcore and hpack are set to ``WARNING``.  Then a
    daemon thread named ``asyncio_loop`` is started; it creates a new asyncio
    event loop, publishes it in the module-level ``LOOP`` and runs it forever.
    """
    # log
    noisy_loggers = (
        'httpx',
        'httpcore.proxy',
        'httpcore.connection',
        'httpcore.http11',
        'httpcore.http2',
        'hpack.hpack',
        'hpack.table',
    )
    for name in noisy_loggers:
        logging.getLogger(name).setLevel(logging.WARNING)

    # loop
    def run_loop():
        global LOOP
        LOOP = asyncio.new_event_loop()
        LOOP.run_forever()

    threading.Thread(target=run_loop, name='asyncio_loop', daemon=True).start()
234
235
236init()
None __aexit__(self, exc_type=None, exc_value=None, traceback=None)
Definition client.py:94
new_client(enable_http, verify, enable_http2, max_connections, max_keepalive_connections, keepalive_expiry, proxies, local_address, retries, max_redirects, hook_log_response)
Definition client.py:170
get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit, retries)
Definition client.py:115
get_sslcontexts(proxy_url=None, cert=None, verify=True, trust_env=True, http2=False)
Definition client.py:54
shuffle_ciphers(ssl_context)
Definition client.py:31
get_transport(verify, http2, local_address, proxy_url, limit, retries)
Definition client.py:144