.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
client.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2# pylint: disable=missing-module-docstring, global-statement
3
4import typing as t
5from types import TracebackType
6
7import asyncio
8import logging
9import random
10from ssl import SSLContext
11import threading
12
13import httpx
14from httpx_socks import AsyncProxyTransport
15from python_socks import parse_proxy_url, ProxyConnectionError, ProxyTimeoutError, ProxyError
16import uvloop
17
18from searx import logger
19
20
# NOTE(review): presumably makes asyncio use uvloop's event loop implementation
# for loops created afterwards -- confirm against the uvloop documentation.
uvloop.install()

# Type of the ``cert`` argument of get_sslcontexts(): one string, or a tuple of
# two or three strings.
CertTypes = str | tuple[str, str] | tuple[str, str, str]
# Key type of SSLCONTEXTS: (proxy_url, cert, verify, trust_env).
SslContextKeyType = tuple[str | None, CertTypes | None, bool, bool]

logger = logger.getChild('searx.network.client')
# Event loop returned by get_loop().  It is assigned by the thread started in
# init() and is None until that thread has created the loop.
LOOP: asyncio.AbstractEventLoop = None # pyright: ignore[reportAssignmentType]

# Cache of SSL contexts, filled lazily by get_sslcontexts().
SSLCONTEXTS: dict[SslContextKeyType, SSLContext] = {}
30
31
def shuffle_ciphers(ssl_context: SSLContext):
    """Randomly reorder the ciphers of a SSL context, keeping the first three.

    The cipher names reported by ``ssl_context.get_ciphers()`` are split into
    the first three entries, which stay where they are, and the remaining
    entries, which are shuffled.  The resulting list is then installed again
    with ``ssl_context.set_ciphers()``.  The context is modified in place and
    nothing is returned.

    Rationale, quoted from `What Is TLS Fingerprint and How to Bypass It`_

      > When implementing TLS fingerprinting, servers can't operate based on a
      > locked-in whitelist database of fingerprints.  New fingerprints appear
      > when web clients or TLS libraries release new versions.  So, they have to
      > live off a blocklist database instead.
      > ...
      > It's safe to leave the first three as is but shuffle the remaining ciphers
      > and you can bypass the TLS fingerprint check.

    .. _What Is TLS Fingerprint and How to Bypass It:
       https://www.zenrows.com/blog/what-is-tls-fingerprint#how-to-bypass-tls-fingerprinting

    """
    names = [entry["name"] for entry in ssl_context.get_ciphers()]
    fixed_head = names[:3]
    shuffled_tail = names[3:]
    random.shuffle(shuffled_tail)
    ssl_context.set_ciphers(":".join([*fixed_head, *shuffled_tail]))
53
54
def get_sslcontexts(
    proxy_url: str | None = None, cert: CertTypes | None = None, verify: bool = True, trust_env: bool = True
) -> SSLContext:
    """Return the cached SSL context for the given settings, creating it on first use.

    Contexts are cached in ``SSLCONTEXTS`` under the key
    ``(proxy_url, cert, verify, trust_env)``.  A new context is created with
    ``httpx.create_ssl_context(verify, cert, trust_env)`` and its ciphers are
    shuffled by :py:obj:`shuffle_ciphers` before it is stored.

    :param proxy_url: only used as part of the cache key.
    :param cert: passed to ``httpx.create_ssl_context`` and part of the cache key.
    :param verify: passed to ``httpx.create_ssl_context`` and part of the cache key.
    :param trust_env: passed to ``httpx.create_ssl_context`` and part of the cache key.
    :return: the SSL context shared by all callers using the same key.
    """
    key: SslContextKeyType = (proxy_url, cert, verify, trust_env)
    ssl_context = SSLCONTEXTS.get(key)
    if ssl_context is None:
        ssl_context = httpx.create_ssl_context(verify, cert, trust_env)
        shuffle_ciphers(ssl_context)
        # Store the context only once it is fully prepared, so that a failure
        # while shuffling does not leave an unshuffled context in the cache.
        SSLCONTEXTS[key] = ssl_context
    return ssl_context
63
64
class AsyncHTTPTransportNoHttp(httpx.AsyncHTTPTransport):
    """Transport that rejects every request; mounted to disable plain HTTP.

    ``httpx.AsyncHTTPTransport.__init__`` creates an SSLContext unconditionally:
    https://github.com/encode/httpx/blob/0f61aa58d66680c239ce43c8cdd453e7dc532bfc/httpx/_transports/default.py#L271

    Each SSLContext consumes more than 500kb of memory, and there is about one
    network per engine.  To avoid that cost the constructor of this class is
    empty and does not call the base class constructor.

    As a consequence, all public methods are overridden here so that none of
    them relies on state the base constructor would have set up.

    For reference: https://github.com/encode/httpx/issues/2298
    """

    def __init__(self, *args, **kwargs):  # type: ignore
        # pylint: disable=super-init-not-called
        """Accept and ignore all arguments; the base constructor is skipped on purpose."""

    async def handle_async_request(self, request: httpx.Request):
        """Refuse the request by raising ``httpx.UnsupportedProtocol``."""
        raise httpx.UnsupportedProtocol('HTTP protocol is disabled')

    async def aclose(self) -> None:
        """Do nothing: this transport holds no resources."""
        return None

    async def __aenter__(self):
        """Enter the async context and return this transport."""
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: TracebackType | None = None,
    ) -> None:
        """Leave the async context; nothing to release, exceptions are not suppressed."""
        return None
99
100
def _proxy_exc_detail(exc: BaseException) -> str:
    """Return the first argument of ``exc`` as text, or ``str(exc)`` if it has no arguments."""
    if exc.args:
        return str(exc.args[0])
    return str(exc)


class AsyncProxyTransportFixed(AsyncProxyTransport):
    """Fix httpx_socks.AsyncProxyTransport

    Map python_socks exceptions to httpx.ProxyError exceptions
    """

    async def handle_async_request(self, request: httpx.Request):
        """Forward the request to the base transport and translate proxy errors.

        :param request: the request to send.
        :return: whatever the base class ``handle_async_request`` returns.
        :raises httpx.ProxyError: when the base transport raises
            ``ProxyConnectionError``, ``ProxyTimeoutError`` or ``ProxyError``;
            the original exception is chained as the cause.
        """
        try:
            return await super().handle_async_request(request)
        except ProxyConnectionError as e:
            raise httpx.ProxyError("ProxyConnectionError: " + str(e.strerror), request=request) from e
        except ProxyTimeoutError as e:
            # args[0] may be absent or not a string; concatenating it directly
            # would raise TypeError/IndexError here and hide the proxy error.
            raise httpx.ProxyError("ProxyTimeoutError: " + _proxy_exc_detail(e), request=request) from e
        except ProxyError as e:
            raise httpx.ProxyError("ProxyError: " + _proxy_exc_detail(e), request=request) from e
116
117
def get_transport_for_socks_proxy(
    verify: bool, http2: bool, local_address: str, proxy_url: str, limit: httpx.Limits, retries: int
):
    """Build an :py:obj:`AsyncProxyTransportFixed` for a SOCKS proxy URL.

    :param verify: when ``True`` a shared SSL context from
        :py:obj:`get_sslcontexts` is used, otherwise the value is passed on as is.
    :param http2: passed to the transport as ``http2``.
    :param local_address: passed to the transport as ``local_address``.
    :param proxy_url: URL of the SOCKS proxy; ``socks5h://`` is accepted.
    :param limit: passed to the transport as ``limits``.
    :param retries: passed to the transport as ``retries``.
    :return: the configured transport.
    """
    # support socks5h (requests compatibility):
    # https://requests.readthedocs.io/en/master/user/advanced/#socks
    # socks5://  hostname is resolved on client side
    # socks5h:// hostname is resolved on proxy side
    rdns = False
    socks5h = 'socks5h://'
    if proxy_url.startswith(socks5h):
        # rewrite to the scheme parse_proxy_url() is given and remember that
        # name resolution has to happen on the proxy side
        proxy_url = 'socks5://' + proxy_url.removeprefix(socks5h)
        rdns = True

    proxy_type, proxy_host, proxy_port, proxy_username, proxy_password = parse_proxy_url(proxy_url)
    _verify = get_sslcontexts(proxy_url, None, verify, True) if verify is True else verify
    return AsyncProxyTransportFixed(
        proxy_type=proxy_type,
        proxy_host=proxy_host,
        proxy_port=proxy_port,
        username=proxy_username,
        password=proxy_password,
        rdns=rdns,
        loop=get_loop(),
        verify=_verify,
        http2=http2,
        local_address=local_address,
        limits=limit,
        retries=retries,
    )
147
148
def get_transport(
    verify: bool, http2: bool, local_address: str, proxy_url: str | None, limit: httpx.Limits, retries: int
):
    """Build a ``httpx.AsyncHTTPTransport``, optionally going through a proxy.

    :param verify: when ``True`` a shared SSL context from
        :py:obj:`get_sslcontexts` is used, otherwise the value is passed on as is.
    :param http2: passed to the transport as ``http2``.
    :param local_address: passed to the transport as ``local_address``.
    :param proxy_url: URL of the proxy; no proxy is configured when empty or ``None``.
    :param limit: passed to the transport as ``limits``.
    :param retries: passed to the transport as ``retries``.
    :return: the configured transport.
    """
    _verify = get_sslcontexts(None, None, verify, True) if verify is True else verify
    return httpx.AsyncHTTPTransport(
        verify=_verify,
        http2=http2,
        limits=limit,
        # httpx.Proxy is the public export of the class formerly reached
        # through the private module httpx._config
        proxy=httpx.Proxy(proxy_url) if proxy_url else None,
        local_address=local_address,
        retries=retries,
    )
162
163
def new_client(
    # pylint: disable=too-many-arguments
    enable_http: bool,
    verify: bool,
    enable_http2: bool,
    max_connections: int,
    max_keepalive_connections: int,
    keepalive_expiry: float,
    proxies: dict[str, str],
    local_address: str,
    retries: int,
    max_redirects: int,
    hook_log_response: t.Callable[..., t.Any] | None,
) -> httpx.AsyncClient:
    """Create a ``httpx.AsyncClient`` with one transport per proxy pattern.

    :param enable_http: when false, proxy patterns starting with ``http://``
        are skipped and ``http://`` is mounted on
        :py:obj:`AsyncHTTPTransportNoHttp`, which rejects every request.
    :param verify: passed to the transport factories.
    :param enable_http2: passed to the transport factories as ``http2``.
    :param max_connections: used to build the ``httpx.Limits``.
    :param max_keepalive_connections: used to build the ``httpx.Limits``.
    :param keepalive_expiry: used to build the ``httpx.Limits``.
    :param proxies: maps a httpx mount pattern to a proxy URL.
    :param local_address: passed to the transport factories.
    :param retries: passed to the transport factories.
    :param max_redirects: passed to the client.
    :param hook_log_response: if set, registered as the only ``response`` event hook.
    :return: the configured client.
    """
    limit = httpx.Limits(
        max_connections=max_connections,
        max_keepalive_connections=max_keepalive_connections,
        keepalive_expiry=keepalive_expiry,
    )
    # See https://www.python-httpx.org/advanced/#routing
    mounts: dict[str, t.Any | None] = {}
    socks_schemes = ('socks4://', 'socks5://', 'socks5h://')
    for pattern, proxy_url in proxies.items():
        if not enable_http and pattern.startswith('http://'):
            continue
        if proxy_url.startswith(socks_schemes):
            mounts[pattern] = get_transport_for_socks_proxy(
                verify, enable_http2, local_address, proxy_url, limit, retries
            )
        else:
            mounts[pattern] = get_transport(verify, enable_http2, local_address, proxy_url, limit, retries)

    if not enable_http:
        mounts['http://'] = AsyncHTTPTransportNoHttp()

    # default transport (no proxy) for everything not matched by a mount
    transport = get_transport(verify, enable_http2, local_address, None, limit, retries)

    event_hooks = None
    if hook_log_response:
        event_hooks = {'response': [hook_log_response]}

    return httpx.AsyncClient(
        transport=transport,
        mounts=mounts,
        max_redirects=max_redirects,
        event_hooks=event_hooks,
    )
211
212
def get_loop() -> asyncio.AbstractEventLoop:
    """Return the current value of the module-level ``LOOP``.

    ``LOOP`` is assigned by the thread started in :py:obj:`init`; until that
    thread has created the event loop the value is ``None``.
    """
    return LOOP
215
216
def init():
    """Raise the log level of the HTTP libraries and start the event loop thread.

    The listed loggers are set to ``logging.WARNING``.  Then a daemon thread
    named ``asyncio_loop`` is started; it creates a new asyncio event loop,
    stores it in the module-level ``LOOP`` and runs it forever.
    """
    # log
    quiet_loggers = (
        'httpx',
        'httpcore.proxy',
        'httpcore.connection',
        'httpcore.http11',
        'httpcore.http2',
        'hpack.hpack',
        'hpack.table',
    )
    for name in quiet_loggers:
        logging.getLogger(name).setLevel(logging.WARNING)

    # loop
    def loop_thread():
        global LOOP
        LOOP = asyncio.new_event_loop()
        LOOP.run_forever()

    threading.Thread(target=loop_thread, name='asyncio_loop', daemon=True).start()


init()
handle_async_request(self, httpx.Request request)
Definition client.py:83
None __aexit__(self, type[BaseException]|None exc_type=None, BaseException|None exc_value=None, TracebackType|None traceback=None)
Definition client.py:97
handle_async_request(self, httpx.Request request)
Definition client.py:107
SSLContext get_sslcontexts(str|None proxy_url=None, CertTypes|None cert=None, bool verify=True, bool trust_env=True)
Definition client.py:57
shuffle_ciphers(SSLContext ssl_context)
Definition client.py:32
get_transport_for_socks_proxy(bool verify, bool http2, str local_address, str proxy_url, httpx.Limits limit, int retries)
Definition client.py:120
asyncio.AbstractEventLoop get_loop()
Definition client.py:213
get_transport(bool verify, bool http2, str local_address, str|None proxy_url, httpx.Limits limit, int retries)
Definition client.py:151
httpx.AsyncClient new_client(bool enable_http, bool verify, bool enable_http2, int max_connections, int max_keepalive_connections, float keepalive_expiry, dict[str, str] proxies, str local_address, int retries, int max_redirects, t.Callable[..., t.Any]|None hook_log_response)
Definition client.py:177