.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
client.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2# pylint: disable=missing-module-docstring, global-statement
3
4import typing as t
5from types import TracebackType
6
7import asyncio
8import logging
9import random
10from ssl import SSLContext
11import threading
12
13import httpx
14from httpx_socks import AsyncProxyTransport
15from python_socks import parse_proxy_url, ProxyConnectionError, ProxyTimeoutError, ProxyError
16
17from searx import logger
18
# Accepted shapes of a client certificate argument: one string, or a tuple of
# two or three strings.
# NOTE(review): presumably (cert file, key file[, password]) as understood by
# httpx -- confirm against the httpx documentation.
CertTypes = str | tuple[str, str] | tuple[str, str, str]
# Key type of the SSLCONTEXTS cache: (proxy_url, cert, verify, trust_env).
SslContextKeyType = tuple[str | None, CertTypes | None, bool, bool]

logger = logger.getChild('searx.network.client')
# Event loop returned by get_loop(); it starts out as None and is assigned by
# the thread that init() starts.
LOOP: asyncio.AbstractEventLoop = None # pyright: ignore[reportAssignmentType]

# Cache of SSL contexts that were already created, keyed by their parameters.
SSLCONTEXTS: dict[SslContextKeyType, SSLContext] = {}
26
27
def shuffle_ciphers(ssl_context: SSLContext):
    """Randomize the cipher order of ``ssl_context``, keeping its first three.

    The names of the ciphers currently enabled on the context are read, every
    name after the third one is shuffled randomly, and the resulting order is
    written back to the context.  The context is modified in place and nothing
    is returned.

    The approach follows `What Is TLS Fingerprint and How to Bypass It`_:

        When implementing TLS fingerprinting, servers can't operate based on a
        locked-in whitelist database of fingerprints.  New fingerprints appear
        when web clients or TLS libraries release new versions.  So, they have
        to live off a blocklist database instead.
        ...
        It's safe to leave the first three as is but shuffle the remaining
        ciphers and you can bypass the TLS fingerprint check.

    .. _What Is TLS Fingerprint and How to Bypass It:
       https://www.zenrows.com/blog/what-is-tls-fingerprint#how-to-bypass-tls-fingerprinting

    """
    names = [entry["name"] for entry in ssl_context.get_ciphers()]
    # The leading three names keep their position, only the rest is shuffled.
    fixed_head = names[:3]
    shuffled_tail = names[3:]
    random.shuffle(shuffled_tail)
    ssl_context.set_ciphers(":".join([*fixed_head, *shuffled_tail]))
49
50
def get_sslcontexts(
    proxy_url: str | None = None, cert: CertTypes | None = None, verify: bool = True, trust_env: bool = True
) -> SSLContext:
    """Return the SSL context for the given parameters, creating it on first use.

    Contexts are cached in ``SSLCONTEXTS`` under the key
    ``(proxy_url, cert, verify, trust_env)``, so repeated calls with the same
    arguments return the same ``SSLContext`` object.

    :param proxy_url: proxy URL; only used as part of the cache key.
    :param cert: client certificate passed to ``httpx.create_ssl_context``.
    :param verify: verification setting passed to ``httpx.create_ssl_context``.
    :param trust_env: passed to ``httpx.create_ssl_context``.
    :return: the cached (or newly created) SSL context with shuffled ciphers.
    """
    key: SslContextKeyType = (proxy_url, cert, verify, trust_env)
    context = SSLCONTEXTS.get(key)
    if context is None:
        context = httpx.create_ssl_context(verify, cert, trust_env)
        shuffle_ciphers(context)
        # Cache only after the shuffle succeeded, so that a failure can not
        # leave a context with the default cipher order in the cache.
        SSLCONTEXTS[key] = context
    return context
59
60
class AsyncHTTPTransportNoHttp(httpx.AsyncHTTPTransport):
    """Transport that rejects every request (used to block plain HTTP).

    ``httpx.AsyncHTTPTransport.__init__`` creates an SSLContext unconditionally:
    https://github.com/encode/httpx/blob/0f61aa58d66680c239ce43c8cdd453e7dc532bfc/httpx/_transports/default.py#L271

    Each SSLContext consumes more than 500kb of memory, and there is about one
    network per engine.  To avoid that cost the constructor of this class does
    nothing, and as a consequence all public methods of the base class are
    overridden here.

    For reference: https://github.com/encode/httpx/issues/2298
    """

    def __init__(self, *args, **kwargs):  # type: ignore
        # pylint: disable=super-init-not-called
        """Accept and ignore all arguments; the base initializer is skipped on purpose."""

    async def handle_async_request(self, request: httpx.Request):
        """Refuse the request by raising ``httpx.UnsupportedProtocol``."""
        raise httpx.UnsupportedProtocol('HTTP protocol is disabled')

    async def aclose(self) -> None:
        """Do nothing: the constructor did not set anything up."""
        return None

    async def __aenter__(self):
        """Enter the async context and return the transport itself."""
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: TracebackType | None = None,
    ) -> None:
        """Leave the async context; nothing is cleaned up and exceptions are not suppressed."""
        return None
95
96
class AsyncProxyTransportFixed(AsyncProxyTransport):
    """Fix httpx_socks.AsyncProxyTransport

    Map python_socks exceptions to httpx.ProxyError exceptions
    """

    @staticmethod
    def _first_arg(exc: BaseException) -> str:
        """Return the first argument of ``exc`` as text.

        Falls back to ``str(exc)`` when the exception was raised without
        arguments, so that building the error message can never fail itself.
        """
        return str(exc.args[0]) if exc.args else str(exc)

    async def handle_async_request(self, request: httpx.Request):
        """Forward the request to the base transport and translate proxy errors.

        :param request: the request to send through the SOCKS proxy.
        :return: whatever the base transport returns for the request.
        :raises httpx.ProxyError: when python_socks raises
            ``ProxyConnectionError``, ``ProxyTimeoutError`` or ``ProxyError``;
            the original exception is chained as the cause.
        """
        try:
            return await super().handle_async_request(request)
        except ProxyConnectionError as e:
            raise httpx.ProxyError("ProxyConnectionError: " + str(e.strerror), request=request) from e
        except ProxyTimeoutError as e:
            # e.args may be empty or hold a non-string value: never concatenate it directly.
            raise httpx.ProxyError("ProxyTimeoutError: " + self._first_arg(e), request=request) from e
        except ProxyError as e:
            raise httpx.ProxyError("ProxyError: " + self._first_arg(e), request=request) from e
112
113
def get_transport_for_socks_proxy(
    verify: bool, http2: bool, local_address: str, proxy_url: str, limit: httpx.Limits, retries: int
):
    """Build the transport used for a SOCKS proxy.

    :param verify: when ``True`` a shared SSL context from ``get_sslcontexts``
        is used, any other value is handed to the transport unchanged.
    :param http2: passed to the transport as ``http2``.
    :param local_address: passed to the transport as ``local_address``.
    :param proxy_url: URL of the SOCKS proxy; ``socks5h://`` is accepted and
        rewritten to ``socks5://`` with remote DNS resolution enabled.
    :param limit: passed to the transport as ``limits``.
    :param retries: passed to the transport as ``retries``.
    :return: the configured proxy transport.
    """
    # support socks5h (requests compatibility):
    # https://requests.readthedocs.io/en/master/user/advanced/#socks
    # socks5:// hostname is resolved on client side
    # socks5h:// hostname is resolved on proxy side
    rdns = False
    socks5h = 'socks5h://'
    if proxy_url.startswith(socks5h):
        proxy_url = 'socks5://' + proxy_url[len(socks5h) :]
        rdns = True

    proxy_type, proxy_host, proxy_port, proxy_username, proxy_password = parse_proxy_url(proxy_url)
    _verify = get_sslcontexts(proxy_url, None, verify, True) if verify is True else verify
    # NOTE(review): the line naming the constructor was missing from the
    # reviewed source; AsyncProxyTransportFixed is assumed because it is the
    # SOCKS transport defined in this module -- confirm.
    return AsyncProxyTransportFixed(
        proxy_type=proxy_type,
        proxy_host=proxy_host,
        proxy_port=proxy_port,
        username=proxy_username,
        password=proxy_password,
        rdns=rdns,
        loop=get_loop(),
        verify=_verify,  # pyright: ignore[reportArgumentType]
        http2=http2,
        local_address=local_address,
        limits=limit,
        retries=retries,
    )
143
144
def get_transport(
    verify: bool, http2: bool, local_address: str, proxy_url: str | None, limit: httpx.Limits, retries: int
):
    """Build a ``httpx.AsyncHTTPTransport``, optionally routed through a proxy.

    :param verify: when ``True`` a shared SSL context from ``get_sslcontexts``
        is used, any other value is handed to the transport unchanged.
    :param http2: passed to the transport as ``http2``.
    :param local_address: passed to the transport as ``local_address``.
    :param proxy_url: proxy URL, or ``None`` / empty for a direct connection.
    :param limit: passed to the transport as ``limits``.
    :param retries: passed to the transport as ``retries``.
    :return: the configured ``httpx.AsyncHTTPTransport``.
    """
    _verify = get_sslcontexts(None, None, verify, True) if verify is True else verify
    return httpx.AsyncHTTPTransport(
        # pylint: disable=protected-access
        verify=_verify,
        http2=http2,
        limits=limit,
        proxy=httpx._config.Proxy(proxy_url) if proxy_url else None,  # pyright: ignore[reportPrivateUsage]
        local_address=local_address,
        retries=retries,
    )
158
159
def new_client(
    # pylint: disable=too-many-arguments
    enable_http: bool,
    verify: bool,
    enable_http2: bool,
    max_connections: int,
    max_keepalive_connections: int,
    keepalive_expiry: float,
    proxies: dict[str, str],
    local_address: str,
    retries: int,
    max_redirects: int,
    hook_log_response: t.Callable[..., t.Any] | None,
) -> httpx.AsyncClient:
    """Create a ``httpx.AsyncClient`` with one transport per proxy pattern.

    :param enable_http: when false, ``http://`` proxy patterns are skipped and
        ``http://`` is mounted on a transport that rejects every request.
    :param verify: forwarded to the transport factories.
    :param enable_http2: forwarded to the transport factories as ``http2``.
    :param max_connections: used for the ``httpx.Limits`` of all transports.
    :param max_keepalive_connections: used for the ``httpx.Limits``.
    :param keepalive_expiry: used for the ``httpx.Limits``.
    :param proxies: mapping of URL pattern to proxy URL; ``socks4://``,
        ``socks5://`` and ``socks5h://`` URLs get a SOCKS transport, all other
        URLs a regular HTTP transport with that proxy.
    :param local_address: forwarded to the transport factories.
    :param retries: forwarded to the transport factories.
    :param max_redirects: passed to the client as ``max_redirects``.
    :param hook_log_response: when set, registered as the only ``response``
        event hook of the client.
    :return: the configured client.
    """
    limit = httpx.Limits(
        max_connections=max_connections,
        max_keepalive_connections=max_keepalive_connections,
        keepalive_expiry=keepalive_expiry,
    )
    # See https://www.python-httpx.org/advanced/#routing
    mounts: dict[str, t.Any | None] = {}
    for pattern, proxy_url in proxies.items():
        if not enable_http and pattern.startswith('http://'):
            continue
        if proxy_url.startswith(('socks4://', 'socks5://', 'socks5h://')):
            mounts[pattern] = get_transport_for_socks_proxy(
                verify, enable_http2, local_address, proxy_url, limit, retries
            )
        else:
            mounts[pattern] = get_transport(verify, enable_http2, local_address, proxy_url, limit, retries)

    if not enable_http:
        mounts['http://'] = AsyncHTTPTransportNoHttp()

    # Default transport for every URL that no mount pattern matches.
    transport = get_transport(verify, enable_http2, local_address, None, limit, retries)

    event_hooks = None
    if hook_log_response:
        event_hooks = {'response': [hook_log_response]}

    return httpx.AsyncClient(
        transport=transport,
        mounts=mounts,
        max_redirects=max_redirects,
        event_hooks=event_hooks,
    )
207
208
def get_loop() -> asyncio.AbstractEventLoop:
    """Return the event loop currently stored in the module-level ``LOOP``."""
    loop = LOOP
    return loop
211
212
def init():
    """Quieten the HTTP libraries' loggers and start the background event loop.

    The loggers of httpx, httpcore and hpack are set to ``WARNING``.  A daemon
    thread named ``asyncio_loop`` is started; it creates a new event loop,
    stores it in the module-level ``LOOP`` and runs it forever.

    The function returns only after the thread has assigned ``LOOP``, so a
    caller never observes the initial ``None`` value.
    """
    # log
    for logger_name in (
        'httpx',
        'httpcore.proxy',
        'httpcore.connection',
        'httpcore.http11',
        'httpcore.http2',
        'hpack.hpack',
        'hpack.table',
    ):
        logging.getLogger(logger_name).setLevel(logging.WARNING)

    # loop
    loop_assigned = threading.Event()

    def loop_thread():
        global LOOP
        try:
            LOOP = asyncio.new_event_loop()
        finally:
            # Release init() even when the loop could not be created, so that
            # importing the module can never block forever.
            loop_assigned.set()
        LOOP.run_forever()

    thread = threading.Thread(
        target=loop_thread,
        name='asyncio_loop',
        daemon=True,
    )
    thread.start()
    # Without this wait init() could return before the thread has set LOOP.
    loop_assigned.wait()


# The loop is started as a side effect of importing this module.
init()
handle_async_request(self, httpx.Request request)
Definition client.py:79
None __aexit__(self, type[BaseException]|None exc_type=None, BaseException|None exc_value=None, TracebackType|None traceback=None)
Definition client.py:93
handle_async_request(self, httpx.Request request)
Definition client.py:103
SSLContext get_sslcontexts(str|None proxy_url=None, CertTypes|None cert=None, bool verify=True, bool trust_env=True)
Definition client.py:53
shuffle_ciphers(SSLContext ssl_context)
Definition client.py:28
get_transport_for_socks_proxy(bool verify, bool http2, str local_address, str proxy_url, httpx.Limits limit, int retries)
Definition client.py:116
asyncio.AbstractEventLoop get_loop()
Definition client.py:209
get_transport(bool verify, bool http2, str local_address, str|None proxy_url, httpx.Limits limit, int retries)
Definition client.py:147
httpx.AsyncClient new_client(bool enable_http, bool verify, bool enable_http2, int max_connections, int max_keepalive_connections, float keepalive_expiry, dict[str, str] proxies, str local_address, int retries, int max_redirects, t.Callable[..., t.Any]|None hook_log_response)
Definition client.py:173