.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
http_user_agent.py
Go to the documentation of this file.
1
# SPDX-License-Identifier: AGPL-3.0-or-later
2
"""
3
Method ``http_user_agent``
--------------------------

The ``http_user_agent`` method evaluates a request as the request of a bot if
the User-Agent_ header is unset or matches the regular expression
:py:obj:`USER_AGENT`.

.. _User-Agent:
   https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
12
13
"""
14
# pylint: disable=unused-argument
15
16
from
__future__
import
annotations
17
import
re
18
from
ipaddress
import
(
19
IPv4Network,
20
IPv6Network,
21
)
22
23
import
flask
24
import
werkzeug
25
26
from
.
import
config
27
from
._helpers
import
too_many_requests
28
29
30
# NOTE: filter_request() applies this pattern with ``Pattern.match``, i.e. the
# alternatives are anchored at the *start* of the User-Agent string.  An
# alternative that has to match somewhere inside the header value needs its
# own leading ``.*`` (see PetalBot below).
USER_AGENT = (
    r'('
    + r'unknown'
    + r'|[Cc][Uu][Rr][Ll]|[wW]get|Scrapy|splash|JavaFX|FeedFetcher|python-requests|Go-http-client|Java|Jakarta|okhttp'
    + r'|HttpClient|Jersey|Python|libwww-perl|Ruby|SynHttpClient|UniversalFeedParser|Googlebot|GoogleImageProxy'
    # the dot of ``archive.org_bot`` is escaped so that it matches a literal
    # '.' only and not an arbitrary character
    + r'|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive\.org_bot|msnbot'
    # MJ12bot is already listed in the fragment above, no need to repeat it here
    + r'|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT|Sogou|Abonti|Pixray|Spinn3r|SemrushBot|Exabot'
    + r'|ZmEu|BLEXBot|bitlybot|HeadlessChrome'
    # unmaintained Farside instances: the complete header value is matched
    # literally, therefore it is passed through re.escape
    + r'|'
    + re.escape(r'Mozilla/5.0 (compatible; Farside/0.1.0; +https://farside.link)')
    # other bots and client to block
    + r'|.*PetalBot.*'
    + r')'
)
"""Regular expression that matches the User-Agent_ header of known *bots*"""
46
47
# Module-level cache for the compiled USER_AGENT pattern.  It stays ``None``
# until regexp_user_agent() is called for the first time.
_regexp = None
48
49
50
def regexp_user_agent():
    """Return the compiled form of :py:obj:`USER_AGENT`.

    The pattern is compiled on the first call and stored in the module-level
    ``_regexp`` variable; later calls return the stored object.
    """
    global _regexp  # pylint: disable=global-statement
    if _regexp:
        # already compiled by an earlier call
        return _regexp
    _regexp = re.compile(USER_AGENT)
    return _regexp
55
56
57
def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Reject the request when its User-Agent header looks like a bot.

    The value of the ``User-Agent`` header (``'unknown'`` when the header is
    not sent) is tested with ``match`` against the pattern returned by
    :py:func:`regexp_user_agent`.

    :param network: passed on to ``too_many_requests`` when the request is
        rejected.
    :param request: the incoming request whose headers are inspected.
    :param cfg: not used by this method.
    :return: the result of ``too_many_requests`` when the header matches,
        otherwise ``None``.
    """
    ua_header = request.headers.get('User-Agent', 'unknown')
    bot_pattern = regexp_user_agent()

    # ``match`` anchors the pattern at the beginning of the header value
    if bot_pattern.match(ua_header) is None:
        return None

    return too_many_requests(network, f"bot detected, HTTP header User-Agent: {ua_header}")
searx.botdetection.config.Config
Definition
config.py:54
searx.botdetection.http_user_agent.filter_request
werkzeug.Response|None filter_request(IPv4Network|IPv6Network network, flask.Request request, config.Config cfg)
Definition
http_user_agent.py:61
searx.botdetection.http_user_agent.regexp_user_agent
regexp_user_agent()
Definition
http_user_agent.py:50
searxng
searx
botdetection
http_user_agent.py
Generated on Sat Nov 16 2024 00:10:57 for .oO SearXNG Developer Documentation Oo. by
1.12.0