.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
http_user_agent.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""
3Method ``http_user_agent``
4--------------------------
5
6The ``http_user_agent`` method evaluates a request as the request of a bot if
7the User-Agent_ header is unset or matches the regular expression
8:py:obj:`USER_AGENT`.
9
10.. _User-Agent:
11 https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
12
13"""
14# pylint: disable=unused-argument
15
16from __future__ import annotations
17import re
18from ipaddress import (
19 IPv4Network,
20 IPv6Network,
21)
22
23import flask
24import werkzeug
25
26from . import config
27from ._helpers import too_many_requests
28
29
# NOTE: this pattern is applied with ``re.match`` (see ``filter_request``), so
# every alternative is anchored at the *beginning* of the header value.  Only
# the PetalBot alternative, which starts with ``.*``, matches anywhere.
USER_AGENT = (
    r'('
    # placeholder value ``filter_request`` substitutes for a missing header
    + r'unknown'
    + r'|[Cc][Uu][Rr][Ll]|[wW]get|Scrapy|splash|JavaFX|FeedFetcher|python-requests|Go-http-client|Java|Jakarta|okhttp'
    + r'|HttpClient|Jersey|Python|libwww-perl|Ruby|SynHttpClient|UniversalFeedParser|Googlebot|GoogleImageProxy'
    # the dot in ``archive.org_bot`` is escaped so it only matches a literal '.'
    + r'|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive\.org_bot|msnbot'
    + r'|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT|Sogou|Abonti|Pixray|Spinn3r|SemrushBot|Exabot'
    + r'|ZmEu|BLEXBot|bitlybot|HeadlessChrome'
    # unmaintained Farside instances
    + r'|'
    + re.escape(r'Mozilla/5.0 (compatible; Farside/0.1.0; +https://farside.link)')
    # other bots and client to block
    + r'|.*PetalBot.*'
    + r')'
)
"""Regular expression that matches the User-Agent_ of known *bots*"""
46
# Cache for the compiled USER_AGENT pattern; filled on first use.
_regexp = None


def regexp_user_agent() -> re.Pattern[str]:
    """Return the compiled form of :py:obj:`USER_AGENT`.

    The pattern is compiled on the first call and stored in the module-level
    ``_regexp``; later calls return that same compiled object.
    """
    global _regexp  # pylint: disable=global-statement
    if _regexp is None:
        _regexp = re.compile(USER_AGENT)
    return _regexp
55
56
def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Reject the request when its User-Agent_ header looks like a bot.

    :param network: network of the client, handed on to ``too_many_requests``.
    :param request: incoming request whose ``User-Agent`` header is inspected.
    :param cfg: not used by this method.
    :return: the result of ``too_many_requests`` when the header is missing or
        matches :py:obj:`USER_AGENT`, otherwise ``None``.
    """
    # A missing header is mapped to 'unknown', which USER_AGENT matches, so
    # requests without a User-Agent are handled like known bots.
    user_agent = request.headers.get('User-Agent', 'unknown')
    if regexp_user_agent().match(user_agent):
        return too_many_requests(network, f"bot detected, HTTP header User-Agent: {user_agent}")
    return None
werkzeug.Response|None filter_request(IPv4Network|IPv6Network network, flask.Request request, config.Config cfg)