2"""Implementations for caching favicons.
4:py:obj:`FaviconCacheConfig`:
5 Configuration of the favicon cache
8 Abstract base class for the implementation of a favicon cache.
10:py:obj:`FaviconCacheSQLite`:
11 Favicon cache that manages the favicon BLOBs in a SQLite DB.
13:py:obj:`FaviconCacheNull`:
14 Fallback solution if the configured cache cannot be used for system reasons.
20from __future__
import annotations
21from typing
import Literal
35from searx
import sqlitedb
36from searx
import logger
37from searx.utils import humanize_bytes, humanize_number
40FALLBACK_ICON = b
"FALLBACK_ICON"
42logger = logger.getChild(
'favicons.cache')
48 """show state of the cache"""
49 print(CACHE.state().report())
54 """perform maintenance of the cache"""
55 root_log = logging.getLogger()
57 root_log.setLevel(logging.DEBUG)
59 root_log.handlers = []
60 handler = logging.StreamHandler()
61 handler.setFormatter(logging.Formatter(
"%(message)s"))
62 logger.addHandler(handler)
63 logger.setLevel(logging.DEBUG)
65 state_t0 = CACHE.state()
66 CACHE.maintenance(force=force)
67 state_t1 = CACHE.state()
68 state_delta = state_t0 - state_t1
69 print(
"The cache has been reduced by:")
70 print(state_delta.report(
"\n- {descr}: {val}").lstrip(
"\n"))
73def init(cfg:
"FaviconCacheConfig"):
74 """Initialization of a global ``CACHE``"""
77 if cfg.db_type ==
"sqlite":
78 if sqlite3.sqlite_version_info <= (3, 35):
80 "Disable favicon caching completely: SQLite library (%s) is too old! (require >= 3.35)",
81 sqlite3.sqlite_version,
86 elif cfg.db_type ==
"mem":
87 logger.error(
"Favicons are cached in memory, don't use this in production!")
90 raise NotImplementedError(f
"favicons db_type '{cfg.db_type}' is unknown")
94 """Configuration of the favicon cache."""
96 db_type: Literal[
"sqlite",
"mem"] =
"sqlite"
97 """Type of the database:
100 :py:obj:`.cache.FaviconCacheSQLite`
103 :py:obj:`.cache.FaviconCacheMEM` (not recommended)
106 db_url: str = tempfile.gettempdir() + os.sep +
"faviconcache.db"
107 """URL of the SQLite DB, the path to the database file."""
109 HOLD_TIME: int = 60 * 60 * 24 * 30
110 """Hold time (default in sec.), after which a BLOB is removed from the cache."""
112 LIMIT_TOTAL_BYTES: int = 1024 * 1024 * 50
113 """Maximum number of bytes (default) stored in the cache for all BLOBs. Note: The
114 limit is only enforced at each maintenance interval, when the oldest BLOBs
115 are deleted; between maintenance runs the limit can be exceeded. If
116 the maintenance period is *too long* or maintenance is switched off
117 completely, the cache grows uncontrollably."""
119 BLOB_MAX_BYTES: int = 1024 * 20
120 """The maximum BLOB size in bytes that a favicon may have so that it can be
121 saved in the cache. If the favicon is larger, it is not saved in the cache
122 and must be requested by the client via the proxy."""
124 MAINTENANCE_PERIOD: int = 60 * 60
125 """Maintenance period in seconds / when :py:obj:`MAINTENANCE_MODE` is set to
128 MAINTENANCE_MODE: Literal[
"auto",
"off"] =
"auto"
129 """Type of maintenance mode
132 Maintenance is carried out automatically as part of the maintenance
133 intervals (:py:obj:`MAINTENANCE_PERIOD`); no external process is required.
136 Maintenance is switched off and must be carried out by an external process
141@dataclasses.dataclass
143 """Dataclass which provides information on the status of the cache."""
145 favicons: int |
None =
None
146 bytes: int |
None =
None
147 domains: int |
None =
None
148 resolvers: int |
None =
None
151 (
"favicons",
"number of favicons in cache", humanize_number),
152 (
"bytes",
"total size (approx. bytes) of cache", humanize_bytes),
153 (
"domains",
"total number of domains in cache", humanize_number),
154 (
"resolvers",
"number of resolvers", str),
157 def __sub__(self, other) -> FaviconCacheStats:
159 raise TypeError(f
"unsupported operand type(s) for +: '{self.__class__}' and '{type(other)}'")
162 self_val, other_val = getattr(self, field), getattr(other, field)
163 if None in (self_val, other_val):
165 if isinstance(self_val, int):
166 kwargs[field] = self_val - other_val
168 kwargs[field] = self_val
171 def report(self, fmt: str =
"{descr}: {val}\n"):
174 val = getattr(self, field)
179 s.append(fmt.format(descr=descr, val=val))
184 """Abstract base class for the implementation of a favicon cache."""
188 """An instance of the favicon cache is built up from the configuration."""
191 def __call__(self, resolver: str, authority: str) ->
None | tuple[
None | bytes,
None | str]:
192 """Returns ``None`` or the tuple of ``(data, mime)`` that has been
193 registered in the cache. The ``None`` indicates that there was no entry
197 def set(self, resolver: str, authority: str, mime: str |
None, data: bytes |
None) -> bool:
198 """Set data and mime-type in the cache. If data is None, the
199 :py:obj:`FALLBACK_ICON` is registered in the cache."""
202 def state(self) -> FaviconCacheStats:
203 """Returns a :py:obj:`FaviconCacheStats` (key/values) with information
204 on the state of the cache."""
208 """Performs maintenance on the cache"""
211class FaviconCacheNull(FaviconCache):
212 """A dummy favicon cache that caches nothing / a fallback solution. The
213 NullCache is used when more efficient caches such as the
214 :py:obj:`FaviconCacheSQLite` cannot be used because, for example, the SQLite
215 library is only available in an old version and does not meet the
221 def __call__(self, resolver: str, authority: str) ->
None | tuple[
None | bytes,
None | str]:
224 def set(self, resolver: str, authority: str, mime: str |
None, data: bytes |
None) -> bool:
235 """Favicon cache that manages the favicon BLOBs in a SQLite DB. The DB
236 model in the SQLite DB is implemented using the abstract class
237 :py:obj:`sqlitedb.SQLiteAppl`.
239 The following configurations are required / supported:
241 - :py:obj:`FaviconCacheConfig.db_url`
242 - :py:obj:`FaviconCacheConfig.HOLD_TIME`
243 - :py:obj:`FaviconCacheConfig.LIMIT_TOTAL_BYTES`
244 - :py:obj:`FaviconCacheConfig.BLOB_MAX_BYTES`
245 - :py:obj:`FaviconCacheConfig.MAINTENANCE_PERIOD`
246 - :py:obj:`FaviconCacheConfig.MAINTENANCE_MODE`
252CREATE TABLE IF NOT EXISTS blobs (
257 PRIMARY KEY (sha256))"""
259 """Table to store BLOB objects by their sha256 hash values."""
262CREATE TABLE IF NOT EXISTS blob_map (
263 m_time INTEGER DEFAULT (strftime('%s', 'now')), -- last modified (unix epoch) time in sec.
267 PRIMARY KEY (resolver, authority))"""
269 """Table to map from (resolver, authority) to sha256 hash values."""
271 DDL_CREATE_TABLES = {
273 "blob_map": DDL_BLOB_MAP,
276 SQL_DROP_LEFTOVER_BLOBS = (
277 "DELETE FROM blobs WHERE sha256 IN ("
280 " LEFT JOIN blob_map bm"
281 " ON b.sha256 = bm.sha256"
282 " WHERE bm.sha256 IS NULL)"
284 """Delete blobs.sha256 (BLOBs) no longer in blob_map.sha256."""
286 SQL_ITER_BLOBS_SHA256_BYTES_C = (
287 "SELECT b.sha256, b.bytes_c FROM blobs b"
289 " ON b.sha256 = bm.sha256"
290 " ORDER BY bm.m_time ASC"
294 "INSERT INTO blobs (sha256, bytes_c, mime, data) VALUES (?, ?, ?, ?)"
295 " ON CONFLICT (sha256) DO NOTHING"
298 SQL_INSERT_BLOB_MAP = (
299 "INSERT INTO blob_map (sha256, resolver, authority) VALUES (?, ?, ?)"
300 " ON CONFLICT DO UPDATE "
301 " SET sha256=excluded.sha256, m_time=strftime('%s', 'now')"
305 """An instance of the favicon cache is built up from the configuration."""
307 if cfg.db_url ==
":memory:":
308 logger.critical(
"don't use SQLite DB in :memory: in production!!")
312 def __call__(self, resolver: str, authority: str) ->
None | tuple[
None | bytes,
None | str]:
314 sql =
"SELECT sha256 FROM blob_map WHERE resolver = ? AND authority = ?"
315 res = self.
DB.execute(sql, (resolver, authority)).fetchone()
319 data, mime = (
None,
None)
321 if sha256 == FALLBACK_ICON:
324 sql =
"SELECT data, mime FROM blobs WHERE sha256 = ?"
325 res = self.
DB.execute(sql, (sha256,)).fetchone()
330 def set(self, resolver: str, authority: str, mime: str |
None, data: bytes |
None) -> bool:
336 if data
is not None and mime
is None:
338 "favicon resolver %s tries to cache mime-type None for authority %s",
344 bytes_c = len(data
or b
"")
345 if bytes_c > self.
cfg.BLOB_MAX_BYTES:
347 "favicon of resolver: %s / authority: %s to big to cache (bytes: %s) " % (resolver, authority, bytes_c)
352 sha256 = FALLBACK_ICON
354 sha256 = hashlib.sha256(data).hexdigest()
357 if sha256 != FALLBACK_ICON:
365 """Returns (unix epoch) time of the next maintenance."""
367 return self.
cfg.MAINTENANCE_PERIOD + self.
properties.m_time(
"LAST_MAINTENANCE")
375 logger.debug(
"no maintenance required yet, next maintenance interval is in the future")
385 f
"DELETE FROM blob_map"
386 f
" WHERE cast(m_time as integer) < cast(strftime('%s', 'now') as integer) - {self.cfg.HOLD_TIME}"
388 logger.debug(
"dropped %s obsolete blob_map items from db", res.rowcount)
390 logger.debug(
"dropped %s obsolete BLOBS from db", res.rowcount)
393 total_bytes = conn.execute(
"SELECT SUM(bytes_c) FROM blobs").fetchone()[0]
or 0
394 if total_bytes > self.
cfg.LIMIT_TOTAL_BYTES:
396 x = total_bytes - self.
cfg.LIMIT_TOTAL_BYTES
400 sha256, bytes_c = row
401 sha_list.append(sha256)
406 conn.execute(
"DELETE FROM blobs WHERE sha256 IN ('%s')" %
"','".join(sha_list))
407 conn.execute(
"DELETE FROM blob_map WHERE sha256 IN ('%s')" %
"','".join(sha_list))
408 logger.debug(
"dropped %s blobs with total size of %s bytes", len(sha_list), c)
411 val = self.
DB.execute(sql).fetchone()
418 def state(self) -> FaviconCacheStats:
420 favicons=self.
_query_val(
"SELECT count(*) FROM blobs", 0),
421 bytes=self.
_query_val(
"SELECT SUM(bytes_c) FROM blobs", 0),
422 domains=self.
_query_val(
"SELECT count(*) FROM (SELECT authority FROM blob_map GROUP BY authority)", 0),
423 resolvers=self.
_query_val(
"SELECT count(*) FROM (SELECT resolver FROM blob_map GROUP BY resolver)", 0),
428 """Favicon cache in process' memory. It's just a POC that stores the
429 favicons in the memory of the process.
433 Don't use it in production, it will blow up your memory!!
443 def __call__(self, resolver: str, authority: str) ->
None | tuple[bytes |
None, str |
None]:
445 sha, mime = self.
_sha_mime.get(f
"{resolver}:{authority}", (
None,
None))
448 data = self.
_data.get(sha)
449 if data == FALLBACK_ICON:
453 def set(self, resolver: str, authority: str, mime: str |
None, data: bytes |
None) -> bool:
461 "favicon resolver %s tries to cache mime-type None for authority %s",
467 digest = hashlib.sha256(data).hexdigest()
468 self.
_data[digest] = data
469 self.
_sha_mime[f
"{resolver}:{authority}"] = (digest, mime)
None|tuple[bytes|None, str|None] __call__(self, str resolver, str authority)
maintenance(self, force=False)
bool set(self, str resolver, str authority, str|None mime, bytes|None data)
None|tuple[None|bytes, None|str] __call__(self, str resolver, str authority)
maintenance(self, force=False)
__init__(self, FaviconCacheConfig cfg)
bool set(self, str resolver, str authority, str|None mime, bytes|None data)
bool set(self, str resolver, str authority, str|None mime, bytes|None data)
None|tuple[None|bytes, None|str] __call__(self, str resolver, str authority)
tuple SQL_DROP_LEFTOVER_BLOBS
maintenance(self, force=False)
int next_maintenance_time(self)
_query_val(self, sql, default=None)
__init__(self, FaviconCacheConfig cfg)
tuple SQL_INSERT_BLOB_MAP
tuple SQL_ITER_BLOBS_SHA256_BYTES_C
FaviconCacheStats __sub__(self, other)
report(self, str fmt="{descr}: {val}\n")
maintenance(self, force=False)
__init__(self, FaviconCacheConfig cfg)
None|tuple[None|bytes, None|str] __call__(self, str resolver, str authority)
bool set(self, str resolver, str authority, str|None mime, bytes|None data)
sqlite3.Connection connect(self)
sqlite3.Connection DB(self)
maintenance(bool force=True, bool debug=False)