2"""Implementations for caching favicons.
4:py:obj:`FaviconCacheConfig`:
5 Configuration of the favicon cache
8 Abstract base class for the implementation of a favicon cache.
10:py:obj:`FaviconCacheSQLite`:
11 Favicon cache that manages the favicon BLOBs in a SQLite DB.
13:py:obj:`FaviconCacheNull`:
14 Fallback solution if the configured cache cannot be used for system reasons.
20from __future__
import annotations
21from typing
import Literal
35from searx
import sqlitedb
36from searx
import logger
37from searx.utils import humanize_bytes, humanize_number
40FALLBACK_ICON = b
"FALLBACK_ICON"
42logger = logger.getChild(
'favicons.cache')
48 """show state of the cache"""
49 print(CACHE.state().report())
54 """perform maintenance of the cache"""
55 root_log = logging.getLogger()
57 root_log.setLevel(logging.DEBUG)
59 root_log.handlers = []
60 handler = logging.StreamHandler()
61 handler.setFormatter(logging.Formatter(
"%(message)s"))
62 logger.addHandler(handler)
63 logger.setLevel(logging.DEBUG)
65 state_t0 = CACHE.state()
66 CACHE.maintenance(force=force)
67 state_t1 = CACHE.state()
68 state_delta = state_t0 - state_t1
69 print(
"The cache has been reduced by:")
70 print(state_delta.report(
"\n- {descr}: {val}").lstrip(
"\n"))
73def init(cfg:
"FaviconCacheConfig"):
74 """Initialization of a global ``CACHE``"""
77 if cfg.db_type ==
"sqlite":
78 if sqlite3.sqlite_version_info <= (3, 35):
80 "Disable favicon caching completely: SQLite library (%s) is too old! (require >= 3.35)",
81 sqlite3.sqlite_version,
86 elif cfg.db_type ==
"mem":
87 logger.error(
"Favicons are cached in memory, don't use this in production!")
90 raise NotImplementedError(f
"favicons db_type '{cfg.db_type}' is unknown")
94 """Configuration of the favicon cache."""
96 db_type: Literal[
"sqlite",
"mem"] =
"sqlite"
97 """Type of the database:
100 :py:obj:`.cache.FaviconCacheSQLite`
103 :py:obj:`.cache.FaviconCacheMEM` (not recommended)
106 db_url: str = tempfile.gettempdir() + os.sep +
"faviconcache.db"
107 """URL of the SQLite DB, the path to the database file."""
109 HOLD_TIME: int = 60 * 60 * 24 * 30
110 """Hold time (default in sec.), after which a BLOB is removed from the cache."""
112 LIMIT_TOTAL_BYTES: int = 1024 * 1024 * 50
113 """Maximum number of bytes (default) stored in the cache for all blobs. Note:
114 The limit is only enforced at each maintenance interval, when the oldest
115 BLOBs are deleted; the limit can be exceeded between two maintenance runs. If
116 the maintenance period is *too long* or maintenance is switched off
117 completely, the cache grows uncontrollably."""
119 BLOB_MAX_BYTES: int = 1024 * 20
120 """The maximum BLOB size in bytes that a favicon may have so that it can be
121 saved in the cache. If the favicon is larger, it is not saved in the cache
122 and must be requested by the client via the proxy."""
124 MAINTENANCE_PERIOD: int = 60 * 60
125 """Maintenance period in seconds / when :py:obj:`MAINTENANCE_MODE` is set to
128 MAINTENANCE_MODE: Literal[
"auto",
"off"] =
"auto"
129 """Type of maintenance mode
132 Maintenance is carried out automatically as part of the maintenance
133 intervals (:py:obj:`MAINTENANCE_PERIOD`); no external process is required.
136 Maintenance is switched off and must be carried out by an external process
141@dataclasses.dataclass
143 """Dataclass which provides information on the status of the cache."""
145 favicons: int |
None =
None
146 bytes: int |
None =
None
147 domains: int |
None =
None
148 resolvers: int |
None =
None
151 (
"favicons",
"number of favicons in cache", humanize_number),
152 (
"bytes",
"total size (approx. bytes) of cache", humanize_bytes),
153 (
"domains",
"total number of domains in cache", humanize_number),
154 (
"resolvers",
"number of resolvers", str),
157 def __sub__(self, other) -> FaviconCacheStats:
159 raise TypeError(f
"unsupported operand type(s) for +: '{self.__class__}' and '{type(other)}'")
162 self_val, other_val = getattr(self, field), getattr(other, field)
163 if None in (self_val, other_val):
165 if isinstance(self_val, int):
166 kwargs[field] = self_val - other_val
168 kwargs[field] = self_val
171 def report(self, fmt: str =
"{descr}: {val}\n"):
174 val = getattr(self, field)
179 s.append(fmt.format(descr=descr, val=val))
184 """Abstract base class for the implementation of a favicon cache."""
188 """An instance of the favicon cache is built from the configuration."""
191 def __call__(self, resolver: str, authority: str) ->
None | tuple[
None | bytes,
None | str]:
192 """Returns ``None`` or the tuple of ``(data, mime)`` that has been
193 registered in the cache. The ``None`` indicates that there was no entry
197 def set(self, resolver: str, authority: str, mime: str |
None, data: bytes |
None) -> bool:
198 """Set data and mime-type in the cache. If data is None, the
199 :py:obj:`FALLBACK_ICON` is registered in the cache."""
202 def state(self) -> FaviconCacheStats:
203 """Returns a :py:obj:`FaviconCacheStats` (key/values) with information
204 on the state of the cache."""
208 """Performs maintenance on the cache"""
211class FaviconCacheNull(FaviconCache):
212 """A dummy favicon cache that caches nothing / a fallback solution. The
213 NullCache is used when more efficient caches such as the
214 :py:obj:`FaviconCacheSQLite` cannot be used because, for example, the SQLite
215 library is only available in an old version and does not meet the
221 def __call__(self, resolver: str, authority: str) ->
None | tuple[
None | bytes,
None | str]:
224 def set(self, resolver: str, authority: str, mime: str |
None, data: bytes |
None) -> bool:
235 """Favicon cache that manages the favicon BLOBs in a SQLite DB. The DB
236 model in the SQLite DB is implemented using the abstract class
237 :py:obj:`sqlitedb.SQLiteAppl`.
239 For introspection of the DB, jump into developer environment and run command
240 to show cache state::
242 $ ./manage pyenv.cmd bash --norc --noprofile
243 (py3) python -m searx.favicons cache state
245 The following configurations are required / supported:
247 - :py:obj:`FaviconCacheConfig.db_url`
248 - :py:obj:`FaviconCacheConfig.HOLD_TIME`
249 - :py:obj:`FaviconCacheConfig.LIMIT_TOTAL_BYTES`
250 - :py:obj:`FaviconCacheConfig.BLOB_MAX_BYTES`
251 - :py:obj:`MAINTENANCE_PERIOD`
252 - :py:obj:`MAINTENANCE_MODE`
258CREATE TABLE IF NOT EXISTS blobs (
263 PRIMARY KEY (sha256))"""
265 """Table to store BLOB objects by their sha256 hash values."""
268CREATE TABLE IF NOT EXISTS blob_map (
269 m_time INTEGER DEFAULT (strftime('%s', 'now')), -- last modified (unix epoch) time in sec.
273 PRIMARY KEY (resolver, authority))"""
275 """Table to map from (resolver, authority) to sha256 hash values."""
277 DDL_CREATE_TABLES = {
279 "blob_map": DDL_BLOB_MAP,
282 SQL_DROP_LEFTOVER_BLOBS = (
283 "DELETE FROM blobs WHERE sha256 IN ("
286 " LEFT JOIN blob_map bm"
287 " ON b.sha256 = bm.sha256"
288 " WHERE bm.sha256 IS NULL)"
290 """Delete blobs.sha256 (BLOBs) no longer in blob_map.sha256."""
292 SQL_ITER_BLOBS_SHA256_BYTES_C = (
293 "SELECT b.sha256, b.bytes_c FROM blobs b"
295 " ON b.sha256 = bm.sha256"
296 " ORDER BY bm.m_time ASC"
300 "INSERT INTO blobs (sha256, bytes_c, mime, data) VALUES (?, ?, ?, ?)"
301 " ON CONFLICT (sha256) DO NOTHING"
304 SQL_INSERT_BLOB_MAP = (
305 "INSERT INTO blob_map (sha256, resolver, authority) VALUES (?, ?, ?)"
306 " ON CONFLICT DO UPDATE "
307 " SET sha256=excluded.sha256, m_time=strftime('%s', 'now')"
311 """An instance of the favicon cache is built from the configuration."""
313 if cfg.db_url ==
":memory:":
314 logger.critical(
"don't use SQLite DB in :memory: in production!!")
318 def __call__(self, resolver: str, authority: str) ->
None | tuple[
None | bytes,
None | str]:
320 sql =
"SELECT sha256 FROM blob_map WHERE resolver = ? AND authority = ?"
321 res = self.
DB.execute(sql, (resolver, authority)).fetchone()
325 data, mime = (
None,
None)
327 if sha256 == FALLBACK_ICON:
330 sql =
"SELECT data, mime FROM blobs WHERE sha256 = ?"
331 res = self.
DB.execute(sql, (sha256,)).fetchone()
336 def set(self, resolver: str, authority: str, mime: str |
None, data: bytes |
None) -> bool:
342 if data
is not None and mime
is None:
344 "favicon resolver %s tries to cache mime-type None for authority %s",
350 bytes_c = len(data
or b
"")
351 if bytes_c > self.
cfg.BLOB_MAX_BYTES:
353 "favicon of resolver: %s / authority: %s to big to cache (bytes: %s) " % (resolver, authority, bytes_c)
358 sha256 = FALLBACK_ICON
360 sha256 = hashlib.sha256(data).hexdigest()
363 if sha256 != FALLBACK_ICON:
375 """Returns (unix epoch) time of the next maintenance."""
377 return self.
cfg.MAINTENANCE_PERIOD + self.
properties.m_time(
"LAST_MAINTENANCE")
385 logger.debug(
"no maintenance required yet, next maintenance interval is in the future")
396 f
"DELETE FROM blob_map"
397 f
" WHERE cast(m_time as integer) < cast(strftime('%s', 'now') as integer) - {self.cfg.HOLD_TIME}"
399 logger.debug(
"dropped %s obsolete blob_map items from db", res.rowcount)
401 logger.debug(
"dropped %s obsolete BLOBS from db", res.rowcount)
404 total_bytes = conn.execute(
"SELECT SUM(bytes_c) FROM blobs").fetchone()[0]
or 0
405 if total_bytes > self.
cfg.LIMIT_TOTAL_BYTES:
407 x = total_bytes - self.
cfg.LIMIT_TOTAL_BYTES
411 sha256, bytes_c = row
412 sha_list.append(sha256)
417 conn.execute(
"DELETE FROM blobs WHERE sha256 IN ('%s')" %
"','".join(sha_list))
418 conn.execute(
"DELETE FROM blob_map WHERE sha256 IN ('%s')" %
"','".join(sha_list))
419 logger.debug(
"dropped %s blobs with total size of %s bytes", len(sha_list), c)
424 conn.execute(
"PRAGMA wal_checkpoint(TRUNCATE)")
428 val = self.
DB.execute(sql).fetchone()
435 def state(self) -> FaviconCacheStats:
437 favicons=self.
_query_val(
"SELECT count(*) FROM blobs", 0),
438 bytes=self.
_query_val(
"SELECT SUM(bytes_c) FROM blobs", 0),
439 domains=self.
_query_val(
"SELECT count(*) FROM (SELECT authority FROM blob_map GROUP BY authority)", 0),
440 resolvers=self.
_query_val(
"SELECT count(*) FROM (SELECT resolver FROM blob_map GROUP BY resolver)", 0),
445 """Favicon cache in process' memory. It's just a POC that stores the
446 favicons in the memory of the process.
450 Don't use it in production, it will blow up your memory!!
460 def __call__(self, resolver: str, authority: str) ->
None | tuple[bytes |
None, str |
None]:
462 sha, mime = self.
_sha_mime.get(f
"{resolver}:{authority}", (
None,
None))
465 data = self.
_data.get(sha)
466 if data == FALLBACK_ICON:
470 def set(self, resolver: str, authority: str, mime: str |
None, data: bytes |
None) -> bool:
478 "favicon resolver %s tries to cache mime-type None for authority %s",
484 digest = hashlib.sha256(data).hexdigest()
485 self.
_data[digest] = data
486 self.
_sha_mime[f
"{resolver}:{authority}"] = (digest, mime)
None|tuple[bytes|None, str|None] __call__(self, str resolver, str authority)
maintenance(self, force=False)
bool set(self, str resolver, str authority, str|None mime, bytes|None data)
None|tuple[None|bytes, None|str] __call__(self, str resolver, str authority)
maintenance(self, force=False)
__init__(self, FaviconCacheConfig cfg)
bool set(self, str resolver, str authority, str|None mime, bytes|None data)
bool set(self, str resolver, str authority, str|None mime, bytes|None data)
None|tuple[None|bytes, None|str] __call__(self, str resolver, str authority)
tuple SQL_DROP_LEFTOVER_BLOBS
maintenance(self, force=False)
_query_val(self, sql, default=None)
__init__(self, FaviconCacheConfig cfg)
tuple SQL_INSERT_BLOB_MAP
tuple SQL_ITER_BLOBS_SHA256_BYTES_C
FaviconCacheStats __sub__(self, other)
report(self, str fmt="{descr}: {val}\n")
maintenance(self, force=False)
__init__(self, FaviconCacheConfig cfg)
None|tuple[None|bytes, None|str] __call__(self, str resolver, str authority)
bool set(self, str resolver, str authority, str|None mime, bytes|None data)
sqlite3.Connection connect(self)
sqlite3.Connection DB(self)
maintenance(bool force=True, bool debug=False)