.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
cache.py
Go to the documentation of this file.
1"""Implementation of caching solutions.
2
3- :py:obj:`searx.cache.ExpireCache` and its :py:obj:`searx.cache.ExpireCacheCfg`
4
5----
6"""
7
8from __future__ import annotations
9
10__all__ = ["ExpireCacheCfg", "ExpireCacheStats", "ExpireCache", "ExpireCacheSQLite"]
11
12import abc
13from collections.abc import Iterator
14import dataclasses
15import datetime
16import hashlib
17import hmac
18import os
19import pickle
20import sqlite3
21import string
22import tempfile
23import time
24import typing
25
26import msgspec
27
28from searx import sqlitedb
29from searx import logger
30from searx import get_setting
31
32log = logger.getChild("cache")
33
34
class ExpireCacheCfg(msgspec.Struct):  # pylint: disable=too-few-public-methods
    """Configuration of a :py:obj:`ExpireCache` cache."""

    name: str
    """Name of the cache."""

    db_url: str = ""
    """URL of the SQLite DB, the path to the database file.  If unset, a
    default DB file named ``sxng_cache_{name}.db`` (with a normalized cache
    name) is used in the system's temporary directory, e.g. `/tmp`."""

    MAX_VALUE_LEN: int = 1024 * 10
    """Max length of a *serialized* value."""

    MAXHOLD_TIME: int = 60 * 60 * 24 * 7  # 7 days
    """Hold time (default in sec.), after which a value is removed from the cache."""

    MAINTENANCE_PERIOD: int = 60 * 60  # 1h
    """Maintenance period in seconds / when :py:obj:`MAINTENANCE_MODE` is set to
    ``auto``."""

    MAINTENANCE_MODE: typing.Literal["auto", "off"] = "auto"
    """Type of maintenance mode

    ``auto``:
      Maintenance is carried out automatically as part of the maintenance
      intervals (:py:obj:`MAINTENANCE_PERIOD`); no external process is required.

    ``off``:
      Maintenance is switched off and must be carried out by an external process
      if required.
    """

    # NOTE: the default is evaluated once, when this class body is executed.
    password: bytes = get_setting("server.secret_key").encode()  # type: ignore
    """Password used by :py:obj:`ExpireCache.secret_hash`.

    The default password is taken from :ref:`secret_key <server.secret_key>`.
    When the password is changed, the hashed keys in the cache can no longer be
    used, which is why all values in the cache are deleted when the password is
    changed.
    """

    def __post_init__(self):
        """Fill in the default ``db_url`` when none was configured."""
        if self.db_url:
            return
        # Default DB: <tempdir>/sxng_cache_<normalized name>.db
        db_file = f"sxng_cache_{ExpireCache.normalize_name(self.name)}.db"
        self.db_url = f"{tempfile.gettempdir()}{os.sep}{db_file}"
80
81
@dataclasses.dataclass
class ExpireCacheStats:
    """Dataclass which provides information on the status of the cache."""

    cached_items: dict[str, list[tuple[str, typing.Any, int]]]
    """Values in the cache mapped by context name.

    .. code:: python

       {
           "context name": [
               ("foo key", "foo value", <expire>),
               ("bar key", "bar value", <expire>),
               # ...
           ],
           # ...
       }
    """

    def report(self) -> str:
        """Return a human readable, multi-line summary of the cached items.

        One line is emitted per key/value pair (or one ``empty`` line for a
        context without items), followed by the total number of contexts and
        the total number of key/value pairs.  The expire time stamp is
        rendered in local time.
        """
        c_ctx = 0
        c_kv = 0
        lines = []

        for ctx_name, kv_list in self.cached_items.items():
            c_ctx += 1
            if not kv_list:
                lines.append(f"[{ctx_name:20s}] empty")
                continue

            for key, value, expire in kv_list:
                valid_until = datetime.datetime.fromtimestamp(expire).strftime("%Y-%m-%d %H:%M:%S")
                c_kv += 1
                lines.append(f"[{ctx_name:20s}] {valid_until} {key:12}" f" --> ({type(value).__name__}) {value} ")

        lines.append(f"Number of contexts: {c_ctx}")
        lines.append(f"number of key/value pairs: {c_kv}")
        return "\n".join(lines)
120
121
class ExpireCache(abc.ABC):
    """Abstract base class for the implementation of a key/value cache
    with expire date."""

    cfg: ExpireCacheCfg

    hash_token = "hash_token"

    @abc.abstractmethod
    def set(self, key: str, value: typing.Any, expire: int | None, ctx: str | None = None) -> bool:
        """Set *key* to *value*.  To set a timeout on key use argument
        ``expire`` (in sec.).  If expire is unset the default is taken from
        :py:obj:`ExpireCacheCfg.MAXHOLD_TIME`.  After the timeout has expired,
        the key will automatically be deleted.

        The ``ctx`` argument specifies the context of the ``key``.  A key is
        only unique in its context.

        The concrete implementations of this abstraction determine how the
        context is mapped in the connected database.  In SQL databases, for
        example, the context is a DB table or in a Key/Value DB it could be
        a prefix for the key.

        If the context is not specified (the default is ``None``) then a
        default context should be used, e.g. a default table for SQL databases
        or a default prefix in a Key/Value DB.
        """

    @abc.abstractmethod
    def get(self, key: str, default=None, ctx: str | None = None) -> typing.Any:
        """Return *value* of *key*.  If key is unset, the ``default`` (``None``
        if not given) is returned."""

    @abc.abstractmethod
    def maintenance(self, force: bool = False, truncate: bool = False) -> bool:
        """Performs maintenance on the cache.

        ``force``:
          Maintenance should be carried out even if the maintenance interval has
          not yet been reached.

        ``truncate``:
          Truncate the entire cache, which is necessary, for example, if the
          password has changed.
        """

    @abc.abstractmethod
    def state(self) -> ExpireCacheStats:
        """Returns a :py:obj:`ExpireCacheStats`, which provides information
        about the status of the cache."""

    @staticmethod
    def build_cache(cfg: ExpireCacheCfg) -> ExpireCache:
        """Factory to build a caching instance.

        .. note::

           Currently, only the SQLite adapter is available, but other database
           types could be implemented in the future, e.g. a Valkey (Redis)
           adapter.
        """
        return ExpireCacheSQLite(cfg)

    @staticmethod
    def normalize_name(name: str) -> str:
        """Returns a normalized name that can be used as a file name or as a SQL
        table name (is used, for example, to normalize the context name).

        Only ASCII letters, digits and the characters ``-``, ``_`` and ``.``
        are kept, every other character is dropped.
        """
        allowed = "-_." + string.ascii_letters + string.digits
        kept = []
        for char in name:
            if char in allowed:
                kept.append(char)
        return "".join(kept)

    def serialize(self, value: typing.Any) -> bytes:
        """Serialize ``value`` into bytes (:py:obj:`pickle.dumps`)."""
        return pickle.dumps(value)

    def deserialize(self, value: bytes) -> typing.Any:
        """Rebuild the object from bytes created by :py:obj:`serialize`.

        NOTE: unpickling can execute arbitrary code, only bytes from a trusted
        store must be passed to this method.
        """
        return pickle.loads(value)

    def secret_hash(self, name: str | bytes) -> str:
        """Creates a hash of the argument ``name``.  The hash value is formed
        from the ``name`` combined with the :py:obj:`password
        <ExpireCacheCfg.password>`.  Can be used, for example, to make the
        ``key`` stored in the DB unreadable for third parties."""

        raw_name = name.encode("utf-8") if isinstance(name, str) else name
        # name + password is used as the HMAC key, no message is fed in
        digest = hmac.new(raw_name + self.cfg.password, digestmod='sha256')
        return digest.hexdigest()
210
211
class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
    """Cache that manages key/value pairs in a SQLite DB.  The DB model in the
    SQLite DB is implemented in abstract class :py:obj:`SQLiteAppl
    <searx.sqlitedb.SQLiteAppl>`.

    The following configurations are required / supported:

    - :py:obj:`ExpireCacheCfg.db_url`
    - :py:obj:`ExpireCacheCfg.MAXHOLD_TIME`
    - :py:obj:`ExpireCacheCfg.MAINTENANCE_PERIOD`
    - :py:obj:`ExpireCacheCfg.MAINTENANCE_MODE`
    """

    DB_SCHEMA = 1

    # The key/value tables will be created on demand by self.create_table
    DDL_CREATE_TABLES = {}

    CACHE_TABLE_PREFIX = "CACHE-TABLE"

    def __init__(self, cfg: ExpireCacheCfg):
        """An instance of the SQLite expire cache is build up from a
        :py:obj:`config <ExpireCacheCfg>`."""

        self.cfg = cfg
        if cfg.db_url == ":memory:":
            log.critical("don't use SQLite DB in :memory: in production!!")
        super().__init__(cfg.db_url)

    def init(self, conn: sqlite3.Connection) -> bool:
        """Initialize the DB and compare the stored hash token with the hash
        of the configured password.

        Returns ``False`` if the initialization of the super class returns a
        false value.  If the stored token differs from the SHA256 of the
        current password, all cache tables are truncated and the new token is
        stored.
        """
        ret_val = super().init(conn)
        if not ret_val:
            return False

        new = hashlib.sha256(self.cfg.password).hexdigest()
        old = self.properties(self.hash_token)
        if old != new:
            # a warning is only useful if there was a token before
            if old is not None:
                log.warning("[%s] hash token changed: truncate all cache tables", self.cfg.name)
            self.maintenance(force=True, truncate=True)
            self.properties.set(self.hash_token, new)

        return True

    def maintenance(self, force: bool = False, truncate: bool = False) -> bool:
        """Perform maintenance on the cache tables.

        Returns ``False`` without touching the DB when ``force`` is not set
        and the next maintenance time has not been reached.  Otherwise
        either all cache tables are truncated (``truncate``) or the rows whose
        ``expire`` time stamp lies in the past are deleted; ``True`` is
        returned in both cases.
        """

        if not force and int(time.time()) < self.next_maintenance_time:
            # log.debug("no maintenance required yet, next maintenance interval is in the future")
            return False

        # Prevent parallel DB maintenance cycles from other DB connections
        # (e.g. in multi thread or process environments).
        self.properties.set("LAST_MAINTENANCE", "")  # hint: this (also) sets the m_time of the property!

        if truncate:
            self.truncate_tables(self.table_names)
            return True

        # drop items by expire time stamp ..
        expire = int(time.time())

        with self.connect() as conn:
            for table in self.table_names:
                res = conn.execute(f"DELETE FROM {table} WHERE expire < ?", (expire,))
                log.debug("deleted %s keys from table %s (expire date reached)", res.rowcount, table)

        # Vacuuming the WALs
        # https://www.theunterminatedstring.com/sqlite-vacuuming/

        # The ``with`` block above has committed the deletes; the connection
        # itself stays open until it is closed explicitly.
        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
        conn.close()

        return True

    def create_table(self, table: str) -> bool:
        """Create DB ``table`` if it has not yet been created, no recreates are
        initiated if the table already exists.

        Returns ``True`` if the table has been created and ``False`` if it is
        already registered in the properties.
        """
        if table in self.table_names:
            # log.debug("key/value table %s exists in DB (no need to recreate)", table)
            return False

        log.info("key/value table '%s' NOT exists in DB -> create DB table ..", table)
        sql_table = "\n".join(
            [
                f"CREATE TABLE IF NOT EXISTS {table} (",
                " key TEXT,",
                " value BLOB,",
                f" expire INTEGER DEFAULT (strftime('%s', 'now') + {self.cfg.MAXHOLD_TIME}),",
                "PRIMARY KEY (key))",
            ]
        )
        sql_index = f"CREATE INDEX IF NOT EXISTS index_expire_{table} ON {table}(expire);"
        with self.connect() as conn:
            conn.execute(sql_table)
            conn.execute(sql_index)
        conn.close()

        # register the table, table_names is built from these properties
        self.properties.set(f"{self.CACHE_TABLE_PREFIX}-{table}", table)
        return True

    @property
    def table_names(self) -> list[str]:
        """List of key/value tables already created in the DB."""
        sql = f"SELECT value FROM properties WHERE name LIKE '{self.CACHE_TABLE_PREFIX}%%'"
        rows = self.DB.execute(sql).fetchall() or []
        return [r[0] for r in rows]

    def truncate_tables(self, table_names: list[str]):
        """Delete all rows from the tables listed in ``table_names``; always
        returns ``True``."""
        log.debug("truncate table: %s", ",".join(table_names))
        with self.connect() as conn:
            for table in table_names:
                conn.execute(f"DELETE FROM {table}")
        conn.close()
        return True

    @property
    def next_maintenance_time(self) -> int:
        """Returns (unix epoch) time of the next maintenance."""

        return self.cfg.MAINTENANCE_PERIOD + self.properties.m_time("LAST_MAINTENANCE", int(time.time()))

    # implement ABC methods of ExpireCache

    def set(self, key: str, value: typing.Any, expire: int | None, ctx: str | None = None) -> bool:
        """Set key/value in DB table given by argument ``ctx``.  If expire is
        unset the default is taken from :py:obj:`ExpireCacheCfg.MAXHOLD_TIME`.
        If ``ctx`` argument is ``None`` (the default), a table name is
        generated from the :py:obj:`ExpireCacheCfg.name`.  If DB table does not
        exists, it will be created (on demand) by :py:obj:`self.create_table
        <ExpireCacheSQLite.create_table>`.

        Returns ``False`` (nothing is stored) if the serialized value is longer
        than :py:obj:`ExpireCacheCfg.MAX_VALUE_LEN`, otherwise ``True``.
        """
        table = ctx
        self.maintenance()

        value = self.serialize(value=value)
        if len(value) > self.cfg.MAX_VALUE_LEN:
            # log the key, not the (oversized) serialized value
            log.warning("ExpireCache.set(): %s.key='%s' - value too big to cache (len: %s) ", table, key, len(value))
            return False

        # a falsy expire (None or 0) selects the default hold time
        if not expire:
            expire = self.cfg.MAXHOLD_TIME
        expire = int(time.time()) + expire

        table_name = table
        if not table_name:
            table_name = self.normalize_name(self.cfg.name)
        self.create_table(table_name)

        sql = (
            f"INSERT INTO {table_name} (key, value, expire) VALUES (?, ?, ?)"
            f"    ON CONFLICT DO "
            f"UPDATE SET value=?, expire=?"
        )

        if table:
            with self.DB:
                self.DB.execute(sql, (key, value, expire, value, expire))
        else:
            with self.connect() as conn:
                conn.execute(sql, (key, value, expire, value, expire))
            conn.close()

        return True

    def get(self, key: str, default=None, ctx: str | None = None) -> typing.Any:
        """Get value of ``key`` from table given by argument ``ctx``.  If
        ``ctx`` argument is ``None`` (the default), a table name is generated
        from the :py:obj:`ExpireCacheCfg.name`.  If ``key`` not exists (in
        table), the ``default`` value is returned.

        The query does not look at the ``expire`` column, rows are dropped
        by :py:obj:`maintenance <ExpireCacheSQLite.maintenance>`.
        """
        table = ctx
        self.maintenance()

        if not table:
            table = self.normalize_name(self.cfg.name)

        if table not in self.table_names:
            return default

        sql = f"SELECT value FROM {table} WHERE key = ?"
        row = self.DB.execute(sql, (key,)).fetchone()
        if row is None:
            return default

        return self.deserialize(row[0])

    def pairs(self, ctx: str) -> Iterator[tuple[str, typing.Any]]:
        """Iterate over key/value pairs from table given by argument ``ctx``.
        If ``ctx`` is empty, a table name is generated from the
        :py:obj:`ExpireCacheCfg.name`.  Nothing is yielded if the table does
        not exist."""
        table = ctx
        self.maintenance()

        if not table:
            table = self.normalize_name(self.cfg.name)

        if table in self.table_names:
            for row in self.DB.execute(f"SELECT key, value FROM {table}"):
                yield row[0], self.deserialize(row[1])

    def state(self) -> ExpireCacheStats:
        """Collect the (key, value, expire) rows of all cache tables in a
        :py:obj:`ExpireCacheStats`."""
        cached_items = {}
        for table in self.table_names:
            cached_items[table] = []
            for row in self.DB.execute(f"SELECT key, value, expire FROM {table}"):
                cached_items[table].append((row[0], self.deserialize(row[1]), row[2]))
        return ExpireCacheStats(cached_items=cached_items)
truncate_tables(self, list[str] table_names)
Definition cache.py:320
bool create_table(self, str table)
Definition cache.py:286
bool init(self, sqlite3.Connection conn)
Definition cache.py:241
__init__(self, ExpireCacheCfg cfg)
Definition cache.py:232
typing.Any get(self, str key, default=None, str|None ctx=None)
Definition cache.py:377
bool maintenance(self, bool force=False, bool truncate=False)
Definition cache.py:256
bool set(self, str key, typing.Any value, int|None expire, str|None ctx=None)
Definition cache.py:336
ExpireCacheStats state(self)
Definition cache.py:414
Iterator[tuple[str, typing.Any]] pairs(self, str ctx)
Definition cache.py:400
bytes serialize(self, typing.Any value)
Definition cache.py:192
ExpireCache build_cache(ExpireCacheCfg cfg)
Definition cache.py:173
typing.Any get(self, str key, default=None, str|None ctx=None)
Definition cache.py:151
ExpireCacheCfg hash_token
Definition cache.py:128
str secret_hash(self, str|bytes name)
Definition cache.py:200
str normalize_name(str name)
Definition cache.py:185
ExpireCacheStats state(self)
Definition cache.py:168
bool maintenance(self, bool force=False, bool truncate=False)
Definition cache.py:155
typing.Any deserialize(self, bytes value)
Definition cache.py:196
bool set(self, str key, typing.Any value, int|None expire, str|None ctx=None)
Definition cache.py:131
sqlite3.Connection connect(self)
Definition sqlitedb.py:194
get_setting(name, default=_unset)
Definition __init__.py:69