.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
cache.py
Go to the documentation of this file.
1"""Implementation of caching solutions.
2
3- :py:obj:`searx.cache.ExpireCache` and its :py:obj:`searx.cache.ExpireCacheCfg`
4
5----
6"""
7
8__all__ = ["ExpireCacheCfg", "ExpireCacheStats", "ExpireCache", "ExpireCacheSQLite"]
9
10import abc
11from collections.abc import Iterator
12import dataclasses
13import datetime
14import hashlib
15import hmac
16import os
17import pickle
18import sqlite3
19import string
20import tempfile
21import time
22import typing
23
24import msgspec
25
26from searx import sqlitedb
27from searx import logger
28from searx import get_setting
29
30log = logger.getChild("cache")
31
32
class ExpireCacheCfg(msgspec.Struct):  # pylint: disable=too-few-public-methods
    """Settings from which a :py:obj:`ExpireCache` instance is built."""

    name: str
    """Name of the cache."""

    db_url: str = ""
    """URL of the SQLite DB, the path to the database file.  If left empty,
    ``__post_init__`` replaces it with ``sxng_cache_{name}.db`` inside the
    directory returned by :py:func:`tempfile.gettempdir`, where ``{name}`` is
    the cache name filtered through :py:obj:`ExpireCache.normalize_name`."""

    MAX_VALUE_LEN: int = 10 * 1024
    """Max length of a *serialized* value."""

    MAXHOLD_TIME: int = 7 * 24 * 60 * 60  # 7 days
    """Hold time (default in sec.), after which a value is removed from the cache."""

    MAINTENANCE_PERIOD: int = 60 * 60  # 3600 sec. = 1 hour
    """Maintenance period in seconds / when :py:obj:`MAINTENANCE_MODE` is set to
    ``auto``."""

    MAINTENANCE_MODE: typing.Literal["auto", "off"] = "auto"
    """Type of maintenance mode

    ``auto``:
      Maintenance is carried out automatically as part of the maintenance
      intervals (:py:obj:`MAINTENANCE_PERIOD`); no external process is required.

    ``off``:
      Maintenance is switched off and must be carried out by an external process
      if required.
    """

    # NOTE: the default is evaluated once, when this class body is executed
    # (i.e. at import time of the module).
    password: bytes = get_setting("server.secret_key").encode()
    """Password used by :py:obj:`ExpireCache.secret_hash`.

    The default password is taken from :ref:`secret_key <server.secret_key>`.
    When the password is changed, the hashed keys in the cache can no longer be
    used, which is why all values in the cache are deleted when the password is
    changed.
    """

    def __post_init__(self):
        """Fill in the default ``db_url`` when none was configured."""
        if self.db_url:
            return
        file_name = f"sxng_cache_{ExpireCache.normalize_name(self.name)}.db"
        self.db_url = tempfile.gettempdir() + os.sep + file_name
78
79
@dataclasses.dataclass
class ExpireCacheStats:
    """Dataclass which provides information on the status of the cache."""

    cached_items: dict[str, list[tuple[str, typing.Any, int]]]
    """Values in the cache mapped by context name.

    .. code:: python

       {
           "context name": [
               ("foo key", "foo value", <expire>),
               ("bar key", "bar value", <expire>),
               # ...
           ],
           # ...
       }
    """

    def report(self) -> str:
        """Return a human readable, multi-line summary of :py:obj:`cached_items`.

        One line is emitted per key/value pair (or one ``empty`` line for a
        context without items), followed by two lines with the number of
        contexts and the number of key/value pairs.  The ``expire`` value of
        each item is interpreted as a unix timestamp and rendered in local
        time.
        """
        c_ctx = 0
        c_kv = 0
        lines: list[str] = []

        for ctx_name, kv_list in self.cached_items.items():
            c_ctx += 1
            if not kv_list:
                lines.append(f"[{ctx_name:20s}] empty")
                continue

            for key, value, expire in kv_list:
                valid_until = datetime.datetime.fromtimestamp(expire).strftime("%Y-%m-%d %H:%M:%S")
                c_kv += 1
                lines.append(f"[{ctx_name:20s}] {valid_until} {key:12}" f" --> ({type(value).__name__}) {value} ")

        lines.append(f"Number of contexts: {c_ctx}")
        lines.append(f"number of key/value pairs: {c_kv}")
        return "\n".join(lines)
118
119
class ExpireCache(abc.ABC):
    """Abstract base class for the implementation of a key/value cache
    with expire date."""

    cfg: ExpireCacheCfg

    hash_token: str = "hash_token"

    @abc.abstractmethod
    def set(self, key: str, value: typing.Any, expire: int | None, ctx: str | None = None) -> bool:
        """Store *value* under *key*.

        ``expire`` is the lifetime of the entry in seconds; if it is unset the
        default is taken from :py:obj:`ExpireCacheCfg.MAXHOLD_TIME`.  Once the
        lifetime has elapsed the key is to be removed from the cache.

        ``ctx`` names the context the ``key`` lives in; a key is only unique
        within its context.  How a context is mapped onto the connected
        database is up to the concrete implementation: in SQL databases, for
        example, the context is a DB table, in a Key/Value DB it could be a
        prefix for the key.

        If no context is given (the default is ``None``) a default context
        should be used, e.g. a default table for SQL databases or a default
        prefix in a Key/Value DB.
        """

    @abc.abstractmethod
    def get(self, key: str, default: typing.Any = None, ctx: str | None = None) -> typing.Any:
        """Return the *value* stored under *key* in context ``ctx``.  If the key
        is unset, ``default`` (``None`` unless specified) is returned."""

    @abc.abstractmethod
    def maintenance(self, force: bool = False, truncate: bool = False) -> bool:
        """Performs maintenance on the cache.

        ``force``:
          Maintenance should be carried out even if the maintenance interval has
          not yet been reached.

        ``truncate``:
          Truncate the entire cache, which is necessary, for example, if the
          password has changed.
        """

    @abc.abstractmethod
    def state(self) -> ExpireCacheStats:
        """Returns a :py:obj:`ExpireCacheStats`, which provides information
        about the status of the cache."""

    @staticmethod
    def build_cache(cfg: ExpireCacheCfg) -> "ExpireCacheSQLite":
        """Factory to build a caching instance.

        .. note::

           Currently, only the SQLite adapter is available, but other database
           types could be implemented in the future, e.g. a Valkey (Redis)
           adapter.
        """
        return ExpireCacheSQLite(cfg)

    @staticmethod
    def normalize_name(name: str) -> str:
        """Returns a normalized name that can be used as a file name or as a SQL
        table name (is used, for example, to normalize the context name).

        Every character that is not an ASCII letter, an ASCII digit or one of
        ``-``, ``_``, ``.`` is dropped.
        """
        allowed = "-_." + string.ascii_letters + string.digits
        return "".join(ch for ch in name if ch in allowed)

    def serialize(self, value: typing.Any) -> bytes:
        """Return the pickled representation of ``value``."""
        return pickle.dumps(value)

    def deserialize(self, value: bytes) -> typing.Any:
        """Rebuild the object from bytes produced by :py:obj:`serialize`."""
        # NOTE(review): pickle.loads executes whatever the byte stream asks
        # for -- only feed it data this cache has written itself.
        return pickle.loads(value)

    def secret_hash(self, name: str | bytes) -> str:
        """Creates a hash of the argument ``name``.  The hash value is formed
        from the ``name`` combined with the :py:obj:`password
        <ExpireCacheCfg.password>`.  Can be used, for example, to make the
        ``key`` stored in the DB unreadable for third parties.

        A ``str`` argument is UTF-8 encoded first.  The returned value is the
        hex digest of a SHA-256 HMAC whose *key* is ``name + password``; no
        message is fed into the HMAC.
        """
        raw = name.encode("utf-8") if isinstance(name, str) else name
        digest = hmac.new(raw + self.cfg.password, digestmod="sha256")
        return digest.hexdigest()
208
209
class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
    """Cache that manages key/value pairs in a SQLite DB.  The DB model in the
    SQLite DB is implemented in abstract class :py:obj:`SQLiteAppl
    <searx.sqlitedb.SQLiteAppl>`.

    The following configurations are required / supported:

    - :py:obj:`ExpireCacheCfg.db_url`
    - :py:obj:`ExpireCacheCfg.MAXHOLD_TIME`
    - :py:obj:`ExpireCacheCfg.MAINTENANCE_PERIOD`
    - :py:obj:`ExpireCacheCfg.MAINTENANCE_MODE`
    """

    DB_SCHEMA: int = 1

    # The key/value tables will be created on demand by self.create_table
    DDL_CREATE_TABLES: dict[str, str] = {}

    CACHE_TABLE_PREFIX: str = "CACHE-TABLE"

    def __init__(self, cfg: ExpireCacheCfg):
        """An instance of the SQLite expire cache is build up from a
        :py:obj:`config <ExpireCacheCfg>`."""

        # cfg has to be in place before the base class is initialized:
        # self.init() reads self.cfg.
        self.cfg: ExpireCacheCfg = cfg
        if cfg.db_url == ":memory:":
            log.critical("don't use SQLite DB in :memory: in production!!")
        super().__init__(cfg.db_url)

    def init(self, conn: sqlite3.Connection) -> bool:
        """Initialize the DB and verify the stored password hash token.

        Delegates to the base class ``init`` first and returns ``False`` if
        that returns a falsy value.  Otherwise the SHA-256 of the configured
        password is compared with the value stored in the ``hash_token``
        property; on a mismatch all cache tables are truncated and the new
        token is stored.  Returns ``True`` in that case and when the token
        already matches.
        """
        ret_val = super().init(conn)
        if not ret_val:
            return False

        new = hashlib.sha256(self.cfg.password).hexdigest()
        old = self.properties(self.hash_token)
        if old != new:
            if old is not None:
                log.warning("[%s] hash token changed: truncate all cache tables", self.cfg.name)
            # Keys hashed with another password can't be looked up any longer.
            self.maintenance(force=True, truncate=True)
            self.properties.set(self.hash_token, new)

        return True

    def maintenance(self, force: bool = False, truncate: bool = False) -> bool:
        """Performs maintenance on the cache, see :py:obj:`ExpireCache.maintenance`.

        Without ``force`` nothing is done (and ``False`` is returned) when
        :py:obj:`ExpireCacheCfg.MAINTENANCE_MODE` is ``off`` or when the next
        maintenance time has not been reached yet.  With ``truncate`` all
        key/value tables are emptied, otherwise only the rows whose ``expire``
        time stamp lies in the past are deleted.  Returns ``True`` when
        maintenance has been carried out.
        """
        if not force:
            if self.cfg.MAINTENANCE_MODE == "off":
                # Maintenance is left to an external process (which has to
                # call this method with force=True).
                return False
            if int(time.time()) < self.next_maintenance_time:
                # log.debug("no maintenance required yet, next maintenance interval is in the future")
                return False

        # Prevent parallel DB maintenance cycles from other DB connections
        # (e.g. in multi thread or process environments).
        self.properties.set("LAST_MAINTENANCE", "")  # hint: this (also) sets the m_time of the property!

        if truncate:
            self.truncate_tables(self.table_names)
            return True

        # drop items by expire time stamp ..
        expire = int(time.time())

        with self.connect() as conn:
            for table in self.table_names:
                res = conn.execute(f"DELETE FROM {table} WHERE expire < ?", (expire,))
                log.debug("deleted %s keys from table %s (expire date reached)", res.rowcount, table)

        # Vacuuming the WALs
        # https://www.theunterminatedstring.com/sqlite-vacuuming/

        # Issued after the ``with`` block (i.e. after the transaction has been
        # finished); a sqlite3 connection is not closed by its context manager.
        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
        conn.close()

        return True

    def create_table(self, table: str) -> bool:
        """Create DB ``table`` if it has not yet been created, no recreates are
        initiated if the table already exists.

        Returns ``True`` if the table has been created and ``False`` if it is
        already registered in :py:obj:`table_names`.
        """
        if table in self.table_names:
            # log.debug("key/value table %s exists in DB (no need to recreate)", table)
            return False

        log.info("key/value table '%s' NOT exists in DB -> create DB table ..", table)
        sql_table = "\n".join(
            [
                f"CREATE TABLE IF NOT EXISTS {table} (",
                "  key        TEXT,",
                "  value      BLOB,",
                f"  expire     INTEGER DEFAULT (strftime('%s', 'now') + {self.cfg.MAXHOLD_TIME}),",
                "PRIMARY KEY (key))",
            ]
        )
        sql_index = f"CREATE INDEX IF NOT EXISTS index_expire_{table} ON {table}(expire);"
        with self.connect() as conn:
            conn.execute(sql_table)
            conn.execute(sql_index)
        conn.close()

        # Register the table, table_names is built from these properties.
        self.properties.set(f"{self.CACHE_TABLE_PREFIX}-{table}", table)
        return True

    @property
    def table_names(self) -> list[str]:
        """List of key/value tables already created in the DB."""
        sql = f"SELECT value FROM properties WHERE name LIKE '{self.CACHE_TABLE_PREFIX}%%'"
        rows = self.DB.execute(sql).fetchall() or []
        return [r[0] for r in rows]

    def truncate_tables(self, table_names: list[str]) -> bool:
        """Delete all rows from each table in ``table_names``; always returns
        ``True``."""
        log.debug("truncate table: %s", ",".join(table_names))
        with self.connect() as conn:
            for table in table_names:
                conn.execute(f"DELETE FROM {table}")
        conn.close()
        return True

    @property
    def next_maintenance_time(self) -> int:
        """Returns (unix epoch) time of the next maintenance."""

        return self.cfg.MAINTENANCE_PERIOD + self.properties.m_time("LAST_MAINTENANCE", int(time.time()))

    # implement ABC methods of ExpireCache

    def set(self, key: str, value: typing.Any, expire: int | None, ctx: str | None = None) -> bool:
        """Set key/value in DB table given by argument ``ctx``.  If expire is
        unset the default is taken from :py:obj:`ExpireCacheCfg.MAXHOLD_TIME`.
        If ``ctx`` argument is ``None`` (the default), a table name is
        generated from the :py:obj:`ExpireCacheCfg.name`.  If DB table does not
        exists, it will be created (on demand) by :py:obj:`self.create_table
        <ExpireCacheSQLite.create_table>`.

        Returns ``False`` (and stores nothing) if the serialized value is
        longer than :py:obj:`ExpireCacheCfg.MAX_VALUE_LEN`, otherwise ``True``.
        """
        table = ctx
        self.maintenance()

        table_name = table
        if not table_name:
            table_name = self.normalize_name(self.cfg.name)

        value = self.serialize(value=value)
        if len(value) > self.cfg.MAX_VALUE_LEN:
            # Log the key, not the (oversized) serialized value.
            log.warning(
                "ExpireCache.set(): %s.key='%s' - value too big to cache (len: %s) ", table_name, key, len(value)
            )
            return False

        # A falsy expire (None or 0) selects the default hold time.
        if not expire:
            expire = self.cfg.MAXHOLD_TIME
        expire = int(time.time()) + expire

        self.create_table(table_name)

        sql = (
            f"INSERT INTO {table_name} (key, value, expire) VALUES (?, ?, ?)"
            f"    ON CONFLICT DO "
            f"UPDATE SET value=?, expire=?"
        )

        if table:
            with self.DB:
                self.DB.execute(sql, (key, value, expire, value, expire))
        else:
            with self.connect() as conn:
                conn.execute(sql, (key, value, expire, value, expire))
            conn.close()

        return True

    def get(self, key: str, default: typing.Any = None, ctx: str | None = None) -> typing.Any:
        """Get value of ``key`` from table given by argument ``ctx``.  If
        ``ctx`` argument is ``None`` (the default), a table name is generated
        from the :py:obj:`ExpireCacheCfg.name`.  If ``key`` not exists (in
        table), the ``default`` value is returned.

        The ``expire`` column is not evaluated here; rows past their expire
        time are only removed by :py:obj:`maintenance`.
        """
        table = ctx
        self.maintenance()

        if not table:
            table = self.normalize_name(self.cfg.name)

        if table not in self.table_names:
            return default

        sql = f"SELECT value FROM {table} WHERE key = ?"
        row = self.DB.execute(sql, (key,)).fetchone()
        if row is None:
            return default

        return self.deserialize(row[0])

    def pairs(self, ctx: str | None = None) -> Iterator[tuple[str, typing.Any]]:
        """Iterate over key/value pairs from table given by argument ``ctx``.
        If ``ctx`` argument is ``None`` (the default), a table name is
        generated from the :py:obj:`ExpireCacheCfg.name`.  Nothing is yielded
        if the table does not exist."""
        table = ctx
        self.maintenance()

        if not table:
            table = self.normalize_name(self.cfg.name)

        if table in self.table_names:
            for row in self.DB.execute(f"SELECT key, value FROM {table}"):
                yield row[0], self.deserialize(row[1])

    def state(self) -> ExpireCacheStats:
        """Return a :py:obj:`ExpireCacheStats` with the (deserialized) content
        of all key/value tables listed in :py:obj:`table_names`."""
        cached_items: dict[str, list[tuple[str, typing.Any, int]]] = {}
        for table in self.table_names:
            cached_items[table] = []
            for row in self.DB.execute(f"SELECT key, value, expire FROM {table}"):
                cached_items[table].append((row[0], self.deserialize(row[1]), row[2]))
        return ExpireCacheStats(cached_items=cached_items)
typing.Any get(self, str key, typing.Any default=None, str|None ctx=None)
Definition cache.py:375
truncate_tables(self, list[str] table_names)
Definition cache.py:318
bool create_table(self, str table)
Definition cache.py:284
bool init(self, sqlite3.Connection conn)
Definition cache.py:239
__init__(self, ExpireCacheCfg cfg)
Definition cache.py:230
bool maintenance(self, bool force=False, bool truncate=False)
Definition cache.py:254
bool set(self, str key, typing.Any value, int|None expire, str|None ctx=None)
Definition cache.py:334
ExpireCacheStats state(self)
Definition cache.py:412
Iterator[tuple[str, typing.Any]] pairs(self, str ctx)
Definition cache.py:398
bytes serialize(self, typing.Any value)
Definition cache.py:190
"ExpireCacheSQLite" build_cache(ExpireCacheCfg cfg)
Definition cache.py:171
typing.Any get(self, str key, typing.Any default=None, str|None ctx=None)
Definition cache.py:149
str secret_hash(self, str|bytes name)
Definition cache.py:198
str normalize_name(str name)
Definition cache.py:183
ExpireCacheStats state(self)
Definition cache.py:166
bool maintenance(self, bool force=False, bool truncate=False)
Definition cache.py:153
typing.Any deserialize(self, bytes value)
Definition cache.py:194
bool set(self, str key, typing.Any value, int|None expire, str|None ctx=None)
Definition cache.py:129
sqlite3.Connection connect(self)
Definition sqlitedb.py:196
SQLiteProperties properties
Definition sqlitedb.py:157
t.Any get_setting(str name, t.Any default=_unset)
Definition __init__.py:74