.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
cache.py
Go to the documentation of this file.
"""Implementation of caching solutions.

- :py:obj:`searx.cache.ExpireCache` and its :py:obj:`searx.cache.ExpireCacheCfg`

----
"""
7
8from __future__ import annotations
9
10__all__ = ["ExpireCacheCfg", "ExpireCacheStats", "ExpireCache", "ExpireCacheSQLite"]
11
12import abc
13import dataclasses
14import datetime
15import hashlib
16import hmac
17import os
18import pickle
19import sqlite3
20import string
21import tempfile
22import time
23import typing
24
25import msgspec
26
27from searx import sqlitedb
28from searx import logger
29from searx import get_setting
30
31log = logger.getChild("cache")
32
33
class ExpireCacheCfg(msgspec.Struct):  # pylint: disable=too-few-public-methods
    """Configuration of a :py:obj:`ExpireCache` cache.

    Only ``name`` is mandatory, every other field has a default.  An empty
    ``db_url`` is replaced by a file in the temp folder when the struct is
    initialized (see ``__post_init__``).
    """

    name: str
    """Name of the cache."""

    db_url: str = ""
    """URL of the SQLite DB, the path to the database file. If unset a default
    DB will be created in `/tmp/sxng_cache_{self.name}.db`"""
    # NOTE: the fallback is assembled in __post_init__ from
    # tempfile.gettempdir() and the *normalized* name, "/tmp" and the raw name
    # are only the typical outcome.

    MAX_VALUE_LEN: int = 1024 * 10
    """Max length of a *serialized* value."""

    MAXHOLD_TIME: int = 60 * 60 * 24 * 7  # 7 days
    """Hold time (default in sec.), after which a value is removed from the cache."""

    MAINTENANCE_PERIOD: int = 60 * 60  # 1h (3600 seconds)
    """Maintenance period in seconds / when :py:obj:`MAINTENANCE_MODE` is set to
    ``auto``."""

    MAINTENANCE_MODE: typing.Literal["auto", "off"] = "auto"
    """Type of maintenance mode

    ``auto``:
      Maintenance is carried out automatically as part of the maintenance
      intervals (:py:obj:`MAINTENANCE_PERIOD`); no external process is required.

    ``off``:
      Maintenance is switched off and must be carried out by an external process
      if required.
    """

    # NOTE: this default expression is evaluated once, when the class body is
    # executed (i.e. at import time), not per instance.
    password: bytes = get_setting("server.secret_key").encode()  # type: ignore
    """Password used by :py:obj:`ExpireCache.secret_hash`.

    The default password is taken from :ref:`secret_key <server.secret_key>`.
    When the password is changed, the hashed keys in the cache can no longer be
    used, which is why all values in the cache are deleted when the password is
    changed.
    """

    def __post_init__(self) -> None:
        # if db_url is unset, use a default DB in <tempdir>/sxng_cache_{name}.db;
        # the name is passed through ExpireCache.normalize_name so that only
        # characters from its whitelist end up in the file name.
        if not self.db_url:
            self.db_url = tempfile.gettempdir() + os.sep + f"sxng_cache_{ExpireCache.normalize_name(self.name)}.db"
79
80
@dataclasses.dataclass
class ExpireCacheStats:
    """Dataclass which provides information on the status of the cache."""

    cached_items: dict[str, list[tuple[str, typing.Any, int]]]
    """Values in the cache mapped by context name.

    .. code:: python

       {
           "context name": [
               ("foo key", "foo value", <expire>),
               ("bar key", "bar value", <expire>),
               # ...
           ],
           # ...
       }
    """

    def report(self) -> str:
        """Return a human readable, multi-line summary of :py:obj:`cached_items`.

        One line is emitted per key/value pair (or a single ``empty`` line for
        a context without items), followed by two lines with the number of
        contexts and the number of key/value pairs.  The expire time stamp is
        rendered with :py:obj:`datetime.datetime.fromtimestamp`, i.e. in the
        local time zone of the process.
        """
        lines: list[str] = []
        c_kv = 0

        for ctx_name, kv_list in self.cached_items.items():
            if not kv_list:
                lines.append(f"[{ctx_name:20s}] empty")
                continue

            for key, value, expire in kv_list:
                valid_until = datetime.datetime.fromtimestamp(expire).strftime("%Y-%m-%d %H:%M:%S")
                c_kv += 1
                lines.append(f"[{ctx_name:20s}] {valid_until} {key:12}" f" --> ({type(value).__name__}) {value} ")

        # every mapping entry is one context, no need to count them by hand
        lines.append(f"Number of contexts: {len(self.cached_items)}")
        lines.append(f"number of key/value pairs: {c_kv}")
        return "\n".join(lines)
119
120
class ExpireCache(abc.ABC):
    """Abstract interface of a key/value cache whose entries carry an expire
    date.  Concrete adapters implement :py:obj:`set`, :py:obj:`get`,
    :py:obj:`maintenance` and :py:obj:`state`; serialization and hashing
    helpers are shared by all adapters."""

    cfg: ExpireCacheCfg

    hash_token = "hash_token"

    @abc.abstractmethod
    def set(self, key: str, value: typing.Any, expire: int | None, ctx: str | None = None) -> bool:
        """Store *value* under *key*.

        ``expire`` is the lifetime of the key in seconds; when it is unset the
        default from :py:obj:`ExpireCacheCfg.MAXHOLD_TIME` applies.  Once the
        lifetime has elapsed, the key is deleted automatically.

        ``ctx`` names the context the ``key`` lives in, a key is only unique
        within its context.  How a context is mapped to the connected database
        is up to the concrete implementation: in a SQL database it is, for
        example, a DB table, in a Key/Value DB it could be a key prefix.

        Without a context (the default is ``None``) a default context should
        be used, e.g. a default table for SQL databases or a default prefix in
        a Key/Value DB.
        """

    @abc.abstractmethod
    def get(self, key: str, default=None, ctx: str | None = None) -> typing.Any:
        """Return the *value* stored under *key* in context ``ctx``.  If the
        key is unset, ``default`` (``None`` if not given) is returned."""

    @abc.abstractmethod
    def maintenance(self, force: bool = False, truncate: bool = False) -> bool:
        """Run a maintenance cycle on the cache.

        ``force``:
          Carry out the maintenance even if the maintenance interval has not
          yet been reached.

        ``truncate``:
          Truncate the entire cache, which is necessary, for example, if the
          password has changed.
        """

    @abc.abstractmethod
    def state(self) -> ExpireCacheStats:
        """Return a :py:obj:`ExpireCacheStats` describing the current content
        of the cache."""

    @staticmethod
    def build_cache(cfg: ExpireCacheCfg) -> ExpireCache:
        """Factory that builds a caching instance from a configuration.

        .. note::

           Currently, only the SQLite adapter is available, but other database
           types could be implemented in the future, e.g. a Valkey (Redis)
           adapter.
        """
        return ExpireCacheSQLite(cfg)

    @staticmethod
    def normalize_name(name: str) -> str:
        """Strip every character from ``name`` that is not an ASCII letter, a
        digit or one of ``-``, ``_``, ``.``.  The result is used as a file name
        or as a SQL table name (e.g. to normalize the context name)."""

        allowed = frozenset("-_." + string.ascii_letters + string.digits)
        return "".join(char for char in name if char in allowed)

    def serialize(self, value: typing.Any) -> bytes:
        """Pickle ``value`` into a byte string."""
        return pickle.dumps(value)

    def deserialize(self, value: bytes) -> typing.Any:
        """Inverse of :py:obj:`serialize`.  Unpickling can execute arbitrary
        code, so only feed it bytes that were written by this cache."""
        return pickle.loads(value)

    def secret_hash(self, name: str | bytes) -> str:
        """Return a hex digest built from ``name`` combined with the
        :py:obj:`password <ExpireCacheCfg.password>`.  Can be used, for
        example, to make the ``key`` stored in the DB unreadable for third
        parties."""

        raw = name.encode("utf-8") if isinstance(name, str) else name
        # name + password form the HMAC key, there is no separate message
        return hmac.new(raw + self.cfg.password, digestmod="sha256").hexdigest()
209
210
# NOTE(review): the ``class`` statement was missing from this listing and has
# been reconstructed -- the bases are inferred from the docstring (SQLiteAppl),
# the ``super().__init__(cfg.db_url)`` / ``super().init(conn)`` calls and the
# ``ExpireCache`` return type of ``ExpireCache.build_cache``; confirm against
# the upstream file.
class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
    """Cache that manages key/value pairs in a SQLite DB.  The DB model in the
    SQLite DB is implemented in abstract class :py:obj:`SQLiteAppl
    <searx.sqlitedb.SQLiteAppl>`.

    The following configurations are required / supported:

    - :py:obj:`ExpireCacheCfg.db_url`
    - :py:obj:`ExpireCacheCfg.MAXHOLD_TIME`
    - :py:obj:`ExpireCacheCfg.MAINTENANCE_PERIOD`
    - :py:obj:`ExpireCacheCfg.MAINTENANCE_MODE`
    """

    DB_SCHEMA = 1

    # The key/value tables will be created on demand by self.create_table
    DDL_CREATE_TABLES = {}

    # Prefix of the property names under which create_table registers the
    # key/value tables (and by which table_names finds them again).
    CACHE_TABLE_PREFIX = "CACHE-TABLE"
230
    def __init__(self, cfg: ExpireCacheCfg) -> None:
        """An instance of the SQLite expire cache is built up from a
        :py:obj:`config <ExpireCacheCfg>`.

        The configuration is stored in ``self.cfg`` and ``cfg.db_url`` is
        handed over to the constructor of the base class.
        """

        # set the config first: the other methods of this class read self.cfg
        self.cfg = cfg
        if cfg.db_url == ":memory:":
            # only logged, an in-memory DB is not rejected
            log.critical("don't use SQLite DB in :memory: in production!!")
        super().__init__(cfg.db_url)
239
    def init(self, conn: sqlite3.Connection) -> bool:
        """Initialize the DB and make sure the cached values match the
        configured password.

        Returns ``False`` when the ``init`` of the base class returns a falsy
        value, otherwise ``True``.  A SHA-256 digest of
        :py:obj:`ExpireCacheCfg.password` is compared with the digest stored in
        the ``hash_token`` property; if they differ, all cache tables are
        truncated and the new digest is stored.
        """
        ret_val = super().init(conn)
        if not ret_val:
            return False

        new = hashlib.sha256(self.cfg.password).hexdigest()
        # presumably ``None`` when no token has been stored yet (fresh DB) --
        # the check against ``None`` below relies on that
        old = self.properties(self.hash_token)
        if old != new:
            if old is not None:
                # only warn if there was a token before, i.e. the password changed
                log.warning("[%s] hash token changed: truncate all cache tables", self.cfg.name)
            # NOTE(review): the nesting of the next two statements was lost in
            # this listing; they are placed in the ``old != new`` branch --
            # confirm against the upstream file.
            self.maintenance(force=True, truncate=True)
            self.properties.set(self.hash_token, new)

        return True
254
255 def maintenance(self, force: bool = False, truncate: bool = False) -> bool:
256
257 if not force and int(time.time()) < self.next_maintenance_time:
258 # log.debug("no maintenance required yet, next maintenance interval is in the future")
259 return False
260
261 # Prevent parallel DB maintenance cycles from other DB connections
262 # (e.g. in multi thread or process environments).
263 self.properties.set("LAST_MAINTENANCE", "") # hint: this (also) sets the m_time of the property!
264
265 if truncate:
267 return True
268
269 # drop items by expire time stamp ..
270 expire = int(time.time())
271
272 with self.connect() as conn:
273 for table in self.table_names:
274 res = conn.execute(f"DELETE FROM {table} WHERE expire < ?", (expire,))
275 log.debug("deleted %s keys from table %s (expire date reached)", res.rowcount, table)
276
277 # Vacuuming the WALs
278 # https://www.theunterminatedstring.com/sqlite-vacuuming/
279
280 conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
281 conn.close()
282
283 return True
284
285 def create_table(self, table: str) -> bool:
286 """Create DB ``table`` if it has not yet been created, no recreates are
287 initiated if the table already exists.
288 """
289 if table in self.table_names:
290 # log.debug("key/value table %s exists in DB (no need to recreate)", table)
291 return False
292
293 log.info("key/value table '%s' NOT exists in DB -> create DB table ..", table)
294 sql_table = "\n".join(
295 [
296 f"CREATE TABLE IF NOT EXISTS {table} (",
297 " key TEXT,",
298 " value BLOB,",
299 f" expire INTEGER DEFAULT (strftime('%s', 'now') + {self.cfg.MAXHOLD_TIME}),",
300 "PRIMARY KEY (key))",
301 ]
302 )
303 sql_index = f"CREATE INDEX IF NOT EXISTS index_expire_{table} ON {table}(expire);"
304 with self.connect() as conn:
305 conn.execute(sql_table)
306 conn.execute(sql_index)
307 conn.close()
308
309 self.properties.set(f"{self.CACHE_TABLE_PREFIX}-{table}", table)
310 return True
311
312 @property
313 def table_names(self) -> list[str]:
314 """List of key/value tables already created in the DB."""
315 sql = f"SELECT value FROM properties WHERE name LIKE '{self.CACHE_TABLE_PREFIX}%%'"
316 rows = self.DB.execute(sql).fetchall() or []
317 return [r[0] for r in rows]
318
319 def truncate_tables(self, table_names: list[str]):
320 log.debug("truncate table: %s", ",".join(table_names))
321 with self.connect() as conn:
322 for table in table_names:
323 conn.execute(f"DELETE FROM {table}")
324 conn.close()
325 return True
326
327 @property
328 def next_maintenance_time(self) -> int:
329 """Returns (unix epoch) time of the next maintenance."""
330
331 return self.cfg.MAINTENANCE_PERIOD + self.properties.m_time("LAST_MAINTENANCE", int(time.time()))
332
333 # implement ABC methods of ExpireCache
334
335 def set(self, key: str, value: typing.Any, expire: int | None, ctx: str | None = None) -> bool:
336 """Set key/value in DB table given by argument ``ctx``. If expire is
337 unset the default is taken from :py:obj:`ExpireCacheCfg.MAXHOLD_TIME`.
338 If ``ctx`` argument is ``None`` (the default), a table name is
339 generated from the :py:obj:`ExpireCacheCfg.name`. If DB table does not
340 exists, it will be created (on demand) by :py:obj:`self.create_table
341 <ExpireCacheSQLite.create_table>`.
342 """
343 table = ctx
344 self.maintenance()
345
346 value = self.serialize(value=value)
347 if len(value) > self.cfg.MAX_VALUE_LEN:
348 log.warning("ExpireCache.set(): %s.key='%s' - value too big to cache (len: %s) ", table, value, len(value))
349 return False
350
351 if not expire:
352 expire = self.cfg.MAXHOLD_TIME
353 expire = int(time.time()) + expire
354
355 table_name = table
356 if not table_name:
357 table_name = self.normalize_name(self.cfg.name)
358 self.create_table(table_name)
359
360 sql = (
361 f"INSERT INTO {table_name} (key, value, expire) VALUES (?, ?, ?)"
362 f" ON CONFLICT DO "
363 f"UPDATE SET value=?, expire=?"
364 )
365
366 if table:
367 with self.DB:
368 self.DB.execute(sql, (key, value, expire, value, expire))
369 else:
370 with self.connect() as conn:
371 conn.execute(sql, (key, value, expire, value, expire))
372 conn.close()
373
374 return True
375
376 def get(self, key: str, default=None, ctx: str | None = None) -> typing.Any:
377 """Get value of ``key`` from table given by argument ``ctx``. If
378 ``ctx`` argument is ``None`` (the default), a table name is generated
379 from the :py:obj:`ExpireCacheCfg.name`. If ``key`` not exists (in
380 table), the ``default`` value is returned.
381
382 """
383 table = ctx
384 self.maintenance()
385
386 if not table:
387 table = self.normalize_name(self.cfg.name)
388
389 if table not in self.table_names:
390 return default
391
392 sql = f"SELECT value FROM {table} WHERE key = ?"
393 row = self.DB.execute(sql, (key,)).fetchone()
394 if row is None:
395 return default
396
397 return self.deserialize(row[0])
398
399 def state(self) -> ExpireCacheStats:
400 cached_items = {}
401 for table in self.table_names:
402 cached_items[table] = []
403 for row in self.DB.execute(f"SELECT key, value, expire FROM {table}"):
404 cached_items[table].append((row[0], self.deserialize(row[1]), row[2]))
405 return ExpireCacheStats(cached_items=cached_items)
truncate_tables(self, list[str] table_names)
Definition cache.py:319
bool create_table(self, str table)
Definition cache.py:285
bool init(self, sqlite3.Connection conn)
Definition cache.py:240
__init__(self, ExpireCacheCfg cfg)
Definition cache.py:231
typing.Any get(self, str key, default=None, str|None ctx=None)
Definition cache.py:376
bool maintenance(self, bool force=False, bool truncate=False)
Definition cache.py:255
bool set(self, str key, typing.Any value, int|None expire, str|None ctx=None)
Definition cache.py:335
ExpireCacheStats state(self)
Definition cache.py:399
bytes serialize(self, typing.Any value)
Definition cache.py:191
ExpireCache build_cache(ExpireCacheCfg cfg)
Definition cache.py:172
typing.Any get(self, str key, default=None, str|None ctx=None)
Definition cache.py:150
ExpireCacheCfg hash_token
Definition cache.py:127
str secret_hash(self, str|bytes name)
Definition cache.py:199
str normalize_name(str name)
Definition cache.py:184
ExpireCacheStats state(self)
Definition cache.py:167
bool maintenance(self, bool force=False, bool truncate=False)
Definition cache.py:154
typing.Any deserialize(self, bytes value)
Definition cache.py:195
bool set(self, str key, typing.Any value, int|None expire, str|None ctx=None)
Definition cache.py:130
sqlite3.Connection connect(self)
Definition sqlitedb.py:194
get_setting(name, default=_unset)
Definition __init__.py:69