.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
sqlitedb.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""Implementations to make access to SQLite databases a little more convenient.
3
4:py:obj:`SQLiteAppl`
5 Abstract class with which DB applications can be implemented.
6
7:py:obj:`SQLiteProperties`:
8 Class to manage properties stored in a database.
9
10Examplarical implementations based on :py:obj:`SQLiteAppl`:
11
12:py:obj:`searx.cache.ExpireCacheSQLite` :
13 Cache that manages key/value pairs in a SQLite DB, in which the key/value
14 pairs are deleted after an "expire" time. This type of cache is used, for
15 example, for the engines, see :py:obj:`searx.enginelib.EngineCache`.
16
17:py:obj:`searx.favicons.cache.FaviconCacheSQLite` :
18 Favicon cache that manages the favicon BLOBs in a SQLite DB.
19
20----
21"""
22
23import typing as t
24import abc
25import datetime
26import re
27import sqlite3
28import sys
29import threading
30import uuid
31
32from searx import logger
33
34logger = logger.getChild("sqlitedb")
35
36THREAD_LOCAL = threading.local()
37
38
40 """A *thead-local* DB session"""
41
42 @classmethod
43 def get_connect(cls, app: "SQLiteAppl") -> sqlite3.Connection:
44 """Returns a thread local DB connection. The connection is only
45 established once per thread.
46 """
47 if getattr(THREAD_LOCAL, "DBSession_map", None) is None:
48 url_to_session: dict[str, DBSession] = {}
49 THREAD_LOCAL.DBSession_map = url_to_session
50
51 session: DBSession | None = THREAD_LOCAL.DBSession_map.get(app.db_url)
52 if session is None:
53 session = cls(app)
54 return session.conn
55
56 def __init__(self, app: "SQLiteAppl"):
57 self.uuid: uuid.UUID = uuid.uuid4()
58 self.app: SQLiteAppl = app
59 self._conn: sqlite3.Connection | None = None
60 # self.__del__ will be called, when thread ends
61 if getattr(THREAD_LOCAL, "DBSession_map", None) is None:
62 url_to_session: dict[str, DBSession] = {}
63 THREAD_LOCAL.DBSession_map = url_to_session
64 THREAD_LOCAL.DBSession_map[self.app.db_url] = self
65
66 @property
67 def conn(self) -> sqlite3.Connection:
68 msg = f"[{threading.current_thread().ident}] DBSession: " f"{self.app.__class__.__name__}({self.app.db_url})"
69 if self._conn is None:
70 self._conn = self.app.connect()
71 logger.debug("%s --> created new connection", msg)
72 # else:
73 # logger.debug("%s --> already connected", msg)
74
75 return self._conn
76
77 def __del__(self):
78 try:
79 if self._conn is not None:
80 # HINT: Don't use Python's logging facility in a destructor, it
81 # will produce error reports when python aborts the process or
82 # thread, because at this point objects that the logging module
83 # needs, do not exist anymore.
84 # msg = f"DBSession: close [{self.uuid}] {self.app.__class__.__name__}({self.app.db_url})"
85 # logger.debug(msg)
86 self._conn.close()
87 except Exception: # pylint: disable=broad-exception-caught
88 pass
89
90
91class SQLiteAppl(abc.ABC):
92 """Abstract base class for implementing convenient DB access in SQLite
93 applications. In the constructor, a :py:obj:`SQLiteProperties` instance is
94 already aggregated under ``self.properties``."""
95
96 DDL_CREATE_TABLES: dict[str, str] = {}
97
98 DB_SCHEMA: int = 1
99 """As soon as changes are made to the DB schema, the version number must be
100 increased. Changes to the version number require the DB to be recreated (or
101 migrated / if an migration path exists and is implemented)."""
102
103 SQLITE_THREADING_MODE: str = {
104 0: "single-thread",
105 1: "multi-thread",
106 3: "serialized"}[sqlite3.threadsafety] # fmt:skip
107 """Threading mode of the SQLite library. Depends on the options used at
108 compile time and is different for different distributions and architectures.
109
110 Possible values are 0:``single-thread``, 1:``multi-thread``,
111 3:``serialized`` (see :py:obj:`sqlite3.threadsafety`). Pre- Python 3.11
112 this value was hard coded to 1.
113
114 Depending on this value, optimizations are made, e.g. in “serialized” mode
115 it is not necessary to create a separate DB connector for each thread.
116 """
117
118 SQLITE_JOURNAL_MODE: str = "WAL"
119 """``SQLiteAppl`` applications are optimized for WAL_ mode, its not recommend
120 to change the journal mode (see :py:obj:`SQLiteAppl.tear_down`).
121
122 .. _WAL: https://sqlite.org/wal.html
123 """
124 SQLITE_CONNECT_ARGS: dict[str,str|int|bool|None] = {
125 # "timeout": 5.0,
126 # "detect_types": 0,
127 "check_same_thread": bool(SQLITE_THREADING_MODE != "serialized"),
128 "cached_statements": 0, # https://github.com/python/cpython/issues/118172
129 # "uri": False,
130 "isolation_level": None,
131 } # fmt:skip
132 """Connection arguments (:py:obj:`sqlite3.connect`)
133
134 ``check_same_thread``:
135 Is disabled by default when :py:obj:`SQLITE_THREADING_MODE` is
136 ``serialized``. The check is more of a hindrance in this case because it
137 would prevent a DB connector from being used in multiple threads.
138
139 ``cached_statements``:
140 Is set to ``0`` by default. Note: Python 3.12+ fetch result are not
141 consistent in multi-threading application and causing an API misuse error.
142
143 The multithreading use in SQLiteAppl is intended and supported if
144 threadsafety is set to 3 (aka "serialized"). CPython supports “serialized”
145 from version 3.12 on, but unfortunately only with errors:
146
147 - https://github.com/python/cpython/issues/118172
148 - https://github.com/python/cpython/issues/123873
149
150 The workaround for SQLite3 multithreading cache inconsistency is to set
151 option ``cached_statements`` to ``0`` by default.
152 """
153
154 def __init__(self, db_url: str):
155
156 self.db_url: str = db_url
157 self.properties: SQLiteProperties = SQLiteProperties(db_url)
158 self._init_done: bool = False
159 self._compatibility()
160 # atexit.register(self.tear_down)
161
162 # def tear_down(self):
163 # """:ref:`Vacuuming the WALs` upon normal interpreter termination
164 # (:py:obj:`atexit.register`).
165
166 # .. _SQLite: Vacuuming the WALs: https://www.theunterminatedstring.com/sqlite-vacuuming/
167 # """
168 # self.DB.execute("PRAGMA wal_checkpoint(TRUNCATE)")
169
170 def _compatibility(self):
171
172 if self.SQLITE_THREADING_MODE == "serialized":
173 self._DB: sqlite3.Connection | None = None
174 else:
175 msg = (
176 f"SQLite library is compiled with {self.SQLITE_THREADING_MODE} mode,"
177 " read https://docs.python.org/3/library/sqlite3.html#sqlite3.threadsafety"
178 )
179 if threading.active_count() > 1:
180 logger.error(msg)
181 else:
182 logger.warning(msg)
183
184 if sqlite3.sqlite_version_info <= (3, 35):
185 # See "Generalize UPSERT:" in https://sqlite.org/releaselog/3_35_0.html
186 logger.critical(
187 "SQLite runtime library version %s is not supported (require >= 3.35)", sqlite3.sqlite_version
188 )
189
190 def _connect(self) -> sqlite3.Connection:
191 conn = sqlite3.Connection(self.db_url, **self.SQLITE_CONNECT_ARGS) # type: ignore
192 conn.execute(f"PRAGMA journal_mode={self.SQLITE_JOURNAL_MODE}")
193 self.register_functions(conn)
194 return conn
195
196 def connect(self) -> sqlite3.Connection:
197 """Creates a new DB connection (:py:obj:`SQLITE_CONNECT_ARGS`). If not
198 already done, the DB schema is set up. The caller must take care of
199 closing the resource. Alternatively, :py:obj:`SQLiteAppl.DB` can also
200 be used (the resource behind `self.DB` is automatically closed when the
201 process or thread is terminated).
202 """
203 if sys.version_info < (3, 12):
204 # Prior Python 3.12 there is no "autocommit" option
205 self.SQLITE_CONNECT_ARGS.pop("autocommit", None) # pyright: ignore[reportUnreachable]
206
207 msg = (
208 f"[{threading.current_thread().ident}] {self.__class__.__name__}({self.db_url})"
209 f" {self.SQLITE_CONNECT_ARGS} // {self.SQLITE_JOURNAL_MODE}"
210 )
211 logger.debug(msg)
212
213 with self._connect() as conn:
214 self.init(conn)
215 return conn
216
217 def register_functions(self, conn: sqlite3.Connection):
218 """Create user-defined_ SQL functions.
219
220 ``REGEXP(<pattern>, <field>)`` : 0 | 1
221 `re.search`_ returns (int) 1 for a match and 0 for none match of
222 ``<pattern>`` in ``<field>``.
223
224 .. code:: sql
225
226 SELECT '12' AS field WHERE REGEXP('^[0-9][0-9]$', field)
227 -- 12
228
229 SELECT REGEXP('[0-9][0-9]', 'X12Y')
230 -- 1
231 SELECT REGEXP('[0-9][0-9]', 'X1Y')
232 -- 0
233
234 .. _user-defined: https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.create_function
235 .. _deterministic: https://sqlite.org/deterministic.html
236 .. _re.search: https://docs.python.org/3/library/re.html#re.search
237 """
238
239 conn.create_function("regexp", 2, lambda x, y: 1 if re.search(x, y) else 0, deterministic=True) # type: ignore
240
241 @property
242 def DB(self) -> sqlite3.Connection:
243 """Provides a DB connection. The connection is a *singleton* and
244 therefore well suited for read access. If
245 :py:obj:`SQLITE_THREADING_MODE` is ``serialized`` only one DB connection
246 is created for all threads.
247
248 .. note::
249
250 For dedicated `transaction control`_, it is recommended to create a
251 new connection (:py:obj:`SQLiteAppl.connect`).
252
253 .. _transaction control:
254 https://docs.python.org/3/library/sqlite3.html#sqlite3-controlling-transactions
255 """
256
257 conn: sqlite3.Connection
258
259 if self.SQLITE_THREADING_MODE == "serialized":
260 # Theoretically it is possible to reuse the DB cursor across threads
261 # as of Python 3.12, in practice the threading of the cursor seems
262 # to me a little faulty that I prefer to establish one connection
263 # per thread.
264 #
265 # may we can activate this code one day ..
266 # if self._DB is None:
267 # self._DB = self.connect()
268 # conn = self._DB
269 conn = DBSession.get_connect(self)
270 else:
271 conn = DBSession.get_connect(self)
272
273 # Since more than one instance of SQLiteAppl share the same DB
274 # connection, we need to make sure that each SQLiteAppl instance has run
275 # its init method at least once.
276 self.init(conn)
277
278 return conn
279
280 def init(self, conn: sqlite3.Connection) -> bool:
281 """Initializes the DB schema and properties, is only executed once even
282 if called several times.
283
284 If the initialization has not yet taken place, it is carried out and a
285 `True` is returned to the caller at the end. If the initialization has
286 already been carried out in the past, `False` is returned.
287 """
288
289 if self._init_done:
290 return False
291 self._init_done = True
292
293 logger.debug("init DB: %s", self.db_url)
294 self.properties.init(conn)
295
296 ver = self.properties("DB_SCHEMA")
297 if ver is None:
298 with conn:
299 self.create_schema(conn)
300 else:
301 ver = int(ver)
302 if ver != self.DB_SCHEMA:
303 raise sqlite3.DatabaseError("Expected DB schema v%s, DB schema is v%s" % (self.DB_SCHEMA, ver))
304 logger.debug("DB_SCHEMA = %s", ver)
305
306 return True
307
308 def create_schema(self, conn: sqlite3.Connection):
309
310 logger.debug("create schema ..")
311 self.properties.set("DB_SCHEMA", self.DB_SCHEMA)
312 self.properties.set("LAST_MAINTENANCE", "")
313 with conn:
314 for table_name, sql in self.DDL_CREATE_TABLES.items():
315 conn.execute(sql)
316 self.properties.set(f"Table {table_name} created", table_name)
317
318
320 """Simple class to manage properties of a DB application in the DB. The
321 object has its own DB connection and transaction area.
322
323 .. code:: sql
324
325 CREATE TABLE IF NOT EXISTS properties (
326 name TEXT,
327 value TEXT,
328 m_time INTEGER DEFAULT (strftime('%s', 'now')),
329 PRIMARY KEY (name))
330
331 """
332
333 SQLITE_JOURNAL_MODE: str = "WAL"
334
335 DDL_PROPERTIES: str = """\
336CREATE TABLE IF NOT EXISTS properties (
337 name TEXT,
338 value TEXT,
339 m_time INTEGER DEFAULT (strftime('%s', 'now')), -- last modified (unix epoch) time in sec.
340 PRIMARY KEY (name))"""
341
342 """Table to store properties of the DB application"""
343
344 SQL_GET: str = "SELECT value FROM properties WHERE name = ?"
345 SQL_M_TIME: str = "SELECT m_time FROM properties WHERE name = ?"
346 SQL_SET: str = (
347 "INSERT INTO properties (name, value) VALUES (?, ?)"
348 " ON CONFLICT(name) DO UPDATE"
349 " SET value=excluded.value, m_time=strftime('%s', 'now')"
350 )
351 SQL_DELETE: str = "DELETE FROM properties WHERE name = ?"
352 SQL_TABLE_EXISTS: str = (
353 "SELECT name FROM sqlite_master"
354 " WHERE type='table' AND name='properties'"
355 ) # fmt:skip
356 SQLITE_CONNECT_ARGS: dict[str, str | int | bool | None] = dict(SQLiteAppl.SQLITE_CONNECT_ARGS)
357
358 # pylint: disable=super-init-not-called
359 def __init__(self, db_url: str): # pyright: ignore[reportMissingSuperCall]
360
361 self.db_url: str = db_url
362 self._init_done: bool = False
363 self._compatibility()
364
365 def init(self, conn: sqlite3.Connection) -> bool:
366 """Initializes DB schema of the properties in the DB."""
367
368 if self._init_done:
369 return False
370 self._init_done = True
371 logger.debug("init properties of DB: %s", self.db_url)
372 res = conn.execute(self.SQL_TABLE_EXISTS)
373 if res.fetchone() is None: # DB schema needs to be be created
374 self.create_schema(conn)
375 return True
376
377 def __call__(self, name: str, default: t.Any = None) -> t.Any:
378 """Returns the value of the property ``name`` or ``default`` if property
379 not exists in DB."""
380
381 res = self.DB.execute(self.SQL_GET, (name,)).fetchone()
382 if res is None:
383 return default
384 return res[0]
385
386 def set(self, name: str, value: str | int):
387 """Set ``value`` of property ``name`` in DB. If property already
388 exists, update the ``m_time`` (and the value)."""
389
390 with self.DB:
391 self.DB.execute(self.SQL_SET, (name, value))
392
393 def delete(self, name: str) -> int:
394 """Delete of property ``name`` from DB."""
395 with self.DB:
396 cur = self.DB.execute(self.SQL_DELETE, (name,))
397 return cur.rowcount
398
399 def row(self, name: str, default: t.Any = None):
400 """Returns the DB row of property ``name`` or ``default`` if property
401 not exists in DB."""
402
403 res = self.DB.execute("SELECT * FROM properties WHERE name = ?", (name,))
404 row = res.fetchone()
405 if row is None:
406 return default
407
408 col_names = [column[0] for column in row.description]
409 return dict(zip(col_names, row))
410
411 def m_time(self, name: str, default: int = 0) -> int:
412 """Last modification time of this property."""
413 res = self.DB.execute(self.SQL_M_TIME, (name,))
414 row = res.fetchone()
415 if row is None:
416 return default
417 return int(row[0])
418
419 def create_schema(self, conn: sqlite3.Connection):
420 with conn:
421 conn.execute(self.DDL_PROPERTIES)
422
423 def __str__(self) -> str:
424 lines: list[str] = []
425 for row in self.DB.execute("SELECT name, value, m_time FROM properties"):
426 name, value, m_time = row
427 m_time = datetime.datetime.fromtimestamp(m_time).strftime("%Y-%m-%d %H:%M:%S")
428 lines.append(f"[last modified: {m_time}] {name:20s}: {value}")
429 return "\n".join(lines)
sqlite3.Connection|None _conn
Definition sqlitedb.py:59
__init__(self, "SQLiteAppl" app)
Definition sqlitedb.py:56
sqlite3.Connection get_connect(cls, "SQLiteAppl" app)
Definition sqlitedb.py:43
sqlite3.Connection conn(self)
Definition sqlitedb.py:67
sqlite3.Connection _connect(self)
Definition sqlitedb.py:190
sqlite3.Connection|None _DB
Definition sqlitedb.py:173
sqlite3.Connection connect(self)
Definition sqlitedb.py:196
__init__(self, str db_url)
Definition sqlitedb.py:154
SQLiteProperties properties
Definition sqlitedb.py:157
bool init(self, sqlite3.Connection conn)
Definition sqlitedb.py:280
create_schema(self, sqlite3.Connection conn)
Definition sqlitedb.py:308
register_functions(self, sqlite3.Connection conn)
Definition sqlitedb.py:217
sqlite3.Connection DB(self)
Definition sqlitedb.py:242
create_schema(self, sqlite3.Connection conn)
Definition sqlitedb.py:419
int delete(self, str name)
Definition sqlitedb.py:393
set(self, str name, str|int value)
Definition sqlitedb.py:386
row(self, str name, t.Any default=None)
Definition sqlitedb.py:399
int m_time(self, str name, int default=0)
Definition sqlitedb.py:411
__init__(self, str db_url)
Definition sqlitedb.py:359
t.Any __call__(self, str name, t.Any default=None)
Definition sqlitedb.py:377
bool init(self, sqlite3.Connection conn)
Definition sqlitedb.py:365