.oO SearXNG Developer Documentation Oo.
Loading...
Searching...
No Matches
sqlitedb.py
Go to the documentation of this file.
1# SPDX-License-Identifier: AGPL-3.0-or-later
2"""Implementations to make access to SQLite databases a little more convenient.
3
4:py:obj:`SQLiteAppl`
5 Abstract class with which DB applications can be implemented.
6
7:py:obj:`SQLiteProperties`:
8 Class to manage properties stored in a database.
9
10Examplarical implementations based on :py:obj:`SQLiteAppl`:
11
12:py:obj:`searx.cache.ExpireCacheSQLite` :
13 Cache that manages key/value pairs in a SQLite DB, in which the key/value
14 pairs are deleted after an "expire" time. This type of cache is used, for
15 example, for the engines, see :py:obj:`searx.enginelib.EngineCache`.
16
17:py:obj:`searx.favicons.cache.FaviconCacheSQLite` :
18 Favicon cache that manages the favicon BLOBs in a SQLite DB.
19
20----
21"""
22from __future__ import annotations
23
24import abc
25import datetime
26import re
27import sqlite3
28import sys
29import threading
30import uuid
31
32from searx import logger
33
34logger = logger.getChild("sqlitedb")
35
36THREAD_LOCAL = threading.local()
37
38
40 """A *thead-local* DB session"""
41
42 @classmethod
43 def get_connect(cls, app: SQLiteAppl) -> sqlite3.Connection:
44 """Returns a thread local DB connection. The connection is only
45 established once per thread.
46 """
47 if getattr(THREAD_LOCAL, "DBSession_map", None) is None:
48 THREAD_LOCAL.DBSession_map = {}
49
50 session = THREAD_LOCAL.DBSession_map.get(app.db_url)
51 if session is None:
52 session = cls(app)
53 return session.conn
54
55 def __init__(self, app: SQLiteAppl):
56 self.uuid = uuid.uuid4()
57 self.app = app
58 self._conn = None
59 # self.__del__ will be called, when thread ends
60 if getattr(THREAD_LOCAL, "DBSession_map", None) is None:
61 THREAD_LOCAL.DBSession_map = {}
62 THREAD_LOCAL.DBSession_map[self.app.db_url] = self
63
64 @property
65 def conn(self) -> sqlite3.Connection:
66 msg = f"[{threading.current_thread().ident}] DBSession: " f"{self.app.__class__.__name__}({self.app.db_url})"
67 if self._conn is None:
68 self._conn = self.app.connect()
69 logger.debug("%s --> created new connection", msg)
70 # else:
71 # logger.debug("%s --> already connected", msg)
72
73 return self._conn
74
75 def __del__(self):
76 try:
77 if self._conn is not None:
78 # HINT: Don't use Python's logging facility in a destructor, it
79 # will produce error reports when python aborts the process or
80 # thread, because at this point objects that the logging module
81 # needs, do not exist anymore.
82 # msg = f"DBSession: close [{self.uuid}] {self.app.__class__.__name__}({self.app.db_url})"
83 # logger.debug(msg)
84 self._conn.close()
85 except Exception: # pylint: disable=broad-exception-caught
86 pass
87
88
89class SQLiteAppl(abc.ABC):
90 """Abstract base class for implementing convenient DB access in SQLite
91 applications. In the constructor, a :py:obj:`SQLiteProperties` instance is
92 already aggregated under ``self.properties``."""
93
94 DDL_CREATE_TABLES: dict[str, str] = {}
95
96 DB_SCHEMA: int = 1
97 """As soon as changes are made to the DB schema, the version number must be
98 increased. Changes to the version number require the DB to be recreated (or
99 migrated / if an migration path exists and is implemented)."""
100
101 SQLITE_THREADING_MODE = {
102 0: "single-thread",
103 1: "multi-thread",
104 3: "serialized"}[sqlite3.threadsafety] # fmt:skip
105 """Threading mode of the SQLite library. Depends on the options used at
106 compile time and is different for different distributions and architectures.
107
108 Possible values are 0:``single-thread``, 1:``multi-thread``,
109 3:``serialized`` (see :py:obj:`sqlite3.threadsafety`). Pre- Python 3.11
110 this value was hard coded to 1.
111
112 Depending on this value, optimizations are made, e.g. in “serialized” mode
113 it is not necessary to create a separate DB connector for each thread.
114 """
115
116 SQLITE_JOURNAL_MODE = "WAL"
117 """``SQLiteAppl`` applications are optimized for WAL_ mode, its not recommend
118 to change the journal mode (see :py:obj:`SQLiteAppl.tear_down`).
119
120 .. _WAL: https://sqlite.org/wal.html
121 """
122 SQLITE_CONNECT_ARGS = {
123 # "timeout": 5.0,
124 # "detect_types": 0,
125 "check_same_thread": bool(SQLITE_THREADING_MODE != "serialized"),
126 "cached_statements": 0, # https://github.com/python/cpython/issues/118172
127 # "uri": False,
128 "isolation_level": None,
129 } # fmt:skip
130 """Connection arguments (:py:obj:`sqlite3.connect`)
131
132 ``check_same_thread``:
133 Is disabled by default when :py:obj:`SQLITE_THREADING_MODE` is
134 ``serialized``. The check is more of a hindrance in this case because it
135 would prevent a DB connector from being used in multiple threads.
136
137 ``cached_statements``:
138 Is set to ``0`` by default. Note: Python 3.12+ fetch result are not
139 consistent in multi-threading application and causing an API misuse error.
140
141 The multithreading use in SQLiteAppl is intended and supported if
142 threadsafety is set to 3 (aka "serialized"). CPython supports “serialized”
143 from version 3.12 on, but unfortunately only with errors:
144
145 - https://github.com/python/cpython/issues/118172
146 - https://github.com/python/cpython/issues/123873
147
148 The workaround for SQLite3 multithreading cache inconsistency is to set
149 option ``cached_statements`` to ``0`` by default.
150 """
151
152 def __init__(self, db_url):
153
154 self.db_url = db_url
156 self._init_done = False
157 self._compatibility()
158 # atexit.register(self.tear_down)
159
160 # def tear_down(self):
161 # """:ref:`Vacuuming the WALs` upon normal interpreter termination
162 # (:py:obj:`atexit.register`).
163
164 # .. _SQLite: Vacuuming the WALs: https://www.theunterminatedstring.com/sqlite-vacuuming/
165 # """
166 # self.DB.execute("PRAGMA wal_checkpoint(TRUNCATE)")
167
168 def _compatibility(self):
169
170 if self.SQLITE_THREADING_MODE == "serialized":
171 self._DB = None
172 else:
173 msg = (
174 f"SQLite library is compiled with {self.SQLITE_THREADING_MODE} mode,"
175 " read https://docs.python.org/3/library/sqlite3.html#sqlite3.threadsafety"
176 )
177 if threading.active_count() > 1:
178 logger.error(msg)
179 else:
180 logger.warning(msg)
181
182 if sqlite3.sqlite_version_info <= (3, 35):
183 # See "Generalize UPSERT:" in https://sqlite.org/releaselog/3_35_0.html
184 logger.critical(
185 "SQLite runtime library version %s is not supported (require >= 3.35)", sqlite3.sqlite_version
186 )
187
188 def _connect(self) -> sqlite3.Connection:
189 conn = sqlite3.Connection(self.db_url, **self.SQLITE_CONNECT_ARGS) # type: ignore
190 conn.execute(f"PRAGMA journal_mode={self.SQLITE_JOURNAL_MODE}")
191 self.register_functions(conn)
192 return conn
193
194 def connect(self) -> sqlite3.Connection:
195 """Creates a new DB connection (:py:obj:`SQLITE_CONNECT_ARGS`). If not
196 already done, the DB schema is set up. The caller must take care of
197 closing the resource. Alternatively, :py:obj:`SQLiteAppl.DB` can also
198 be used (the resource behind `self.DB` is automatically closed when the
199 process or thread is terminated).
200 """
201 if sys.version_info < (3, 12):
202 # Prior Python 3.12 there is no "autocommit" option
203 self.SQLITE_CONNECT_ARGS.pop("autocommit", None)
204
205 msg = (
206 f"[{threading.current_thread().ident}] {self.__class__.__name__}({self.db_url})"
207 f" {self.SQLITE_CONNECT_ARGS} // {self.SQLITE_JOURNAL_MODE}"
208 )
209 logger.debug(msg)
210
211 with self._connect() as conn:
212 self.init(conn)
213 return conn
214
215 def register_functions(self, conn):
216 """Create user-defined_ SQL functions.
217
218 ``REGEXP(<pattern>, <field>)`` : 0 | 1
219 `re.search`_ returns (int) 1 for a match and 0 for none match of
220 ``<pattern>`` in ``<field>``.
221
222 .. code:: sql
223
224 SELECT '12' AS field WHERE REGEXP('^[0-9][0-9]$', field)
225 -- 12
226
227 SELECT REGEXP('[0-9][0-9]', 'X12Y')
228 -- 1
229 SELECT REGEXP('[0-9][0-9]', 'X1Y')
230 -- 0
231
232 .. _user-defined: https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.create_function
233 .. _deterministic: https://sqlite.org/deterministic.html
234 .. _re.search: https://docs.python.org/3/library/re.html#re.search
235 """
236
237 conn.create_function("regexp", 2, lambda x, y: 1 if re.search(x, y) else 0, deterministic=True)
238
239 @property
240 def DB(self) -> sqlite3.Connection:
241 """Provides a DB connection. The connection is a *singleton* and
242 therefore well suited for read access. If
243 :py:obj:`SQLITE_THREADING_MODE` is ``serialized`` only one DB connection
244 is created for all threads.
245
246 .. note::
247
248 For dedicated `transaction control`_, it is recommended to create a
249 new connection (:py:obj:`SQLiteAppl.connect`).
250
251 .. _transaction control:
252 https://docs.python.org/3/library/sqlite3.html#sqlite3-controlling-transactions
253 """
254
255 conn = None
256
257 if self.SQLITE_THREADING_MODE == "serialized":
258 # Theoretically it is possible to reuse the DB cursor across threads
259 # as of Python 3.12, in practice the threading of the cursor seems
260 # to me a little faulty that I prefer to establish one connection
261 # per thread.
262 #
263 # may we can activate this code one day ..
264 # if self._DB is None:
265 # self._DB = self.connect()
266 # conn = self._DB
267 conn = DBSession.get_connect(self)
268 else:
269 conn = DBSession.get_connect(self)
270
271 # Since more than one instance of SQLiteAppl share the same DB
272 # connection, we need to make sure that each SQLiteAppl instance has run
273 # its init method at least once.
274 self.init(conn)
275
276 return conn
277
278 def init(self, conn: sqlite3.Connection) -> bool:
279 """Initializes the DB schema and properties, is only executed once even
280 if called several times.
281
282 If the initialization has not yet taken place, it is carried out and a
283 `True` is returned to the caller at the end. If the initialization has
284 already been carried out in the past, `False` is returned.
285 """
286
287 if self._init_done:
288 return False
289 self._init_done = True
290
291 logger.debug("init DB: %s", self.db_url)
292 self.properties.init(conn)
293
294 ver = self.properties("DB_SCHEMA")
295 if ver is None:
296 with conn:
297 self.create_schema(conn)
298 else:
299 ver = int(ver)
300 if ver != self.DB_SCHEMA:
301 raise sqlite3.DatabaseError("Expected DB schema v%s, DB schema is v%s" % (self.DB_SCHEMA, ver))
302 logger.debug("DB_SCHEMA = %s", ver)
303
304 return True
305
306 def create_schema(self, conn: sqlite3.Connection):
307
308 logger.debug("create schema ..")
309 self.properties.set("DB_SCHEMA", self.DB_SCHEMA)
310 self.properties.set("LAST_MAINTENANCE", "")
311 with conn:
312 for table_name, sql in self.DDL_CREATE_TABLES.items():
313 conn.execute(sql)
314 self.properties.set(f"Table {table_name} created", table_name)
315
316
318 """Simple class to manage properties of a DB application in the DB. The
319 object has its own DB connection and transaction area.
320
321 .. code:: sql
322
323 CREATE TABLE IF NOT EXISTS properties (
324 name TEXT,
325 value TEXT,
326 m_time INTEGER DEFAULT (strftime('%s', 'now')),
327 PRIMARY KEY (name))
328
329 """
330
331 SQLITE_JOURNAL_MODE = "WAL"
332
333 DDL_PROPERTIES = """\
334CREATE TABLE IF NOT EXISTS properties (
335 name TEXT,
336 value TEXT,
337 m_time INTEGER DEFAULT (strftime('%s', 'now')), -- last modified (unix epoch) time in sec.
338 PRIMARY KEY (name))"""
339
340 """Table to store properties of the DB application"""
341
342 SQL_GET = "SELECT value FROM properties WHERE name = ?"
343 SQL_M_TIME = "SELECT m_time FROM properties WHERE name = ?"
344 SQL_SET = (
345 "INSERT INTO properties (name, value) VALUES (?, ?)"
346 " ON CONFLICT(name) DO UPDATE"
347 " SET value=excluded.value, m_time=strftime('%s', 'now')"
348 )
349 SQL_DELETE = "DELETE FROM properties WHERE name = ?"
350 SQL_TABLE_EXISTS = (
351 "SELECT name FROM sqlite_master"
352 " WHERE type='table' AND name='properties'"
353 ) # fmt:skip
354 SQLITE_CONNECT_ARGS = dict(SQLiteAppl.SQLITE_CONNECT_ARGS)
355
356 def __init__(self, db_url: str): # pylint: disable=super-init-not-called
357
358 self.db_url = db_url
359 self._init_done = False
360 self._compatibility()
361
362 def init(self, conn: sqlite3.Connection) -> bool:
363 """Initializes DB schema of the properties in the DB."""
364
365 if self._init_done:
366 return False
367 self._init_done = True
368 logger.debug("init properties of DB: %s", self.db_url)
369 res = conn.execute(self.SQL_TABLE_EXISTS)
370 if res.fetchone() is None: # DB schema needs to be be created
371 self.create_schema(conn)
372 return True
373
374 def __call__(self, name: str, default=None):
375 """Returns the value of the property ``name`` or ``default`` if property
376 not exists in DB."""
377
378 res = self.DB.execute(self.SQL_GET, (name,)).fetchone()
379 if res is None:
380 return default
381 return res[0]
382
383 def set(self, name: str, value: str | int):
384 """Set ``value`` of property ``name`` in DB. If property already
385 exists, update the ``m_time`` (and the value)."""
386
387 with self.DB:
388 self.DB.execute(self.SQL_SET, (name, value))
389
390 def delete(self, name: str) -> int:
391 """Delete of property ``name`` from DB."""
392 with self.DB:
393 cur = self.DB.execute(self.SQL_DELETE, (name,))
394 return cur.rowcount
395
396 def row(self, name: str, default=None):
397 """Returns the DB row of property ``name`` or ``default`` if property
398 not exists in DB."""
399
400 res = self.DB.execute("SELECT * FROM properties WHERE name = ?", (name,))
401 row = res.fetchone()
402 if row is None:
403 return default
404
405 col_names = [column[0] for column in row.description]
406 return dict(zip(col_names, row))
407
408 def m_time(self, name: str, default: int = 0) -> int:
409 """Last modification time of this property."""
410 res = self.DB.execute(self.SQL_M_TIME, (name,))
411 row = res.fetchone()
412 if row is None:
413 return default
414 return int(row[0])
415
416 def create_schema(self, conn):
417 with conn:
418 conn.execute(self.DDL_PROPERTIES)
419
420 def __str__(self) -> str:
421 lines = []
422 for row in self.DB.execute("SELECT name, value, m_time FROM properties"):
423 name, value, m_time = row
424 m_time = datetime.datetime.fromtimestamp(m_time).strftime("%Y-%m-%d %H:%M:%S")
425 lines.append(f"[last modified: {m_time}] {name:20s}: {value}")
426 return "\n".join(lines)
__init__(self, SQLiteAppl app)
Definition sqlitedb.py:55
sqlite3.Connection get_connect(cls, SQLiteAppl app)
Definition sqlitedb.py:43
sqlite3.Connection conn(self)
Definition sqlitedb.py:65
sqlite3.Connection _connect(self)
Definition sqlitedb.py:188
__init__(self, db_url)
Definition sqlitedb.py:152
sqlite3.Connection connect(self)
Definition sqlitedb.py:194
register_functions(self, conn)
Definition sqlitedb.py:215
bool init(self, sqlite3.Connection conn)
Definition sqlitedb.py:278
create_schema(self, sqlite3.Connection conn)
Definition sqlitedb.py:306
sqlite3.Connection DB(self)
Definition sqlitedb.py:240
row(self, str name, default=None)
Definition sqlitedb.py:396
int delete(self, str name)
Definition sqlitedb.py:390
set(self, str name, str|int value)
Definition sqlitedb.py:383
int m_time(self, str name, int default=0)
Definition sqlitedb.py:408
__call__(self, str name, default=None)
Definition sqlitedb.py:374
__init__(self, str db_url)
Definition sqlitedb.py:356
bool init(self, sqlite3.Connection conn)
Definition sqlitedb.py:362