sqlite
notes
- Uses bytecode instead of AST like postgres https://sqlite.org/draft/whybytecode.html
- maybe use bytecodealliance/weval for compiler types?
- https://github.com/mmomtchev/sqlite-wasm-http for httpvfs via range headers
- https://github.com/mlin/sqlite_web_vfs as extension
- https://www.sqlite.org/limits.html
- ~281 TB limit with large page size
- stores each index and table as separate b-tree over the database pages
BEGIN CONCURRENT
allows multiple writers with separate tables or index's separated by page size.- hctree concurrency for row level locking
- uses session extension for leader-follower backend replication
- higher page bit (48 bit) support higher storage limit of 1EiB
- single flat file database
- C source can be compiled to WASM/Javascript sqlite.org/fiddle
- Replication options
- session extension or hctree
- sqliterysnc
- built in to standard sqlite tools.
- Uses virtual db table extension.
- opens transaction, hashes differences by page size and copies the database page blobs to attached database.
- both databases require same page size
- dqlite
- custom c library by canonical
- rqlite
- uses sqlite as storage engine
- litefs
- streaming replication to s3
- fuse based file replication with 1 active writer in cluster
- litestream for recovery
- crdt using last write wins
- libsql is a fork of sqlite with extra features
- strict tables for type enforcement
- set primary key integer id to avoid adding rowid to a table
WITHOUT ROWID
- session extension for changeset management
- max
PRAGMA page_size = 65536
- powers of 2 ex
16384
for 16KiB - default 4096
- require
PRAGMA journal_mode = OFF
- powers of 2 ex
- reduce wal before VACUUM INTO
PRAGMA wal_checkpoint(TRUNCATE)
pragma integrity_check;
.recover
command for corruption- sqlar is file sqlite archive for in db compression of files (zlib)
- sqlite v3.4.0 added support wasm
- opfs enabled when loaded from worker
- opfs not limited in size unlike localstorage
- currently locking issues between threads/tabs
- opfs enabled when loaded from worker
- optimizer can create temporary index's automatically.
dictated as
USING AUTOMATIC COVERING INDEX
in theexplain
query output - multi column index
- import csv as virtual table or database
pragma journal_mode = WAL;
for write ahead log (replaced shared cache)WAL2
for 2 files for concurrent checkpointing while writing.pragma journal_mode = delete
before changing wal mode from 1 to 2
pragma synchronous = normal;
pragma temp_store = memory;
to disable temp writespragma mmap_size = 30000000000;
for memory mapping- open in memory database with
:memory:
- foreign keys disabled in transactions and by default
PRAGMA foreign keys
memsys5
memory allocator for embedded or deterministic memory usage- disable posix advisory locks
site database
-- create sqlar table from directory. Derived from '.archive -cn *.org' or 'sqlite3 -Acn *.org' -- setup db PRAGMA page_size=512; SAVEPOINT ar; DROP TABLE IF EXISTS sqlar; CREATE TABLE IF NOT EXISTS sqlar( name TEXT PRIMARY KEY, -- name of the file mode INT, -- access permissions mtime INT, -- last modification time sz INT, -- original file size data BLOB -- compressed content ); -- add uncompressed org files REPLACE INTO sqlar(name,mode,mtime,sz,data) SELECT name, mode, mtime, CASE substr(lsmode(mode),1,1) WHEN '-' THEN length(data) WHEN 'd' THEN 0 ELSE -1 END, data FROM fsdir('.',NULL) AS disk WHERE lsmode(mode) NOT LIKE '?%' AND name GLOB '*.org'; -- setup fts DROP TABLE IF EXISTS site_pages; CREATE VIRTUAL TABLE site_pages USING fts5(name, data); -- add data to index INSERT INTO site_pages SELECT name, data FROM sqlar; -- optimize fts index INSERT INTO site_pages(site_pages) VALUES('optimize'); -- keep only fts's copy of content DROP TABLE IF EXISTS sqlar; RELEASE ar; VACUUM;
- view query ex:
-- get pages and preview from query (json?) .mode box --wrap 30 .param set $query 'emacs' SELECT REPLACE(name, '.org', '.html') AS name, snippet(site_pages, 1, '<b>','</b>', '', 64) AS snippet FROM site_pages WHERE data MATCH $query ORDER BY rank;
(ignore-errors ; create db and set mtime to newest (let* ((base-dir (file-name-directory (or load-file-name buffer-file-name))) (db (concat base-dir "sqlar.sqlite"))) (call-process "sqlite3" nil t nil db (format ".read %s" (concat base-dir "db.sql"))); create db (set-file-times db (file-attribute-modification-time (file-attributes (concat base-dir "sqlite.org")))); set timestamps (mapc #'(lambda (org-file) ; MAYBE change to sql query by mtime for newest (let ((db-ts (file-attribute-modification-time (file-attributes db))) (org-ts (file-attribute-modification-time (file-attributes org-file)))) (if (time-less-p db-ts org-ts) (progn (message "setting %s timestamp to %s's" db org-file) (set-file-times db org-ts))))) (directory-files base-dir t "\.org$")) (set-file-times (concat base-dir "sqlite-query.js") (file-attribute-modification-time (file-attributes (concat base-dir "sqlite.org")))) (set-file-times (concat base-dir "sql-worker.js") (file-attribute-modification-time (file-attributes (concat base-dir "sqlite.org"))))))
site search js/sql
/* Requires sqlite3.js, sqlite3.wasm and sql-worker.js(my worker wrapper) // MAYBE add service-worker caching with http.vfs splitting // MAYBE use opfs vfs with sync support (multiple tabs) // example page: <script src="sqlite3.js"></script> <script src="sqlite-query.js"></script> <article class="panel is-primary"> <div class="panel-block"> <input class="input" type="text" placeholder="Search" id="mySearch"> <button id="mySearchBtn" class="button">🔍</button> </div> </article> <dialog id="myModal" class="modal"> <div class="modal-content"> <button id="modalClose" class="modal-close is-large" aria-label="close">×</button> <div id="modalDiv"></div> </div> </dialog> */ document.addEventListener('DOMContentLoaded',() => { document.getElementById("modalClose").onclick = function() { //document.getElementById("modal").classList.remove("modal-is-open"); document.getElementById("modal").close(); } document.getElementById("search").addEventListener("keyup", function(event){ if (event.keyCode == 13) { // on enter if (typeof globalThis.w === 'undefined') { // load sqlite worker console.log("creating worker"); const workerArgs = []; workerArgs.push("sql-worker.js"); const w = new Worker(...workerArgs); w.onmessage = (msg) => { // write results as table rows and links let html = '<table class="table">'; msg.data.forEach((row) => { html = html + "<tr><td><a href='" +row[0]+"'>"+row[1]+"</a></td></tr>"}); html = html + "</table>"; document.getElementById("modalDiv").innerHTML = html; //document.getElementById("modal").classList.add("modal-is-open"); document.getElementById("modal").showModal(); }; globalThis.w = w; // query worker let query = document.getElementById("search").value; globalThis.w.postMessage(query); } else { let query = document.getElementById("search").value; globalThis.w.postMessage(query); } } }); });
onmessage = (e) => { const opt = {"returnValue": "resultRows"}; const sql ="SELECT REPLACE(name, '.org', '.html') AS name, snippet(site_pages, 1, '<b>','</b>', '', 64) AS snippet FROM site_pages WHERE data MATCH '"+ e.data +"' ORDER BY rank;"; if (typeof globalThis.sqlite3InitModule === 'undefined') { // load sqlite3 importScripts('sqlite3.js'); // mjs module? globalThis.sqlite3InitModule().then((sqlite3)=> { sqlite3.installOpfsSAHPoolVfs().then((poolUtil)=> { // use opfs for storage if (poolUtil.getFileCount() == 0) { console.log("fetching sqlite opfs files"); fetch('sqlar.sqlite').then(response => response.arrayBuffer()).then(arrayBuffer => { poolUtil.importDb('/sqlar.sqlite', arrayBuffer); // requires absolute path (lack of transparent naming obscures real name) const db = new poolUtil.OpfsSAHPoolDb('/sqlar.sqlite'); globalThis.db = db; const res = db.exec([sql], opt); postMessage(res); }); } else { console.log("checking existing sqlite opfs files" + poolUtil.getFileNames()); fetch('sqlar.sqlite', {method: "HEAD"}).then(response => { console.log("Response headers are: " + response.headers.get("Content-Length")); const db = new poolUtil.OpfsSAHPoolDb('/sqlar.sqlite'); // load existing db to check size against headers console.log("loaded db"); const header_size = response.headers.get("Content-Length"); const size_sql = "SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size();"; const size = db.selectArray(size_sql); // selectArray to not include results in db.exec console.log("Header size is: " + header_size + " DB size is: " + size); if (size == header_size) { console.log("Database is up to date"); globalThis.db = db; const res = db.exec([sql], opt); postMessage(res); } else { console.log("Updating database with fetch"); db.close(); // close before wiping fetch('sqlar.sqlite').then(response => response.arrayBuffer()).then(arrayBuffer => { poolUtil.wipeFiles().then(_ => { poolUtil.importDb('/sqlar.sqlite', arrayBuffer); const db = new poolUtil.OpfsSAHPoolDb('/sqlar.sqlite'); globalThis.db = db; const res = db.exec([sql], opt); postMessage(res); }); }); } }).catch((error) => { console.log("Failed to fetch HEAD. Using Existing DB."); const db = new poolUtil.OpfsSAHPoolDb('/sqlar.sqlite'); globalThis.db = db; const res = db.exec([sql], opt); postMessage(res); }); } }).catch((error) => { console.log("Failed to use opfs using temp db"); const capi = sqlite3.capi, oo = sqlite3.oo1, wasm = sqlite3.wasm; const db = new oo.DB(); fetch('sqlar.sqlite').then(response => response.arrayBuffer()).then(arrayBuffer => { const bytes = new Uint8Array(arrayBuffer); const p = sqlite3.wasm.allocFromTypedArray(bytes); const rc = sqlite3.capi.sqlite3_deserialize(db.pointer, 'main', p, bytes.length, bytes.length,sqlite3.capi.SQLITE_DESERIALIZE_FREEONCLOSE); //db.checkRc(rc); globalThis.db = db; const res = globalThis.db.exec([sql], opt); postMessage(res); }); }); }); }else { const res = globalThis.db.exec([sql], opt); postMessage(res); } };