sqlite
notes
- uses bytecode for query execution instead of an AST like postgres https://sqlite.org/draft/whybytecode.html
- maybe use bytecodealliance/weval for compiler types?
- https://github.com/mmomtchev/sqlite-wasm-http for httpvfs via range headers
- https://github.com/mlin/sqlite_web_vfs as extension
- https://www.sqlite.org/limits.html
- ~281 TB limit with large page size
- stores each index and table as a separate b-tree over the database pages
- BEGIN CONCURRENT allows multiple writers as long as they touch separate tables or indexes (conflicts are detected at page granularity)
- hctree for row-level locking concurrency
- uses the session extension for leader-follower backend replication
- larger page numbers (48 bit) support a higher storage limit of 1EiB
- single flat file database
- the C source can be compiled to WASM/Javascript, see sqlite.org/fiddle
- Replication options
- session extension or hctree
- sqlite3_rsync
- built into the standard sqlite tools
- uses a virtual db table extension
- opens a transaction, hashes pages to find differences, and copies the changed database page blobs to the attached database
- both databases require the same page size
- dqlite
- custom c library by canonical
- rqlite
- uses sqlite as storage engine
- litefs
- streaming replication to s3
- fuse-based file replication with 1 active writer in the cluster
- litestream for recovery
- crdt-based replication using last-write-wins
- libsql is a fork of sqlite with extra features
- strict tables for type enforcement (sketch after this list)
- use an integer id primary key, or declare the table WITHOUT ROWID, to avoid storing a separate rowid (sketch after this list)
- session extension for changeset management (CLI sketch after this list)
- max PRAGMA page_size = 65536; powers of 2, ex 16384 for 16KiB; default 4096 (pragma sketch after this list)
- changing page_size requires PRAGMA journal_mode = OFF first (it cannot change in wal mode)
- reduce the wal before VACUUM INTO with PRAGMA wal_checkpoint(TRUNCATE)
- PRAGMA integrity_check; and the .recover command for corruption
- sqlar is the sqlite archive format for in-db compression of files (zlib)
- sqlite v3.40.0 added wasm support
- opfs enabled when loaded from worker
- opfs is not limited in size, unlike localstorage
- currently there are locking issues between threads/tabs
- the optimizer can create temporary indexes automatically, shown as USING AUTOMATIC COVERING INDEX in the explain query output (example after this list)
- multi-column index
- import csv as a virtual table or into a database (sketch after this list)
- pragma journal_mode = WAL; for write-ahead log (replaced shared cache)
- WAL2 uses 2 wal files so checkpointing can run concurrently with writing; pragma journal_mode = delete before changing wal mode from 1 to 2
- pragma synchronous = normal;
- pragma temp_store = memory; to keep temp writes in memory
- pragma mmap_size = 30000000000; for memory mapping
- open an in-memory database with :memory:
- foreign keys disabled by default and a no-op inside transactions; enable with PRAGMA foreign_keys = ON (pragma sketch after this list)
- memsys5 memory allocator for embedded or deterministic memory usage
- disable posix advisory locks
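
strict / WITHOUT ROWID sketch (table and column names are hypothetical):
-- STRICT enforces the declared column types; WITHOUT ROWID stores rows
-- in the primary-key b-tree instead of a hidden rowid b-tree
CREATE TABLE pages(
  path TEXT PRIMARY KEY, -- becomes the row key, no separate rowid
  mtime INT,
  body BLOB
) STRICT, WITHOUT ROWID;
-- in an ordinary table an INTEGER PRIMARY KEY aliases the rowid rather
-- than adding a second key column
CREATE TABLE posts(id INTEGER PRIMARY KEY, title TEXT) STRICT;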
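
pragma sketch pulling the notes above together (values are the ones noted, tune to taste):
-- page_size only takes effect on an empty database or after VACUUM,
-- and cannot be changed while in wal mode
PRAGMA journal_mode = OFF;
PRAGMA page_size = 16384; -- power of 2, max 65536, default 4096
VACUUM; -- rebuild the file with the new page size
PRAGMA journal_mode = WAL;
PRAGMA synchronous = normal;
PRAGMA temp_store = memory; -- keep temp tables/indexes out of disk writes
PRAGMA mmap_size = 30000000000; -- memory-map reads
PRAGMA foreign_keys = ON; -- off by default; a no-op inside a transaction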
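
session extension CLI sketch; the extension itself is a C API, but the sqlite3 shell wraps it in .session (table and file names are hypothetical):
.open main.db
-- record changes made to the "main" database in a session named s1
.session open main s1
-- track the pages table ("*" attaches all tables)
.session s1 attach pages
UPDATE pages SET mtime = 0 WHERE path = 'index.org';
-- write the recorded changes out to a changeset file
.session s1 changeset pages.changeset
.session s1 close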
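
automatic index example (hypothetical tables); with no index on the join column the planner may build a transient one:
CREATE TABLE a(x INT, y INT);
CREATE TABLE b(x INT, z INT);
EXPLAIN QUERY PLAN SELECT * FROM a JOIN b USING (x);
-- expected plan line (wording varies by version):
--   SEARCH b USING AUTOMATIC COVERING INDEX (x=?)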
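
csv import sketch (file name hypothetical); one-shot import via the CLI, or query the file in place with the csv extension:
.import --csv data.csv mytable
-- or, with the csv extension loaded (e.g. .load ./csv):
CREATE VIRTUAL TABLE temp.t1 USING csv(filename='data.csv');
SELECT * FROM t1 LIMIT 5;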
site database
-- create sqlar table from directory. Derived from '.archive -cn *.org' or 'sqlite3 -Acn *.org'
-- setup db
PRAGMA page_size=512;
SAVEPOINT ar;
DROP TABLE IF EXISTS sqlar;
CREATE TABLE IF NOT EXISTS sqlar(
name TEXT PRIMARY KEY, -- name of the file
mode INT, -- access permissions
mtime INT, -- last modification time
sz INT, -- original file size
data BLOB -- compressed content
);
-- add uncompressed org files
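-- fsdir() and lsmode() below come from the fileio extension built into the sqlite3 CLI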
REPLACE INTO sqlar(name,mode,mtime,sz,data)
SELECT
name,
mode,
mtime,
CASE substr(lsmode(mode),1,1)
WHEN '-' THEN length(data)
WHEN 'd' THEN 0
ELSE -1 END,
data
FROM fsdir('.',NULL) AS disk
WHERE lsmode(mode) NOT LIKE '?%' AND name GLOB '*.org';
-- setup fts
DROP TABLE IF EXISTS site_pages;
CREATE VIRTUAL TABLE site_pages USING fts5(name, data);
-- add data to index
INSERT INTO site_pages
SELECT name, data
FROM sqlar;
-- optimize fts index
INSERT INTO site_pages(site_pages) VALUES('optimize');
-- keep only fts's copy of content
DROP TABLE IF EXISTS sqlar;
RELEASE ar;
VACUUM;
- view query ex:
-- get pages and preview from query (json?)
.mode box --wrap 30
.param set $query 'emacs'
SELECT REPLACE(name, '.org', '.html') AS name,
       snippet(site_pages, 1, '<b>','</b>', '', 64) AS snippet
FROM site_pages
WHERE data MATCH $query
ORDER BY rank;
(ignore-errors ; create db and set mtime to newest
(let* ((base-dir (file-name-directory (or load-file-name buffer-file-name)))
(db (concat base-dir "sqlar.sqlite")))
(call-process "sqlite3" nil t nil db (format ".read %s" (concat base-dir "db.sql"))); create db
(set-file-times db (file-attribute-modification-time (file-attributes (concat base-dir "sqlite.org")))); set timestamps
(mapc #'(lambda (org-file) ; MAYBE change to sql query by mtime for newest
(let ((db-ts (file-attribute-modification-time (file-attributes db)))
(org-ts (file-attribute-modification-time (file-attributes org-file))))
(when (time-less-p db-ts org-ts)
(message "setting %s timestamp to %s's" db org-file)
(set-file-times db org-ts))))
(directory-files base-dir t "\\.org$"))
(set-file-times (concat base-dir "sqlite-query.js") (file-attribute-modification-time (file-attributes (concat base-dir "sqlite.org"))))
(set-file-times (concat base-dir "sql-worker.js") (file-attribute-modification-time (file-attributes (concat base-dir "sqlite.org"))))))
site search js/sql
/* Requires sqlite3.js, sqlite3.wasm and sql-worker.js (my worker wrapper)
// MAYBE add service-worker caching with http.vfs splitting
// MAYBE use opfs vfs with sync support (multiple tabs)
// example page:
<script src="sqlite3.js"></script>
<script src="sqlite-query.js"></script>
<article class="panel is-primary">
<div class="panel-block">
<input class="input" type="text" placeholder="Search" id="search">
<button id="searchBtn" class="button">🔍</button>
</div>
</article>
<dialog id="modal" class="modal">
<div class="modal-content">
<button id="modalClose" class="modal-close is-large" aria-label="close">×</button>
<div id="modalDiv"></div>
</div>
</dialog>
*/
document.addEventListener('DOMContentLoaded',() => {
document.getElementById("modalClose").onclick = function() {
//document.getElementById("modal").classList.remove("modal-is-open");
document.getElementById("modal").close();
}
document.getElementById("search").addEventListener("keyup", function(event){
if (event.key === "Enter") { // on enter
if (typeof globalThis.w === 'undefined') { // load sqlite worker
console.log("creating worker");
const w = new Worker("sql-worker.js");
w.onmessage = (msg) => { // write results as table rows and links
let html = '<table class="table">';
msg.data.forEach((row) => { html = html + "<tr><td><a href='" +row[0]+"'>"+row[1]+"</a></td></tr>"});
html = html + "</table>";
document.getElementById("modalDiv").innerHTML = html;
//document.getElementById("modal").classList.add("modal-is-open");
document.getElementById("modal").showModal();
};
globalThis.w = w;
}
let query = document.getElementById("search").value;
globalThis.w.postMessage(query); // query the worker
}
});
});
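
/* sql-worker.js -- the worker wrapper: runs FTS queries against the sqlar database */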
onmessage = (e) => {
const opt = {returnValue: "resultRows", bind: [e.data]}; // bind the search term instead of concatenating it, avoiding SQL injection
const sql = "SELECT REPLACE(name, '.org', '.html') AS name, snippet(site_pages, 1, '<b>','</b>', '', 64) AS snippet FROM site_pages WHERE data MATCH ? ORDER BY rank;";
if (typeof globalThis.sqlite3InitModule === 'undefined') { // load sqlite3
importScripts('sqlite3.js'); // mjs module?
globalThis.sqlite3InitModule().then((sqlite3)=> {
sqlite3.installOpfsSAHPoolVfs().then((poolUtil)=> { // use opfs for storage
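// note: the SAH pool VFS works without the COOP/COEP headers that the plain "opfs" VFS needs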
if (poolUtil.getFileCount() == 0) {
console.log("fetching sqlite opfs files");
fetch('sqlar.sqlite').then(response => response.arrayBuffer()).then(arrayBuffer => {
poolUtil.importDb('/sqlar.sqlite', arrayBuffer); // requires absolute path (lack of transparent naming obscures real name)
const db = new poolUtil.OpfsSAHPoolDb('/sqlar.sqlite');
globalThis.db = db;
const res = db.exec([sql], opt);
postMessage(res);
});
} else {
console.log("checking existing sqlite opfs files" + poolUtil.getFileNames());
fetch('sqlar.sqlite', {method: "HEAD"}).then(response => {
console.log("Response headers are: " + response.headers.get("Content-Length"));
const db = new poolUtil.OpfsSAHPoolDb('/sqlar.sqlite'); // load existing db to check size against headers
console.log("loaded db");
const header_size = response.headers.get("Content-Length");
const size_sql = "SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size();";
const size = db.selectValue(size_sql); // selectValue returns the single scalar result
console.log("Header size is: " + header_size + " DB size is: " + size);
if (size == header_size) {
console.log("Database is up to date");
globalThis.db = db;
const res = db.exec([sql], opt);
postMessage(res);
} else {
console.log("Updating database with fetch");
db.close(); // close before wiping
fetch('sqlar.sqlite').then(response => response.arrayBuffer()).then(arrayBuffer => {
poolUtil.wipeFiles().then(_ => {
poolUtil.importDb('/sqlar.sqlite', arrayBuffer);
const db = new poolUtil.OpfsSAHPoolDb('/sqlar.sqlite');
globalThis.db = db;
const res = db.exec([sql], opt);
postMessage(res);
});
});
}
}).catch((error) => {
console.log("Failed to fetch HEAD. Using Existing DB.");
const db = new poolUtil.OpfsSAHPoolDb('/sqlar.sqlite');
globalThis.db = db;
const res = db.exec([sql], opt);
postMessage(res);
});
}
}).catch((error) => {
console.log("Failed to use opfs using temp db");
const oo = sqlite3.oo1;
const db = new oo.DB();
fetch('sqlar.sqlite').then(response => response.arrayBuffer()).then(arrayBuffer => {
const bytes = new Uint8Array(arrayBuffer);
const p = sqlite3.wasm.allocFromTypedArray(bytes);
const rc = sqlite3.capi.sqlite3_deserialize(db.pointer, 'main', p, bytes.length, bytes.length,sqlite3.capi.SQLITE_DESERIALIZE_FREEONCLOSE);
db.checkRc(rc); // throws if deserialization failed
globalThis.db = db;
const res = globalThis.db.exec([sql], opt);
postMessage(res);
});
});
});
} else {
const res = globalThis.db.exec([sql], opt);
postMessage(res);
}
};