sqlite

notes

  • Uses bytecode instead of AST like postgres https://sqlite.org/draft/whybytecode.html
  • https://github.com/mmomtchev/sqlite-wasm-http for httpvfs via range headers
  • https://www.sqlite.org/limits.html
    • ~281 TB limit with large page size
  • stores each index and table as separate b-tree over the database pages
    • BEGIN CONCURRENT allows multiple writers with separate tables or index's separated by page size.
    • hctree concurrency for row level locking
      • uses session extension for leader-follower backend replication
      • higher page bit (48 bit) support higher storage limit of 1EiB
  • single flat file database
  • C source can be compiled to WASM/Javascript sqlite.org/fiddle
  • Replication options
    • dqlite
      • custom c library by canonical
    • rqlite
      • uses sqlite as storage engine
    • litefs
      • streaming replication to s3
      • fuse based file replication with 1 active writer in cluster
        • litestream for recovery
    • crdt using last write wins
  • libsql is a fork of sqlite with extra features
  • strict tables for type enforcement
  • session extension for changeset management
  • max PRAGMA page_size = 65536
    • powers of 2 ex 16384 for 16KiB
    • default 4096
    • require PRAGMA journal_mode = OFF
  • reduce wal before VACUUM INTO PRAGMA wal_checkpoint(TRUNCATE)
  • pragma integrity_check;
  • .recover command for corruption
  • sqlar is file sqlite archive for in db compression of files (zlib)
  • sqlite v3.4.0 added support wasm
    • opfs enabled when loaded from worker
      • opfs not limited in size unlike localstorage
      • currently locking issues between threads/tabs
  • optimizer can create temporary index's automatically. dictated as USING AUTOMATIC COVERING INDEX in the explain query output
  • import csv as virtual table or database
  • pragma journal_mode = WAL; for write ahead log (replaced shared cache)
    • WAL2 for 2 files for concurrent checkpointing while writing.
      • pragma journal_mode = delete before changing wal mode from 1 to 2
  • pragma synchronous = normal;
  • pragma temp_store = memory; to disable temp writes
  • pragma mmap_size = 30000000000; for memory mapping
  • open in memory database with :memory:
  • foreign keys disabled in transactions and by default PRAGMA foreign keys
  • site database
-- create sqlar table from directory. Derived from '.archive -cn *.org' or 'sqlite3 -Acn *.org'
-- setup db
PRAGMA page_size=512;
SAVEPOINT ar;
DROP TABLE IF EXISTS sqlar;
CREATE TABLE IF NOT EXISTS sqlar(
  name TEXT PRIMARY KEY,  -- name of the file
  mode INT,               -- access permissions
  mtime INT,              -- last modification time
  sz INT,                 -- original file size
  data BLOB               -- compressed content
);
-- add uncompressed org files
REPLACE INTO sqlar(name,mode,mtime,sz,data)
  SELECT
    name,
    mode,
    mtime,
    CASE substr(lsmode(mode),1,1)
      WHEN '-' THEN length(data)
      WHEN 'd' THEN 0
      ELSE -1 END,
    data
  FROM fsdir('.',NULL) AS disk
  WHERE lsmode(mode) NOT LIKE '?%' AND name GLOB '*.org';
-- setup fts
DROP TABLE IF EXISTS site_pages;
CREATE VIRTUAL TABLE site_pages USING fts5(name, data);
-- add data to index
INSERT INTO site_pages
SELECT name, data
FROM sqlar;
-- optimize fts index
INSERT INTO site_pages(site_pages) VALUES('optimize');
-- keep only fts's copy of content
DROP TABLE IF EXISTS sqlar;
RELEASE ar;
VACUUM;
  • view query ex:
-- get pages and preview from query (json?)
.mode box --wrap 30
.param set $query 'emacs'
SELECT REPLACE(name, '.org', '.html') AS name, snippet(site_pages, 1, '<b>','</b>', '', 64) AS snippet FROM site_pages WHERE data MATCH $query ORDER BY rank;
(ignore-errors ; create db and set mtime to newest
  (let* ((base-dir (file-name-directory (or load-file-name buffer-file-name)))
         (db (concat base-dir "sqlar.sqlite")))
    (call-process "sqlite3" nil t nil db (format ".read %s" (concat base-dir "db.sql"))); create db
    (set-file-times db (file-attribute-modification-time (file-attributes (concat base-dir "sqlite.org")))); set timestamps
    (mapc #'(lambda (org-file) ; MAYBE change to sql query by mtime for newest
              (let ((db-ts (file-attribute-modification-time (file-attributes db)))
                    (org-ts (file-attribute-modification-time (file-attributes org-file))))
                (if (time-less-p db-ts org-ts)
                  (progn
                    (message "setting %s timestamp to %s's" db org-file)
                    (set-file-times db org-ts)))))
            (directory-files base-dir t "\.org$"))
    (set-file-times (concat base-dir "sqlite-query.js") (file-attribute-modification-time (file-attributes (concat base-dir "sqlite.org"))))
    (set-file-times (concat base-dir "sql-worker.js") (file-attribute-modification-time (file-attributes (concat base-dir "sqlite.org"))))))

site search js/sql

    /* Requires sqlite3.js, sqlite3.wasm and sql-worker.js(my worker wrapper)
    // MAYBE add service-worker caching with http.vfs splitting
    // MAYBE use opfs vfs with sync support (multiple tabs)
    // example page:
    <script src="sqlite3.js"></script>
    <script src="sqlite-query.js"></script>

    <article class="panel is-primary">
      <div class="panel-block">
        <input class="input" type="text" placeholder="Search" id="mySearch">
        <button id="mySearchBtn" class="button">&#128269;</button>
      </div>
    </article>
    <dialog id="myModal" class="modal">
      <div class="modal-content">
        <button id="modalClose" class="modal-close is-large" aria-label="close">&times;</button>
        <div id="modalDiv"></div>
      </div>
    </dialog>
    */
document.addEventListener('DOMContentLoaded',() => {
    document.getElementById("modalClose").onclick = function() {
        //document.getElementById("modal").classList.remove("modal-is-open");
        document.getElementById("modal").close();
    }
    document.getElementById("search").addEventListener("keyup", function(event){
        if (event.keyCode == 13) { // on enter
            if (typeof globalThis.w === 'undefined') { // load sqlite worker
                console.log("creating worker");
                const workerArgs = [];
                workerArgs.push("sql-worker.js");
                const w = new Worker(...workerArgs);
                w.onmessage = (msg) => { // write results as table rows and links
                    let html = '<table class="table">';
                    msg.data.forEach((row) => { html = html + "<tr><td><a href='" +row[0]+"'>"+row[1]+"</a></td></tr>"});
                    html = html + "</table>";
                    document.getElementById("modalDiv").innerHTML = html;
                    //document.getElementById("modal").classList.add("modal-is-open");
                    document.getElementById("modal").showModal();
                };
                globalThis.w = w; // query worker
                let query = document.getElementById("search").value;
                globalThis.w.postMessage(query);
            } else {
                let query = document.getElementById("search").value;
                globalThis.w.postMessage(query);
            }
        }
    });
});
onmessage = (e) => {
    const opt = {"returnValue": "resultRows"};
    const sql ="SELECT REPLACE(name, '.org', '.html') AS name, snippet(site_pages, 1, '<b>','</b>', '', 64) AS snippet FROM site_pages WHERE data MATCH '"+ e.data +"' ORDER BY rank;";
    if (typeof globalThis.sqlite3InitModule === 'undefined') { // load sqlite3
        importScripts('sqlite3.js'); // mjs module?
        globalThis.sqlite3InitModule().then((sqlite3)=> {
            sqlite3.installOpfsSAHPoolVfs().then((poolUtil)=> { // use opfs for storage
                if (poolUtil.getFileCount() == 0) {
                    console.log("fetching sqlite opfs files");
                    fetch('sqlar.sqlite').then(response => response.arrayBuffer()).then(arrayBuffer => {
                        poolUtil.importDb('/sqlar.sqlite', arrayBuffer); // requires absolute path (lack of transparent naming obscures real name)
                        const db = new poolUtil.OpfsSAHPoolDb('/sqlar.sqlite');
                        globalThis.db = db;
                        const res = db.exec([sql], opt);
                        postMessage(res);
                    });
                } else {
                    console.log("checking existing sqlite opfs files" + poolUtil.getFileNames());
                    fetch('sqlar.sqlite', {method: "HEAD"}).then(response => {
                        console.log("Response headers are: " + response.headers.get("Content-Length"));
                        const db = new poolUtil.OpfsSAHPoolDb('/sqlar.sqlite'); // load existing db to check size against headers
                        console.log("loaded db");
                        const header_size = response.headers.get("Content-Length");
                        const size_sql = "SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size();";
                        const size = db.selectArray(size_sql); // selectArray to not include results in db.exec
                        console.log("Header size is: " + header_size + " DB size is: " + size);
                        if (size == header_size) {
                            console.log("Database is up to date");
                            globalThis.db = db;
                            const res = db.exec([sql], opt);
                            postMessage(res);
                        } else {
                            console.log("Updating database with fetch");
                            db.close(); // close before wiping
                            fetch('sqlar.sqlite').then(response => response.arrayBuffer()).then(arrayBuffer => {
                                poolUtil.wipeFiles().then(_ => {
                                    poolUtil.importDb('/sqlar.sqlite', arrayBuffer);
                                    const db = new poolUtil.OpfsSAHPoolDb('/sqlar.sqlite');
                                    globalThis.db = db;
                                    const res = db.exec([sql], opt);
                                    postMessage(res);
                                });
                            });
                        }
                    }).catch((error) => {
                        console.log("Failed to fetch HEAD. Using Existing DB.");
                        const db = new poolUtil.OpfsSAHPoolDb('/sqlar.sqlite');
                        globalThis.db = db;
                        const res = db.exec([sql], opt);
                        postMessage(res);
                    });
                }
            }).catch((error) => {
                console.log("Failed to use opfs using temp db");
                const capi = sqlite3.capi,
                      oo = sqlite3.oo1,
                      wasm = sqlite3.wasm;
                const db = new oo.DB();
                fetch('sqlar.sqlite').then(response => response.arrayBuffer()).then(arrayBuffer => {
                    const bytes = new Uint8Array(arrayBuffer);
                    const p = sqlite3.wasm.allocFromTypedArray(bytes);
                    const rc = sqlite3.capi.sqlite3_deserialize(db.pointer, 'main', p, bytes.length, bytes.length,sqlite3.capi.SQLITE_DESERIALIZE_FREEONCLOSE);
                    //db.checkRc(rc);

                    globalThis.db = db;
                    const res = globalThis.db.exec([sql], opt);
                    postMessage(res);
                });
            });
        });
    }else {
        const res = globalThis.db.exec([sql], opt);
        postMessage(res);
    }
};