diff --git a/src/services/sqlite/Database.ts b/src/services/sqlite/Database.ts
index 2a600e19..42cb8726 100644
--- a/src/services/sqlite/Database.ts
+++ b/src/services/sqlite/Database.ts
@@ -1,4 +1,8 @@
 import { Database } from 'bun:sqlite';
+import { execFileSync } from 'child_process';
+import { mkdtempSync, rmSync, writeFileSync } from 'fs';
+import { tmpdir } from 'os';
+import { join } from 'path';
 import { DATA_DIR, DB_PATH, ensureDir } from '../../shared/paths.js';
 import { logger } from '../../utils/logger.js';
 import { MigrationRunner } from './migrations/runner.js';
@@ -15,6 +19,183 @@ export interface Migration {
 
 let dbInstance: Database | null = null;
 
+/**
+ * Maximum number of repair passes attempted when opening a database.
+ *
+ * Each pass removes one orphaned schema object, so this bounds how many
+ * malformed objects can be cleaned up before giving up instead of looping
+ * forever on a database that cannot be repaired.
+ */
+const MAX_SCHEMA_REPAIR_ATTEMPTS = 10;
+
+/**
+ * Python script that drops one schema object and resets migration versions.
+ *
+ * Invoked as `python3 <script> <dbPath> <objectName>`. bun:sqlite doesn't
+ * allow DELETE FROM sqlite_master even with writable_schema = ON, so the
+ * repair is delegated to Python's sqlite3 module.
+ */
+const SCHEMA_REPAIR_SCRIPT = `
+import sqlite3, sys
+db_path = sys.argv[1]
+obj_name = sys.argv[2]
+c = sqlite3.connect(db_path)
+c.execute('PRAGMA writable_schema = ON')
+c.execute('DELETE FROM sqlite_master WHERE name = ?', (obj_name,))
+c.execute('PRAGMA writable_schema = OFF')
+# Reset migration versions so affected migrations re-run.
+# Guard with existence check: schema_versions may not exist on a very fresh DB.
+has_sv = c.execute(
+    "SELECT count(*) FROM sqlite_master WHERE type='table' AND name='schema_versions'"
+).fetchone()[0]
+if has_sv:
+    c.execute('DELETE FROM schema_versions')
+c.commit()
+c.close()
+`;
+
+/** A "malformed database schema" failure captured by probeMalformedSchema. */
+interface MalformedSchemaProbe {
+  /** The original error, kept so callers can rethrow it unchanged. */
+  error: unknown;
+  /** The error's message text. */
+  message: string;
+}
+
+/**
+ * Check whether the schema can be read at all.
+ *
+ * @returns null when sqlite_master is queryable, or the captured failure when
+ *   the query fails with a "malformed database schema" error.
+ * @throws Any other error raised by the query.
+ */
+function probeMalformedSchema(db: Database): MalformedSchemaProbe | null {
+  try {
+    // Single-quoted literal: double-quoted strings are a SQLite compatibility
+    // quirk and are parsed as identifiers when that quirk is disabled.
+    db.query("SELECT name FROM sqlite_master WHERE type = 'table' LIMIT 1").all();
+    return null;
+  } catch (error: unknown) {
+    const message = error instanceof Error ? error.message : String(error);
+    if (!message.includes('malformed database schema')) {
+      throw error;
+    }
+    return { error, message };
+  }
+}
+
+/**
+ * Repair malformed database schema before migrations run.
+ *
+ * This handles the case where a database is synced between machines running
+ * different claude-mem versions. A newer version may have added columns and
+ * indexes that an older version (or even the same version on a fresh install)
+ * cannot process. SQLite throws "malformed database schema" when it encounters
+ * an index referencing a non-existent column, which prevents ALL queries —
+ * including the migrations that would fix the schema.
+ *
+ * The fix: use Python's sqlite3 module (which supports writable_schema) to
+ * drop the orphaned schema object named in the error, then let the migration
+ * system recreate it properly.
+ *
+ * Side effect: when a repair is attempted, `db` is closed so Python can modify
+ * the file; the caller must reopen the database afterwards.
+ *
+ * @param db - Open connection to probe and, if necessary, repair.
+ * @throws The original SQLite error when it is not a malformed-schema error,
+ *   when the object name cannot be parsed, or when the database is in-memory.
+ * @throws Error when the Python repair step fails.
+ */
+function repairMalformedSchema(db: Database): void {
+  const probe = probeMalformedSchema(db);
+  if (!probe) {
+    return;
+  }
+  const { error, message } = probe;
+
+  logger.warn('DB', 'Detected malformed database schema, attempting repair', { error: message });
+
+  // Extract the problematic object name from the error message
+  // Format: "malformed database schema (object_name) - details"
+  const match = message.match(/malformed database schema \(([^)]+)\)/);
+  if (!match) {
+    logger.error('DB', 'Could not parse malformed schema error, cannot auto-repair', { error: message });
+    throw error;
+  }
+
+  const objectName = match[1];
+  logger.info('DB', `Dropping malformed schema object: ${objectName}`);
+
+  // Get the DB file path. For file-based DBs, we can use Python to repair.
+  // For in-memory DBs, we can't shell out — just re-throw.
+  const dbPath = db.filename;
+  if (!dbPath || dbPath === ':memory:') {
+    logger.error('DB', 'Cannot auto-repair in-memory database');
+    throw error;
+  }
+
+  // Close the connection so Python can safely modify the file
+  db.close();
+
+  // The script lives in a freshly created private directory rather than at a
+  // guessable path directly under the shared temp dir. execFileSync passes
+  // args without a shell, so dbPath and objectName need no escaping.
+  let scriptDir: string | undefined;
+  try {
+    scriptDir = mkdtempSync(join(tmpdir(), 'claude-mem-repair-'));
+    const scriptPath = join(scriptDir, 'repair.py');
+    writeFileSync(scriptPath, SCHEMA_REPAIR_SCRIPT);
+    execFileSync('python3', [scriptPath, dbPath, objectName], { timeout: 10000 });
+    logger.info('DB', `Dropped orphaned schema object "${objectName}" and reset migration versions via Python sqlite3. All migrations will re-run (they are idempotent).`);
+  } catch (pyError: unknown) {
+    const pyMessage = pyError instanceof Error ? pyError.message : String(pyError);
+    logger.error('DB', 'Python sqlite3 repair failed', { error: pyMessage });
+    throw new Error(`Schema repair failed: ${message}. Python repair error: ${pyMessage}`);
+  } finally {
+    if (scriptDir) {
+      rmSync(scriptDir, { recursive: true, force: true });
+    }
+  }
+}
+
+/**
+ * Wrapper that handles the close/reopen cycle needed for schema repair.
+ *
+ * Repairs one malformed object per pass and reopens the database after each
+ * pass, stopping as soon as the schema is readable.
+ *
+ * @param dbPath - Path used to reopen the database after each repair pass.
+ * @param db - Connection opened by the caller.
+ * @returns `db` itself when no repair was needed, otherwise a new connection.
+ * @throws Error when a pass leaves the same error in place or the attempt
+ *   limit is reached; also anything thrown by repairMalformedSchema.
+ */
+function repairMalformedSchemaWithReopen(dbPath: string, db: Database): Database {
+  let current = db;
+  let previousMessage: string | null = null;
+
+  for (let attempt = 0; ; attempt++) {
+    const probe = probeMalformedSchema(current);
+    if (!probe) {
+      return current;
+    }
+
+    // Bail out rather than spawning Python forever when a pass changed nothing
+    // or the database keeps surfacing new malformed objects.
+    if (probe.message === previousMessage || attempt >= MAX_SCHEMA_REPAIR_ATTEMPTS) {
+      current.close();
+      throw new Error(`Schema repair did not converge after ${attempt} attempt(s): ${probe.message}`);
+    }
+    previousMessage = probe.message;
+
+    // repairMalformedSchema closes the DB internally for Python access
+    repairMalformedSchema(current);
+
+    // Reopen and check for additional malformed objects
+    current = new Database(dbPath, { create: true, readwrite: true });
+  }
+}
+
 /**
  * ClaudeMemDatabase - New entry point for the sqlite module
  *
@@ -38,6 +219,11 @@ export class ClaudeMemDatabase {
     // Create database connection
     this.db = new Database(dbPath, { create: true, readwrite: true });
 
+    // Repair any malformed schema before applying settings or running migrations.
+    // Must happen first — even PRAGMA calls can fail on a corrupted schema.
+    // This may close and reopen the connection if repair is needed.
+    this.db = repairMalformedSchemaWithReopen(dbPath, this.db);
+
     // Apply optimized SQLite settings
     this.db.run('PRAGMA journal_mode = WAL');
     this.db.run('PRAGMA synchronous = NORMAL');
@@ -97,6 +283,10 @@ export class DatabaseManager {
 
     this.db = new Database(DB_PATH, { create: true, readwrite: true });
 
+    // Repair any malformed schema before applying settings or running migrations.
+    // Must happen first — even PRAGMA calls can fail on a corrupted schema.
+    this.db = repairMalformedSchemaWithReopen(DB_PATH, this.db);
+
     // Apply optimized SQLite settings
     this.db.run('PRAGMA journal_mode = WAL');
     this.db.run('PRAGMA synchronous = NORMAL');
diff --git a/tests/services/sqlite/schema-repair.test.ts b/tests/services/sqlite/schema-repair.test.ts
new file mode 100644
index 00000000..2f64db46
--- /dev/null
+++ b/tests/services/sqlite/schema-repair.test.ts
@@ -0,0 +1,268 @@
+/**
+ * Tests for malformed schema repair in Database.ts
+ *
+ * Mock Justification: NONE (0% mock code)
+ * - Uses real SQLite with temp file — tests actual schema repair logic
+ * - Uses Python sqlite3 to simulate cross-version schema corruption
+ *   (bun:sqlite doesn't allow writable_schema modifications)
+ * - Covers the cross-machine sync scenario from issue #1307
+ *
+ * Value: Prevents the silent 503 failure loop when a DB is synced between
+ * machines running different claude-mem versions
+ */
+import { describe, it, expect } from 'bun:test';
+import { Database } from 'bun:sqlite';
+import { ClaudeMemDatabase } from '../../../src/services/sqlite/Database.js';
+import { MigrationRunner } from '../../../src/services/sqlite/migrations/runner.js';
+import { existsSync, unlinkSync, writeFileSync } from 'fs';
+import { join } from 'path';
+import { tmpdir } from 'os';
+import { execFileSync } from 'child_process';
+
+/** Build a collision-resistant path in the OS temp dir (timestamp plus random suffix). */
+function tempPath(prefix: string, extension: string): string {
+  return join(tmpdir(), `${prefix}-${Date.now()}-${Math.random().toString(36).slice(2)}.${extension}`);
+}
+
+/** Path for a throwaway database file. */
+function tempDbPath(): string {
+  return tempPath('claude-mem-test', 'db');
+}
+
+/** Remove the database file and its WAL/SHM sidecar files if they exist. */
+function cleanup(path: string): void {
+  for (const suffix of ['', '-wal', '-shm']) {
+    const p = path + suffix;
+    if (existsSync(p)) unlinkSync(p);
+  }
+}
+
+/** True when `python3 --version` runs successfully. */
+function hasPython(): boolean {
+  try {
+    // execFileSync runs the binary directly; no shell is involved.
+    execFileSync('python3', ['--version'], { stdio: 'pipe' });
+    return true;
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Write `source` to a temp Python script, run it with `args`, and always
+ * delete the script afterwards.
+ */
+function runPythonScript(source: string, args: string[]): void {
+  const script = tempPath('claude-mem-test-script', 'py');
+  writeFileSync(script, source);
+  try {
+    // Arguments are passed as an array so paths with spaces or quotes are safe.
+    execFileSync('python3', [script, ...args], { timeout: 10000 });
+  } finally {
+    if (existsSync(script)) unlinkSync(script);
+  }
+}
+
+/**
+ * Open the database directly and probe sqlite_master.
+ *
+ * @returns The message of the error thrown by the probe, or null when the
+ *   probe succeeds. The connection is closed in both cases.
+ */
+function schemaErrorMessage(dbPath: string): string | null {
+  const db = new Database(dbPath, { readwrite: true });
+  try {
+    db.query("SELECT name FROM sqlite_master WHERE type = 'table' LIMIT 1").all();
+    return null;
+  } catch (e: unknown) {
+    return e instanceof Error ? e.message : String(e);
+  } finally {
+    db.close();
+  }
+}
+
+/**
+ * Use Python's sqlite3 to corrupt a DB by removing the content_hash column
+ * from the observations table definition while leaving the index intact.
+ * This simulates what happens when a DB from a newer version is synced.
+ */
+function corruptDbViaPython(dbPath: string): void {
+  runPythonScript(`
+import sqlite3, re, sys
+c = sqlite3.connect(sys.argv[1])
+c.execute("PRAGMA writable_schema = ON")
+row = c.execute("SELECT sql FROM sqlite_master WHERE type='table' AND name='observations'").fetchone()
+if row:
+    new_sql = re.sub(r',\\s*content_hash\\s+TEXT', '', row[0])
+    c.execute("UPDATE sqlite_master SET sql = ? WHERE type='table' AND name='observations'", (new_sql,))
+c.execute("PRAGMA writable_schema = OFF")
+c.commit()
+c.close()
+`, [dbPath]);
+}
+
+describe('Schema repair on malformed database', () => {
+  it('should repair a database with an orphaned index referencing a non-existent column', () => {
+    if (!hasPython()) {
+      console.log('Python3 not available, skipping test');
+      return;
+    }
+
+    const dbPath = tempDbPath();
+    try {
+      // Step 1: Create a valid database with all migrations
+      const db = new Database(dbPath, { create: true, readwrite: true });
+      db.run('PRAGMA journal_mode = WAL');
+      db.run('PRAGMA foreign_keys = ON');
+
+      const runner = new MigrationRunner(db);
+      runner.runAllMigrations();
+
+      // Verify the content_hash column exists before corrupting
+      const initialColumns = db.prepare('PRAGMA table_info(observations)').all() as { name: string }[];
+      expect(initialColumns.some(col => col.name === 'content_hash')).toBe(true);
+
+      // Checkpoint WAL so all data is in the main file
+      db.run('PRAGMA wal_checkpoint(TRUNCATE)');
+      db.close();
+
+      // Step 2: Corrupt the DB
+      corruptDbViaPython(dbPath);
+
+      // Step 3: Verify the DB is actually corrupted
+      const corruption = schemaErrorMessage(dbPath);
+      expect(corruption).toContain('malformed database schema');
+      expect(corruption).toContain('idx_observations_content_hash');
+
+      // Step 4: Open via ClaudeMemDatabase — it should auto-repair
+      const repaired = new ClaudeMemDatabase(dbPath);
+
+      // Verify the DB is functional
+      const tables = repaired.db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY name")
+        .all() as { name: string }[];
+      const tableNames = tables.map(t => t.name);
+      expect(tableNames).toContain('observations');
+      expect(tableNames).toContain('sdk_sessions');
+
+      // Verify the index was recreated by the migration runner
+      const indexes = repaired.db.prepare("SELECT name FROM sqlite_master WHERE type='index' AND name='idx_observations_content_hash'")
+        .all() as { name: string }[];
+      expect(indexes.length).toBe(1);
+
+      // Verify the content_hash column was re-added by the migration
+      const columns = repaired.db.prepare('PRAGMA table_info(observations)').all() as { name: string }[];
+      expect(columns.some(c => c.name === 'content_hash')).toBe(true);
+
+      repaired.close();
+    } finally {
+      cleanup(dbPath);
+    }
+  });
+
+  it('should handle a fresh database without triggering repair', () => {
+    const dbPath = tempDbPath();
+    try {
+      const db = new ClaudeMemDatabase(dbPath);
+      const tables = db.db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'")
+        .all() as { name: string }[];
+      expect(tables.length).toBeGreaterThan(0);
+      db.close();
+    } finally {
+      cleanup(dbPath);
+    }
+  });
+
+  it('should repair a corrupted DB that has no schema_versions table', () => {
+    if (!hasPython()) {
+      console.log('Python3 not available, skipping test');
+      return;
+    }
+
+    const dbPath = tempDbPath();
+    try {
+      // Build a minimal DB whose only schema entry is an orphaned index — no
+      // observations table and no schema_versions table. This simulates a
+      // partially-initialized DB that was synced before migrations ever ran.
+      runPythonScript(`
+import sqlite3, sys
+c = sqlite3.connect(sys.argv[1])
+c.execute('PRAGMA writable_schema = ON')
+# Inject an orphaned index into sqlite_master without any backing table.
+# This simulates a partially-synced DB where index metadata arrived but
+# the table schema is incomplete or missing columns.
+idx_sql = 'CREATE INDEX idx_observations_content_hash ON observations(content_hash, created_at_epoch)'
+c.execute(
+    "INSERT INTO sqlite_master (type, name, tbl_name, rootpage, sql) VALUES ('index', 'idx_observations_content_hash', 'observations', 0, ?)",
+    (idx_sql,)
+)
+c.execute('PRAGMA writable_schema = OFF')
+c.commit()
+c.close()
+`, [dbPath]);
+
+      // Verify it's corrupted
+      expect(schemaErrorMessage(dbPath)).toContain('malformed database schema');
+
+      // ClaudeMemDatabase must repair and fully initialize despite missing schema_versions
+      const repaired = new ClaudeMemDatabase(dbPath);
+      const tables = repaired.db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY name")
+        .all() as { name: string }[];
+      const tableNames = tables.map(t => t.name);
+      expect(tableNames).toContain('schema_versions');
+      expect(tableNames).toContain('observations');
+      expect(tableNames).toContain('sdk_sessions');
+      repaired.close();
+    } finally {
+      cleanup(dbPath);
+    }
+  });
+
+  it('should preserve existing data through repair and re-migration', () => {
+    if (!hasPython()) {
+      console.log('Python3 not available, skipping test');
+      return;
+    }
+
+    const dbPath = tempDbPath();
+    try {
+      // Step 1: Create a fully migrated DB and insert a session + observation
+      const db = new Database(dbPath, { create: true, readwrite: true });
+      db.run('PRAGMA journal_mode = WAL');
+      db.run('PRAGMA foreign_keys = ON');
+
+      const runner = new MigrationRunner(db);
+      runner.runAllMigrations();
+
+      const now = new Date().toISOString();
+      const epoch = Date.now();
+      db.prepare(`
+        INSERT INTO sdk_sessions (content_session_id, memory_session_id, project, started_at, started_at_epoch, status)
+        VALUES (?, ?, ?, ?, ?, ?)
+      `).run('test-content-1', 'test-memory-1', 'test-project', now, epoch, 'active');
+
+      db.prepare(`
+        INSERT INTO observations (memory_session_id, project, type, created_at, created_at_epoch)
+        VALUES (?, ?, ?, ?, ?)
+      `).run('test-memory-1', 'test-project', 'discovery', now, epoch);
+
+      db.run('PRAGMA wal_checkpoint(TRUNCATE)');
+      db.close();
+
+      // Step 2: Corrupt the DB
+      corruptDbViaPython(dbPath);
+
+      // Step 3: Repair via ClaudeMemDatabase
+      const repaired = new ClaudeMemDatabase(dbPath);
+
+      // Data must survive the repair + re-migration
+      const sessions = repaired.db.prepare('SELECT COUNT(*) as count FROM sdk_sessions').get() as { count: number };
+      const observations = repaired.db.prepare('SELECT COUNT(*) as count FROM observations').get() as { count: number };
+      expect(sessions.count).toBe(1);
+      expect(observations.count).toBe(1);
+
+      repaired.close();
+    } finally {
+      cleanup(dbPath);
+    }
+  });
+});