fix: auto-repair malformed database schema from cross-version sync (#1308)
When a claude-mem DB is synced between machines running different versions, orphaned indexes can reference non-existent columns (e.g. idx_observations_content_hash referencing content_hash). This causes SQLite to throw "malformed database schema" on ALL queries, including PRAGMAs, creating a silent 503 failure loop. The fix detects this on startup, uses Python's sqlite3 module to drop the orphaned schema objects (bun:sqlite doesn't support writable_schema modifications), resets migration versions, and lets the idempotent migration system recreate everything properly. Fixes #1307 Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,4 +1,8 @@
|
||||
import { Database } from 'bun:sqlite';
|
||||
import { execFileSync } from 'child_process';
|
||||
import { existsSync, unlinkSync, writeFileSync } from 'fs';
|
||||
import { tmpdir } from 'os';
|
||||
import { join } from 'path';
|
||||
import { DATA_DIR, DB_PATH, ensureDir } from '../../shared/paths.js';
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { MigrationRunner } from './migrations/runner.js';
|
||||
@@ -15,6 +19,118 @@ export interface Migration {
|
||||
|
||||
let dbInstance: Database | null = null;
|
||||
|
||||
/**
 * Repair a malformed database schema before migrations run.
 *
 * This handles the case where a database is synced between machines running
 * different claude-mem versions. A newer version may have added columns and
 * indexes that another version cannot process. SQLite throws "malformed
 * database schema" when it encounters an index referencing a non-existent
 * column, which prevents ALL queries — including the migrations that would
 * fix the schema.
 *
 * The fix: use Python's sqlite3 module (which supports writable_schema) to
 * drop the single schema object named in the error, clear `schema_versions`
 * (when that table exists) so migrations re-run, and let the migration system
 * recreate what was dropped. bun:sqlite doesn't allow DELETE FROM
 * sqlite_master even with writable_schema = ON.
 *
 * Safety rules enforced by the repair script:
 * - Entries of type `table` are never deleted; removing a table definition
 *   would orphan its stored rows. Such a schema error is reported instead.
 * - The script fails if it deleted nothing, so callers never see a "successful"
 *   repair that made no progress.
 *
 * Side effect: when a repair is attempted on a file-backed database, `db` is
 * closed before Python runs. The caller must open a new connection afterwards.
 *
 * @param db Open connection to probe and, if needed, repair.
 * @throws The original error when it is not a malformed-schema error, when the
 *   object name cannot be parsed from it, or when the database is in-memory.
 * @throws Error ("Schema repair failed: ...") when the Python repair fails.
 */
function repairMalformedSchema(db: Database): void {
  try {
    // Quick probe: if sqlite_master can be queried, the schema is fine.
    // 'table' is single-quoted: double-quoted string literals only work via a
    // legacy SQLite quirk that can be disabled at compile time.
    db.query("SELECT name FROM sqlite_master WHERE type = 'table' LIMIT 1").all();
    return;
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    if (!message.includes('malformed database schema')) {
      throw error;
    }

    logger.warn('DB', 'Detected malformed database schema, attempting repair', { error: message });

    // Extract the problematic object name from the error message.
    // Format: "malformed database schema (object_name) - details"
    const match = message.match(/malformed database schema \(([^)]+)\)/);
    if (!match) {
      logger.error('DB', 'Could not parse malformed schema error, cannot auto-repair', { error: message });
      throw error;
    }

    const objectName = match[1];
    logger.info('DB', `Dropping malformed schema object: ${objectName}`);

    // Only file-backed databases can be handed to an external process.
    const dbPath = db.filename;
    if (!dbPath || dbPath === ':memory:' || dbPath === '') {
      logger.error('DB', 'Cannot auto-repair in-memory database');
      throw error;
    }

    // Close the connection so Python can safely modify the file.
    db.close();

    // The script is built line by line so its Python indentation is independent
    // of the TypeScript indentation around it. dbPath and objectName are passed
    // as argv entries (execFileSync uses no shell), never spliced into the code.
    const repairScript = [
      'import sqlite3, sys',
      'db_path = sys.argv[1]',
      'obj_name = sys.argv[2]',
      'c = sqlite3.connect(db_path)',
      "c.execute('PRAGMA writable_schema = ON')",
      '# Never delete a table entry: that would orphan the stored rows.',
      'deleted = c.execute(',
      `    "DELETE FROM sqlite_master WHERE name = ? AND type <> 'table'", (obj_name,)`,
      ').rowcount',
      "c.execute('PRAGMA writable_schema = OFF')",
      'if deleted < 1:',
      '    c.rollback()',
      '    c.close()',
      "    sys.exit('no droppable (non-table) schema object named ' + repr(obj_name))",
      '# Reset migration versions so affected migrations re-run.',
      '# Guard with existence check: schema_versions may not exist on a very fresh DB.',
      'has_sv = c.execute(',
      `    "SELECT count(*) FROM sqlite_master WHERE type='table' AND name='schema_versions'"`,
      ').fetchone()[0]',
      'if has_sv:',
      "    c.execute('DELETE FROM schema_versions')",
      'c.commit()',
      'c.close()',
      '',
    ].join('\n');

    // Unpredictable name + exclusive create ('wx') so an existing file or
    // symlink at this path in the shared temp directory is never overwritten.
    const uniqueSuffix = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`;
    const scriptPath = join(tmpdir(), `claude-mem-repair-${uniqueSuffix}.py`);
    let scriptWritten = false;
    try {
      writeFileSync(scriptPath, repairScript, { flag: 'wx', mode: 0o600 });
      scriptWritten = true;
      execFileSync('python3', [scriptPath, dbPath, objectName], { timeout: 10000 });
      logger.info('DB', `Dropped orphaned schema object "${objectName}" and reset migration versions via Python sqlite3. All migrations will re-run (they are idempotent).`);
    } catch (pyError: unknown) {
      const pyMessage = pyError instanceof Error ? pyError.message : String(pyError);
      logger.error('DB', 'Python sqlite3 repair failed', { error: pyMessage });
      throw new Error(`Schema repair failed: ${message}. Python repair error: ${pyMessage}`);
    } finally {
      // Only remove a file this call created.
      if (scriptWritten && existsSync(scriptPath)) unlinkSync(scriptPath);
    }
  }
}
|
||||
|
||||
/**
 * Upper bound on close/repair/reopen passes for one database. Each pass is
 * expected to remove one malformed schema object; the bound turns a repair
 * that makes no progress into an error instead of endless re-entry.
 */
const MAX_SCHEMA_REPAIR_PASSES = 100;

/**
 * Wrapper that handles the close/reopen cycle needed for schema repair.
 *
 * Probes the connection; while the probe fails with a "malformed database
 * schema" error, runs `repairMalformedSchema` (which closes the connection),
 * reopens the file, and probes again to catch further malformed objects.
 *
 * @param dbPath Path used to reopen the database after each repair pass.
 * @param db Connection opened by the caller on `dbPath`.
 * @returns `db` itself when no repair was needed, otherwise a new connection.
 * @throws Any probe error that is not a malformed-schema error, any error from
 *   `repairMalformedSchema`, or an Error when the schema is still malformed
 *   after `MAX_SCHEMA_REPAIR_PASSES` passes.
 */
function repairMalformedSchemaWithReopen(dbPath: string, db: Database): Database {
  let current = db;

  for (let pass = 0; pass < MAX_SCHEMA_REPAIR_PASSES; pass++) {
    try {
      current.query("SELECT name FROM sqlite_master WHERE type = 'table' LIMIT 1").all();
      return current;
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      if (!message.includes('malformed database schema')) {
        // A connection opened by this function is not visible to the caller,
        // so it is released here; the caller's own handle is left untouched.
        if (current !== db) current.close();
        throw error;
      }
    }

    // repairMalformedSchema closes the DB internally for Python access.
    repairMalformedSchema(current);

    // Reopen and loop to check for additional malformed objects.
    current = new Database(dbPath, { create: true, readwrite: true });
  }

  current.close();
  throw new Error(
    `Schema repair failed: database schema still malformed after ${MAX_SCHEMA_REPAIR_PASSES} repair passes`
  );
}
|
||||
|
||||
/**
|
||||
* ClaudeMemDatabase - New entry point for the sqlite module
|
||||
*
|
||||
@@ -38,6 +154,11 @@ export class ClaudeMemDatabase {
|
||||
// Create database connection
|
||||
this.db = new Database(dbPath, { create: true, readwrite: true });
|
||||
|
||||
// Repair any malformed schema before applying settings or running migrations.
|
||||
// Must happen first — even PRAGMA calls can fail on a corrupted schema.
|
||||
// This may close and reopen the connection if repair is needed.
|
||||
this.db = repairMalformedSchemaWithReopen(dbPath, this.db);
|
||||
|
||||
// Apply optimized SQLite settings
|
||||
this.db.run('PRAGMA journal_mode = WAL');
|
||||
this.db.run('PRAGMA synchronous = NORMAL');
|
||||
@@ -97,6 +218,10 @@ export class DatabaseManager {
|
||||
|
||||
this.db = new Database(DB_PATH, { create: true, readwrite: true });
|
||||
|
||||
// Repair any malformed schema before applying settings or running migrations.
|
||||
// Must happen first — even PRAGMA calls can fail on a corrupted schema.
|
||||
this.db = repairMalformedSchemaWithReopen(DB_PATH, this.db);
|
||||
|
||||
// Apply optimized SQLite settings
|
||||
this.db.run('PRAGMA journal_mode = WAL');
|
||||
this.db.run('PRAGMA synchronous = NORMAL');
|
||||
|
||||
@@ -0,0 +1,253 @@
|
||||
/**
|
||||
* Tests for malformed schema repair in Database.ts
|
||||
*
|
||||
* Mock Justification: NONE (0% mock code)
|
||||
* - Uses real SQLite with temp file — tests actual schema repair logic
|
||||
* - Uses Python sqlite3 to simulate cross-version schema corruption
|
||||
* (bun:sqlite doesn't allow writable_schema modifications)
|
||||
* - Covers the cross-machine sync scenario from issue #1307
|
||||
*
|
||||
* Value: Prevents the silent 503 failure loop when a DB is synced between
|
||||
* machines running different claude-mem versions
|
||||
*/
|
||||
import { describe, it, expect } from 'bun:test';
|
||||
import { Database } from 'bun:sqlite';
|
||||
import { ClaudeMemDatabase } from '../../../src/services/sqlite/Database.js';
|
||||
import { MigrationRunner } from '../../../src/services/sqlite/migrations/runner.js';
|
||||
import { existsSync, unlinkSync, writeFileSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
import { tmpdir } from 'os';
|
||||
import { execFileSync, execSync } from 'child_process';
|
||||
|
||||
/**
 * Build a path for a throwaway SQLite file inside the OS temp directory.
 * The name combines the current timestamp with a random base-36 suffix.
 */
function tempDbPath(): string {
  const stamp = Date.now();
  const nonce = Math.random().toString(36).slice(2);
  const fileName = `claude-mem-test-${stamp}-${nonce}.db`;
  return join(tmpdir(), fileName);
}
|
||||
|
||||
/**
 * Delete a test database file together with its `-wal` and `-shm` sidecar
 * files. Files that do not exist are skipped.
 */
function cleanup(path: string): void {
  ['', '-wal', '-shm'].forEach((suffix) => {
    const target = `${path}${suffix}`;
    if (!existsSync(target)) return;
    unlinkSync(target);
  });
}
|
||||
|
||||
/**
 * Report whether a `python3` executable can be spawned.
 *
 * The probe uses execFileSync (no shell), the same way the schema repair code
 * launches Python, so the answer reflects what the code under test will
 * experience. A timeout keeps a hanging interpreter from stalling the suite.
 *
 * @returns true when `python3 --version` runs successfully, false otherwise.
 */
function hasPython(): boolean {
  try {
    execFileSync('python3', ['--version'], { stdio: 'pipe', timeout: 10000 });
    return true;
  } catch {
    return false;
  }
}
|
||||
|
||||
/**
 * Use Python's sqlite3 to corrupt a DB by removing the content_hash column
 * from the observations table definition while leaving the index intact.
 * This simulates what happens when a DB from a newer version is synced.
 *
 * The Python script exits non-zero (making this function throw) when the
 * observations table is missing or its definition contains no content_hash
 * column, so a test can never continue with a database that was not corrupted.
 *
 * @param dbPath Path of the SQLite file to corrupt in place.
 * @throws When python3 cannot be run, times out, or the script exits non-zero.
 */
function corruptDbViaPython(dbPath: string): void {
  // Random suffix avoids collisions between calls in the same millisecond.
  const script = join(tmpdir(), `corrupt-${Date.now()}-${Math.random().toString(36).slice(2)}.py`);

  // Built line by line so Python indentation is explicit.
  const pythonSource = [
    'import sqlite3, re, sys',
    'c = sqlite3.connect(sys.argv[1])',
    'c.execute("PRAGMA writable_schema = ON")',
    `row = c.execute("SELECT sql FROM sqlite_master WHERE type='table' AND name='observations'").fetchone()`,
    'if row is None:',
    '    sys.exit("observations table not found")',
    `new_sql = re.sub(r',\\s*content_hash\\s+TEXT', '', row[0])`,
    'if new_sql == row[0]:',
    '    sys.exit("content_hash column not found in observations definition")',
    `c.execute("UPDATE sqlite_master SET sql = ? WHERE type='table' AND name='observations'", (new_sql,))`,
    'c.execute("PRAGMA writable_schema = OFF")',
    'c.commit()',
    'c.close()',
    '',
  ].join('\n');

  writeFileSync(script, pythonSource);
  try {
    // Arguments are passed as an array (no shell), so paths containing spaces,
    // quotes or `$` need no escaping.
    execFileSync('python3', [script, dbPath], { timeout: 10000 });
  } finally {
    if (existsSync(script)) unlinkSync(script);
  }
}
|
||||
|
||||
describe('Schema repair on malformed database', () => {
  // Probe once at collection time. Python-dependent tests are registered with
  // skipIf so a missing interpreter shows up as "skipped" in the report rather
  // than as a passing test that asserted nothing.
  const pythonMissing = !hasPython();

  /**
   * Open `path` directly with bun:sqlite and run the schema probe query.
   * Returns the error message when the query throws, or null when it succeeds.
   * The probe connection is always closed.
   */
  function readSchemaError(path: string): string | null {
    const probe = new Database(path, { readwrite: true });
    try {
      probe.query("SELECT name FROM sqlite_master WHERE type = 'table' LIMIT 1").all();
      return null;
    } catch (e: unknown) {
      return e instanceof Error ? e.message : String(e);
    } finally {
      probe.close();
    }
  }

  it.skipIf(pythonMissing)('should repair a database with an orphaned index referencing a non-existent column', () => {
    const dbPath = tempDbPath();
    try {
      // Step 1: Create a valid database with all migrations
      const db = new Database(dbPath, { create: true, readwrite: true });
      db.run('PRAGMA journal_mode = WAL');
      db.run('PRAGMA foreign_keys = ON');

      const runner = new MigrationRunner(db);
      runner.runAllMigrations();

      // Verify the content_hash column exists before corrupting
      const initialColumns = db.prepare('PRAGMA table_info(observations)').all() as { name: string }[];
      expect(initialColumns.some(col => col.name === 'content_hash')).toBe(true);

      // Checkpoint WAL so all data is in the main file
      db.run('PRAGMA wal_checkpoint(TRUNCATE)');
      db.close();

      // Step 2: Corrupt the DB
      corruptDbViaPython(dbPath);

      // Step 3: Verify the DB is actually corrupted
      const schemaError = readSchemaError(dbPath);
      expect(schemaError).not.toBeNull();
      expect(schemaError).toContain('malformed database schema');
      expect(schemaError).toContain('idx_observations_content_hash');

      // Step 4: Open via ClaudeMemDatabase — it should auto-repair
      const repaired = new ClaudeMemDatabase(dbPath);

      // Verify the DB is functional
      const tables = repaired.db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY name")
        .all() as { name: string }[];
      const tableNames = tables.map(t => t.name);
      expect(tableNames).toContain('observations');
      expect(tableNames).toContain('sdk_sessions');

      // Verify the index was recreated by the migration runner
      const indexes = repaired.db.prepare("SELECT name FROM sqlite_master WHERE type='index' AND name='idx_observations_content_hash'")
        .all() as { name: string }[];
      expect(indexes.length).toBe(1);

      // Verify the content_hash column was re-added by the migration
      const columns = repaired.db.prepare('PRAGMA table_info(observations)').all() as { name: string }[];
      expect(columns.some(c => c.name === 'content_hash')).toBe(true);

      repaired.close();
    } finally {
      cleanup(dbPath);
    }
  });

  it('should handle a fresh database without triggering repair', () => {
    const dbPath = tempDbPath();
    try {
      const db = new ClaudeMemDatabase(dbPath);
      const tables = db.db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'")
        .all() as { name: string }[];
      expect(tables.length).toBeGreaterThan(0);
      db.close();
    } finally {
      cleanup(dbPath);
    }
  });

  it.skipIf(pythonMissing)('should repair a corrupted DB that has no schema_versions table', () => {
    const dbPath = tempDbPath();
    const scriptPath = join(tmpdir(), `corrupt-nosv-${Date.now()}.py`);
    try {
      // Build a DB whose only schema entry is an orphaned index: no observations
      // table and no schema_versions table exist. This simulates a
      // partially-initialized DB that was synced before migrations ever ran.
      const pythonSource = [
        'import sqlite3, sys',
        'c = sqlite3.connect(sys.argv[1])',
        "c.execute('PRAGMA writable_schema = ON')",
        '# Inject an orphaned index into sqlite_master without any backing table.',
        "idx_sql = 'CREATE INDEX idx_observations_content_hash ON observations(content_hash, created_at_epoch)'",
        'c.execute(',
        `    "INSERT INTO sqlite_master (type, name, tbl_name, rootpage, sql) VALUES ('index', 'idx_observations_content_hash', 'observations', 0, ?)",`,
        '    (idx_sql,)',
        ')',
        "c.execute('PRAGMA writable_schema = OFF')",
        'c.commit()',
        'c.close()',
        '',
      ].join('\n');
      writeFileSync(scriptPath, pythonSource);
      execFileSync('python3', [scriptPath, dbPath], { timeout: 10000 });

      // Verify it's corrupted
      const schemaError = readSchemaError(dbPath);
      expect(schemaError).not.toBeNull();
      expect(schemaError).toContain('malformed database schema');

      // ClaudeMemDatabase must repair and fully initialize despite missing schema_versions
      const repaired = new ClaudeMemDatabase(dbPath);
      const tables = repaired.db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY name")
        .all() as { name: string }[];
      const tableNames = tables.map(t => t.name);
      expect(tableNames).toContain('schema_versions');
      expect(tableNames).toContain('observations');
      expect(tableNames).toContain('sdk_sessions');
      repaired.close();
    } finally {
      cleanup(dbPath);
      if (existsSync(scriptPath)) unlinkSync(scriptPath);
    }
  });

  it.skipIf(pythonMissing)('should preserve existing data through repair and re-migration', () => {
    const dbPath = tempDbPath();
    try {
      // Step 1: Create a fully migrated DB and insert a session + observation
      const db = new Database(dbPath, { create: true, readwrite: true });
      db.run('PRAGMA journal_mode = WAL');
      db.run('PRAGMA foreign_keys = ON');

      const runner = new MigrationRunner(db);
      runner.runAllMigrations();

      const now = new Date().toISOString();
      const epoch = Date.now();
      db.prepare(`
        INSERT INTO sdk_sessions (content_session_id, memory_session_id, project, started_at, started_at_epoch, status)
        VALUES (?, ?, ?, ?, ?, ?)
      `).run('test-content-1', 'test-memory-1', 'test-project', now, epoch, 'active');

      db.prepare(`
        INSERT INTO observations (memory_session_id, project, type, created_at, created_at_epoch)
        VALUES (?, ?, ?, ?, ?)
      `).run('test-memory-1', 'test-project', 'discovery', now, epoch);

      db.run('PRAGMA wal_checkpoint(TRUNCATE)');
      db.close();

      // Step 2: Corrupt the DB
      corruptDbViaPython(dbPath);

      // Step 3: Repair via ClaudeMemDatabase
      const repaired = new ClaudeMemDatabase(dbPath);

      // Data must survive the repair + re-migration
      const sessions = repaired.db.prepare('SELECT COUNT(*) as count FROM sdk_sessions').get() as { count: number };
      const observations = repaired.db.prepare('SELECT COUNT(*) as count FROM observations').get() as { count: number };
      expect(sessions.count).toBe(1);
      expect(observations.count).toBe(1);

      repaired.close();
    } finally {
      cleanup(dbPath);
    }
  });
});
|
||||
Reference in New Issue
Block a user