diff --git a/scripts/cwd-remap.ts b/scripts/cwd-remap.ts
new file mode 100644
index 00000000..8eb12683
--- /dev/null
+++ b/scripts/cwd-remap.ts
@@ -0,0 +1,210 @@
+#!/usr/bin/env bun
+/**
+ * cwd-remap — Rewrite sdk_sessions.project (+ observations.project,
+ * session_summaries.project) using the cwd captured per-message in
+ * pending_messages.cwd as the single source of truth.
+ *
+ * For each distinct cwd:
+ *   - git -C <cwd> rev-parse --absolute-git-dir, --git-common-dir, --show-toplevel
+ *     If git-dir and common-dir differ → worktree. parent = name of the repo
+ *       that owns common-dir, project = parent/basename(toplevel).
+ *     Else → project = basename(toplevel).
+ *   - If the directory doesn't exist, or git errors, skip that cwd.
+ *
+ * Usage:
+ *   bun scripts/cwd-remap.ts           # dry-run (default)
+ *   bun scripts/cwd-remap.ts --apply   # write updates in a single transaction
+ */
+
+import { Database } from 'bun:sqlite';
+import { homedir } from 'os';
+import { join, basename, dirname, isAbsolute } from 'path';
+import { existsSync, copyFileSync } from 'fs';
+import { spawnSync } from 'child_process';
+
+const DB_PATH = join(homedir(), '.claude-mem', 'claude-mem.db');
+const APPLY = process.argv.includes('--apply');
+
+/** Result of inspecting one cwd with git. */
+type Classification =
+  | { kind: 'main'; project: string }
+  | { kind: 'worktree'; project: string; parent: string }
+  | { kind: 'skip'; reason: string };
+
+/**
+ * Runs `git -C <cwd> <args>` synchronously.
+ * Returns trimmed stdout, or null when git exits non-zero or cannot be spawned.
+ */
+function git(cwd: string, args: string[]): string | null {
+  const r = spawnSync('git', ['-C', cwd, ...args], { encoding: 'utf8' });
+  if (r.status !== 0) return null;
+  return r.stdout.trim();
+}
+
+/**
+ * Derives the project name for a cwd from git metadata:
+ *   - main checkout   → basename of the worktree root
+ *   - linked worktree → `<parent repo name>/<basename of the worktree root>`
+ *   - missing dir, non-repo or git failure → skip, with a reason
+ */
+function classify(cwd: string): Classification {
+  if (!existsSync(cwd)) return { kind: 'skip', reason: 'cwd-missing' };
+
+  const gitDir = git(cwd, ['rev-parse', '--absolute-git-dir']);
+  if (!gitDir) return { kind: 'skip', reason: 'not-a-git-repo' };
+
+  const commonDir = git(cwd, ['rev-parse', '--path-format=absolute', '--git-common-dir']);
+  if (!commonDir) return { kind: 'skip', reason: 'no-common-dir' };
+  // git older than 2.31 does not know --path-format and echoes the flag back
+  // instead of failing; skip anything that is not a single absolute path.
+  if (!isAbsolute(commonDir) || commonDir.includes('\n')) {
+    return { kind: 'skip', reason: 'common-dir-not-absolute' };
+  }
+
+  // Use the worktree root, not the cwd — a session may be in a subdir.
+  const toplevel = git(cwd, ['rev-parse', '--show-toplevel']);
+  if (!toplevel) return { kind: 'skip', reason: 'no-toplevel' };
+  const leaf = basename(toplevel);
+
+  if (gitDir === commonDir) {
+    return { kind: 'main', project: leaf };
+  }
+
+  // worktree: common-dir = <parent-repo>/.git (or <parent>.git for bare).
+  // In the bare form the repo is the directory itself, so strip the suffix
+  // instead of taking its dirname (which would name the containing folder).
+  const parentRepoDir = commonDir.endsWith('/.git')
+    ? dirname(commonDir)
+    : commonDir.replace(/\.git$/, '');
+  const parent = basename(parentRepoDir);
+  return { kind: 'worktree', project: `${parent}/${leaf}`, parent };
+}
+
+/**
+ * Classifies every distinct pending_messages.cwd, prints the planned
+ * project changes and, with --apply, writes them in a single transaction.
+ */
+function remapProjects(db: Database): void {
+  const cwdRows = db.prepare(`
+    SELECT cwd, COUNT(*) AS messages
+    FROM pending_messages
+    WHERE cwd IS NOT NULL AND cwd != ''
+    GROUP BY cwd
+  `).all() as Array<{ cwd: string; messages: number }>;
+
+  console.log(`Classifying ${cwdRows.length} distinct cwds via git...`);
+
+  const byCwd = new Map<string, Classification>();
+  const counts = { main: 0, worktree: 0, skip: 0 };
+  for (const { cwd } of cwdRows) {
+    const c = classify(cwd);
+    byCwd.set(cwd, c);
+    counts[c.kind]++;
+  }
+  console.log(` main=${counts.main} worktree=${counts.worktree} skip=${counts.skip}`);
+
+  // Skipped cwds (so user sees what's missing)
+  const skipped = [...byCwd.entries()].filter(([, c]) => c.kind === 'skip') as Array<[string, Extract<Classification, { kind: 'skip' }>]>;
+  if (skipped.length) {
+    console.log('\nSkipped cwds:');
+    for (const [cwd, c] of skipped) console.log(` [${c.reason}] ${cwd}`);
+  }
+
+  // Per-session target: use the EARLIEST pending_messages.cwd for each session.
+  // (Dominant-cwd is wrong: claude-mem's own hooks run from nested dirs like
+  // `.context/claude-mem/` and dominate the count, misattributing the session.)
+  const sessionRows = db.prepare(`
+    SELECT s.id AS session_id, s.memory_session_id, s.content_session_id, s.project AS old_project, p.cwd
+    FROM sdk_sessions s
+    JOIN pending_messages p ON p.content_session_id = s.content_session_id
+    WHERE p.cwd IS NOT NULL AND p.cwd != ''
+      AND p.id = (
+        SELECT MIN(p2.id) FROM pending_messages p2
+        WHERE p2.content_session_id = s.content_session_id
+          AND p2.cwd IS NOT NULL AND p2.cwd != ''
+      )
+  `).all() as Array<{ session_id: number; memory_session_id: string | null; content_session_id: string; old_project: string; cwd: string }>;
+
+  type Target = { sessionId: number; memorySessionId: string | null; contentSessionId: string; oldProject: string; newProject: string; cwd: string };
+  const perSession = new Map<number, Target>();
+
+  for (const r of sessionRows) {
+    const c = byCwd.get(r.cwd);
+    if (!c || c.kind === 'skip') continue;
+    perSession.set(r.session_id, {
+      sessionId: r.session_id,
+      memorySessionId: r.memory_session_id,
+      contentSessionId: r.content_session_id,
+      oldProject: r.old_project,
+      newProject: c.project,
+      cwd: r.cwd,
+    });
+  }
+
+  const targets = [...perSession.values()].filter(t => t.oldProject !== t.newProject);
+
+  console.log(`\nSessions linked to a classified cwd: ${perSession.size}`);
+  console.log(`Sessions whose project would change: ${targets.length}`);
+
+  const summary = new Map<string, number>();
+  for (const t of targets) {
+    const key = `${t.oldProject} → ${t.newProject}`;
+    summary.set(key, (summary.get(key) ?? 0) + 1);
+  }
+  const rows = [...summary.entries()]
+    .map(([mapping, n]) => ({ mapping, sessions: n }))
+    .sort((a, b) => b.sessions - a.sessions);
+  console.log('\nTop mappings:');
+  console.table(rows.slice(0, 30));
+  if (rows.length > 30) console.log(` …and ${rows.length - 30} more mappings`);
+
+  if (!APPLY) {
+    console.log('\nDry-run only. Re-run with --apply to perform UPDATEs.');
+    return;
+  }
+
+  const updSession = db.prepare('UPDATE sdk_sessions SET project = ? WHERE id = ?');
+  const updObs = db.prepare('UPDATE observations SET project = ? WHERE memory_session_id = ?');
+  const updSum = db.prepare('UPDATE session_summaries SET project = ? WHERE memory_session_id = ?');
+
+  let sessionN = 0, obsN = 0, sumN = 0;
+  const tx = db.transaction(() => {
+    for (const t of targets) {
+      sessionN += updSession.run(t.newProject, t.sessionId).changes;
+      if (t.memorySessionId) {
+        obsN += updObs.run(t.newProject, t.memorySessionId).changes;
+        sumN += updSum.run(t.newProject, t.memorySessionId).changes;
+      }
+    }
+  });
+  tx();
+
+  console.log(`\nApplied. sessions=${sessionN} observations=${obsN} session_summaries=${sumN}`);
+}
+
+/**
+ * Entry point: checks the DB exists, backs it up when --apply is given,
+ * then runs the remap and always closes the database handle.
+ */
+function main() {
+  if (!existsSync(DB_PATH)) {
+    console.error(`DB not found at ${DB_PATH}`);
+    process.exit(1);
+  }
+
+  if (APPLY) {
+    const backup = `${DB_PATH}.bak-cwd-remap-${Date.now()}`;
+    copyFileSync(DB_PATH, backup);
+    console.log(`Backup created: ${backup}`);
+  }
+
+  const db = new Database(DB_PATH);
+  try {
+    remapProjects(db);
+  } finally {
+    // Release the handle on every exit path (dry-run return, success, or throw).
+    db.close();
+  }
+}
+
+main();
diff --git a/scripts/worktree-remap.ts b/scripts/worktree-remap.ts
deleted file mode 100644
index 2cecc253..00000000
--- a/scripts/worktree-remap.ts
+++ /dev/null
@@ -1,170 +0,0 @@
-#!/usr/bin/env bun
-/**
- * worktree-remap — Retroactively reattribute past sessions that were written
- * with a plain project name (e.g. `claude-mem`) to the `parent/worktree`
- * composite name when the original worktree can be inferred from the paths
- * in the session's observations or user prompt.
- *
- * Only sessions with HIGH-CONFIDENCE worktree path signatures are remapped.
- * Everything else is left alone.
- *
- * Usage:
- *   bun scripts/worktree-remap.ts           # dry-run (default)
- *   bun scripts/worktree-remap.ts --apply   # write changes in a transaction
- */
-
-import { Database } from 'bun:sqlite';
-import { homedir } from 'os';
-import { join } from 'path';
-import { existsSync, copyFileSync } from 'fs';
-
-const DB_PATH = join(homedir(), '.claude-mem', 'claude-mem.db');
-const APPLY = process.argv.includes('--apply');
-
-const WORKTREE_PATTERNS: Array<{ name: string; regex: RegExp }> = [
-  { name: 'conductor', regex: /\/conductor\/workspaces\/([^/]+)\/([^/"'\s)]+)/ },
-  { name: 'superset', regex: /\/\.superset\/worktrees\/([^/]+)\/([^/"'\s)]+)/ },
-];
-
-interface SessionRow {
-  id: number;
-  memory_session_id: string | null;
-  project: string;
-  user_prompt: string | null;
-}
-
-function allMatches(text: string | null | undefined): Array<{ parent: string; worktree: string }> {
-  if (!text) return [];
-  const results: Array<{ parent: string; worktree: string }> = [];
-  for (const p of WORKTREE_PATTERNS) {
-    const global = new RegExp(p.regex.source, 'g');
-    let m: RegExpExecArray | null;
-    while ((m = global.exec(text)) !== null) {
-      results.push({ parent: m[1], worktree: m[2] });
-    }
-  }
-  return results;
-}
-
-/**
- * Collects every worktree path match across the session's observations + user prompt,
- * then picks the inference using this priority:
- *   1. A match whose worktree basename === the session's current plain project name.
- *      (Pre-#1820 sessions stored the worktree basename as `project` — these are trusted.)
- *   2. If none match the current project, and there's a single unambiguous (parent, worktree)
- *      across ALL signals, use it.
- *   3. Otherwise skip (ambiguous — likely cross-worktree reads).
- */
-function inferWorktree(
-  db: Database,
-  memorySessionId: string | null,
-  userPrompt: string | null,
-  currentProject: string
-): { parent: string; worktree: string } | null {
-  const matches: Array<{ parent: string; worktree: string }> = [];
-
-  if (memorySessionId) {
-    const rows = db.prepare(`
-      SELECT files_read, files_modified, source_input_summary, metadata
-      FROM observations
-      WHERE memory_session_id = ?
-        AND (files_read LIKE '%/conductor/workspaces/%' OR files_modified LIKE '%/conductor/workspaces/%'
-          OR source_input_summary LIKE '%/conductor/workspaces/%' OR metadata LIKE '%/conductor/workspaces/%'
-          OR files_read LIKE '%.superset/worktrees/%' OR files_modified LIKE '%.superset/worktrees/%'
-          OR source_input_summary LIKE '%.superset/worktrees/%' OR metadata LIKE '%.superset/worktrees/%')
-    `).all(memorySessionId) as Array<{ files_read: string | null; files_modified: string | null; source_input_summary: string | null; metadata: string | null }>;
-
-    for (const r of rows) {
-      matches.push(...allMatches(r.files_read));
-      matches.push(...allMatches(r.files_modified));
-      matches.push(...allMatches(r.source_input_summary));
-      matches.push(...allMatches(r.metadata));
-    }
-  }
-
-  matches.push(...allMatches(userPrompt));
-  if (matches.length === 0) return null;
-
-  const wtMatch = matches.find(m => m.worktree === currentProject);
-  if (wtMatch) return wtMatch;
-
-  const signatures = new Set(matches.map(m => `${m.parent}/${m.worktree}`));
-  if (signatures.size === 1) return matches[0];
-
-  return null;
-}
-
-function main() {
-  if (!existsSync(DB_PATH)) {
-    console.error(`DB not found at ${DB_PATH}`);
-    process.exit(1);
-  }
-
-  if (APPLY) {
-    const backup = `${DB_PATH}.bak-worktree-remap-${Date.now()}`;
-    copyFileSync(DB_PATH, backup);
-    console.log(`Backup created: ${backup}`);
-  }
-
-  const db = new Database(DB_PATH);
-
-  const sessions = db.prepare(`
-    SELECT id, memory_session_id, project, user_prompt
-    FROM sdk_sessions
-    WHERE project NOT LIKE '%/%' AND project != ''
-  `).all() as SessionRow[];
-
-  console.log(`Scanning ${sessions.length} plain-project sessions...`);
-
-  type Remap = { sessionId: number; memorySessionId: string | null; oldProject: string; newProject: string };
-  const remaps: Remap[] = [];
-  const summary = new Map();
-
-  for (const s of sessions) {
-    const hit = inferWorktree(db, s.memory_session_id, s.user_prompt, s.project);
-    if (!hit) continue;
-
-    const newProject = `${hit.parent}/${hit.worktree}`;
-    if (newProject === s.project) continue;
-
-    remaps.push({ sessionId: s.id, memorySessionId: s.memory_session_id, oldProject: s.project, newProject });
-    const key = `${s.project} → ${newProject}`;
-    const entry = summary.get(key);
-    if (entry) entry.count++;
-    else summary.set(key, { count: 1, firstExample: s.id });
-  }
-
-  const rows = Array.from(summary.entries())
-    .map(([mapping, v]) => ({ mapping, sessions: v.count, exampleSessionId: v.firstExample }))
-    .sort((a, b) => b.sessions - a.sessions);
-
-  console.log('\nRemap summary:');
-  console.table(rows);
-  console.log(`\nTotal sessions to remap: ${remaps.length}`);
-
-  if (!APPLY) {
-    console.log('\nDry-run only. Re-run with --apply to perform UPDATEs.');
-    return;
-  }
-
-  console.log('\nApplying updates in a single transaction...');
-  const updateSession = db.prepare('UPDATE sdk_sessions SET project=? WHERE id=?');
-  const updateObs = db.prepare('UPDATE observations SET project=? WHERE memory_session_id=?');
-  const updateSum = db.prepare('UPDATE session_summaries SET project=? WHERE memory_session_id=?');
-
-  let sessionUpdates = 0, obsUpdates = 0, sumUpdates = 0;
-  const tx = db.transaction(() => {
-    for (const r of remaps) {
-      sessionUpdates += updateSession.run(r.newProject, r.sessionId).changes;
-      if (r.memorySessionId) {
-        obsUpdates += updateObs.run(r.newProject, r.memorySessionId).changes;
-        sumUpdates += updateSum.run(r.newProject, r.memorySessionId).changes;
-      }
-    }
-  });
-  tx();
-
-  console.log(`Done. sessions=${sessionUpdates} observations=${obsUpdates} session_summaries=${sumUpdates}`);
-}
-
-main();