feat: Implement user prompt syncing to Chroma and enhance timeline querying

- Added `getObservationById` method to retrieve observations by ID in SessionStore.
- Introduced `getSessionSummariesByIds` and `getUserPromptsByIds` methods for fetching session summaries and user prompts by IDs.
- Developed `getTimelineAroundTimestamp` and `getTimelineAroundObservation` methods to provide a unified timeline of observations, sessions, and prompts around a specified anchor point.
- Enhanced ChromaSync to format and sync user prompts, including a new `syncUserPrompt` method.
- Updated WorkerService to sync the latest user prompt to Chroma after updating the worker port.
- Created tests for timeline querying and MCP handler logic to ensure functionality.
- Documented the implementation plan for user prompts and timeline context tool in the Chroma search completion plan.
This commit is contained in:
Alex Newman
2025-11-03 16:55:33 -05:00
parent c6bf72ca72
commit 633f89a5fb
18 changed files with 2152 additions and 229 deletions
+9 -9
View File
@@ -190,10 +190,10 @@ function contextHook(input?: SessionStartInput, useColors: boolean = false, useI
if (timelineObs.length > 0) {
// Legend/Key
if (useColors) {
output.push(`${colors.dim}Legend: 🎯 session-request | 🔴 bugfix | 🟢 feature | 🔵 refactor | change | 🟡 discovery | 🟤 decision${colors.reset}`);
output.push(`${colors.dim}Legend: 🎯 session-request | 🔴 bugfix | 🟣 feature | 🔄 refactor | change | 🔵 discovery | 🧠 decision${colors.reset}`);
output.push('');
} else {
output.push(`**Legend:** 🎯 session-request | 🔴 bugfix | 🟢 feature | 🔵 refactor | change | 🟡 discovery | 🟤 decision`);
output.push(`**Legend:** 🎯 session-request | 🔴 bugfix | 🟣 feature | 🔄 refactor | change | 🔵 discovery | 🧠 decision`);
output.push('');
}
@@ -202,13 +202,13 @@ function contextHook(input?: SessionStartInput, useColors: boolean = false, useI
output.push(`${colors.dim}💡 Progressive Disclosure: This index shows WHAT exists (titles) and retrieval COST (token counts).${colors.reset}`);
output.push(`${colors.dim} → Use MCP search tools to fetch full observation details on-demand (Layer 2)${colors.reset}`);
output.push(`${colors.dim} → Prefer searching observations over re-reading code for past decisions and learnings${colors.reset}`);
output.push(`${colors.dim} → Critical types (🔴 bugfix, 🟤 decision) often worth fetching immediately${colors.reset}`);
output.push(`${colors.dim} → Critical types (🔴 bugfix, 🧠 decision) often worth fetching immediately${colors.reset}`);
output.push('');
} else {
output.push(`💡 **Progressive Disclosure:** This index shows WHAT exists (titles) and retrieval COST (token counts).`);
output.push(`- Use MCP search tools to fetch full observation details on-demand (Layer 2)`);
output.push(`- Prefer searching observations over re-reading code for past decisions and learnings`);
output.push(`- Critical types (🔴 bugfix, 🟤 decision) often worth fetching immediately`);
output.push(`- Critical types (🔴 bugfix, 🧠 decision) often worth fetching immediately`);
output.push('');
}
@@ -340,19 +340,19 @@ function contextHook(input?: SessionStartInput, useColors: boolean = false, useI
icon = '🔴';
break;
case 'feature':
icon = '🟢';
icon = '🟣';
break;
case 'refactor':
icon = '🔵';
icon = '🔄';
break;
case 'change':
icon = '';
icon = '';
break;
case 'discovery':
icon = '🟡';
icon = '🔵';
break;
case 'decision':
icon = '🟤';
icon = '🧠';
break;
default:
icon = '•';
+383 -14
View File
@@ -67,17 +67,29 @@ async function queryChroma(
return { ids: [], distances: [], metadatas: [] };
}
// Extract unique observation IDs from document IDs
// Extract unique IDs from document IDs
const ids: number[] = [];
const docIds = parsed.ids?.[0] || [];
for (const docId of docIds) {
// Extract sqlite_id from document ID (format: obs_{id}_narrative, obs_{id}_fact_0, etc)
const match = docId.match(/obs_(\d+)_/);
if (match) {
const sqliteId = parseInt(match[1], 10);
if (!ids.includes(sqliteId)) {
ids.push(sqliteId);
}
// Extract sqlite_id from document ID (supports three formats):
// - obs_{id}_narrative, obs_{id}_fact_0, etc (observations)
// - summary_{id}_request, summary_{id}_learned, etc (session summaries)
// - prompt_{id} (user prompts)
const obsMatch = docId.match(/obs_(\d+)_/);
const summaryMatch = docId.match(/summary_(\d+)_/);
const promptMatch = docId.match(/prompt_(\d+)/);
let sqliteId: number | null = null;
if (obsMatch) {
sqliteId = parseInt(obsMatch[1], 10);
} else if (summaryMatch) {
sqliteId = parseInt(summaryMatch[1], 10);
} else if (promptMatch) {
sqliteId = parseInt(promptMatch[1], 10);
}
if (sqliteId !== null && !ids.includes(sqliteId)) {
ids.push(sqliteId);
}
}
@@ -285,9 +297,9 @@ function formatSessionResult(session: SessionSummarySearchResult, index: number)
* Format user prompt as index entry (truncated text, date, ID only)
*/
function formatUserPromptIndex(prompt: UserPromptSearchResult, index: number): string {
const truncated = prompt.prompt_text.length > 100
? prompt.prompt_text.substring(0, 100) + '...'
: prompt.prompt_text;
const truncated = prompt.prompt.length > 100
? prompt.prompt.substring(0, 100) + '...'
: prompt.prompt;
const date = new Date(prompt.created_at_epoch).toLocaleString();
return `${index + 1}. "${truncated}"
@@ -303,7 +315,7 @@ function formatUserPromptResult(prompt: UserPromptSearchResult, index: number):
contentParts.push(`## User Prompt #${prompt.prompt_number}`);
contentParts.push(`*Source: claude-mem://user-prompt/${prompt.id}*`);
contentParts.push('');
contentParts.push(prompt.prompt_text);
contentParts.push(prompt.prompt);
contentParts.push('');
contentParts.push('---');
@@ -441,7 +453,44 @@ const tools = [
handler: async (args: any) => {
try {
const { query, format = 'index', ...options } = args;
const results = search.searchSessions(query, options);
let results: SessionSummarySearchResult[] = [];
// Hybrid search: Try Chroma semantic search first, fall back to FTS5
if (chromaClient) {
try {
console.error('[search-server] Using hybrid semantic search for sessions');
// Step 1: Chroma semantic search (top 100)
const chromaResults = await queryChroma(query, 100, { doc_type: 'session_summary' });
console.error(`[search-server] Chroma returned ${chromaResults.ids.length} semantic matches`);
if (chromaResults.ids.length > 0) {
// Step 2: Filter by recency (90 days)
const ninetyDaysAgo = Math.floor(Date.now() / 1000) - (90 * 24 * 60 * 60);
const recentIds = chromaResults.ids.filter((id, idx) => {
const meta = chromaResults.metadatas[idx];
return meta && meta.created_at_epoch > ninetyDaysAgo;
});
console.error(`[search-server] ${recentIds.length} results within 90-day window`);
// Step 3: Hydrate from SQLite in temporal order
if (recentIds.length > 0) {
const limit = options.limit || 20;
results = store.getSessionSummariesByIds(recentIds, { orderBy: 'date_desc', limit });
console.error(`[search-server] Hydrated ${results.length} sessions from SQLite`);
}
}
} catch (chromaError: any) {
console.error('[search-server] Chroma query failed, falling back to FTS5:', chromaError.message);
}
}
// Fall back to FTS5 if Chroma unavailable or returned no results
if (results.length === 0) {
console.error('[search-server] Using FTS5 keyword search');
results = search.searchSessions(query, options);
}
if (results.length === 0) {
return {
@@ -970,7 +1019,44 @@ const tools = [
handler: async (args: any) => {
try {
const { query, format = 'index', ...options } = args;
const results = search.searchUserPrompts(query, options);
let results: UserPromptSearchResult[] = [];
// Hybrid search: Try Chroma semantic search first, fall back to FTS5
if (chromaClient) {
try {
console.error('[search-server] Using hybrid semantic search for user prompts');
// Step 1: Chroma semantic search (top 100)
const chromaResults = await queryChroma(query, 100, { doc_type: 'user_prompt' });
console.error(`[search-server] Chroma returned ${chromaResults.ids.length} semantic matches`);
if (chromaResults.ids.length > 0) {
// Step 2: Filter by recency (90 days)
const ninetyDaysAgo = Math.floor(Date.now() / 1000) - (90 * 24 * 60 * 60);
const recentIds = chromaResults.ids.filter((id, idx) => {
const meta = chromaResults.metadatas[idx];
return meta && meta.created_at_epoch > ninetyDaysAgo;
});
console.error(`[search-server] ${recentIds.length} results within 90-day window`);
// Step 3: Hydrate from SQLite in temporal order
if (recentIds.length > 0) {
const limit = options.limit || 20;
results = store.getUserPromptsByIds(recentIds, { orderBy: 'date_desc', limit });
console.error(`[search-server] Hydrated ${results.length} user prompts from SQLite`);
}
}
} catch (chromaError: any) {
console.error('[search-server] Chroma query failed, falling back to FTS5:', chromaError.message);
}
}
// Fall back to FTS5 if Chroma unavailable or returned no results
if (results.length === 0) {
console.error('[search-server] Using FTS5 keyword search');
results = search.searchUserPrompts(query, options);
}
if (results.length === 0) {
return {
@@ -1008,6 +1094,289 @@ const tools = [
};
}
}
},
{
name: 'get_context_timeline',
description: 'Get a unified timeline of context (observations, sessions, and prompts) around a specific point in time. All record types are interleaved chronologically. Useful for understanding "what was happening when X occurred". Returns depth_before records before anchor + anchor + depth_after records after (total: depth_before + 1 + depth_after mixed records).',
inputSchema: z.object({
anchor: z.union([
z.number().describe('Observation ID to center timeline around'),
z.string().describe('Session ID (format: S123) or ISO timestamp to center timeline around')
]).describe('Anchor point: observation ID, session ID (e.g., "S123"), or ISO timestamp'),
depth_before: z.number().min(0).max(50).default(10).describe('Number of records to retrieve before anchor, not including anchor (default: 10)'),
depth_after: z.number().min(0).max(50).default(10).describe('Number of records to retrieve after anchor, not including anchor (default: 10)'),
project: z.string().optional().describe('Filter by project name')
}),
handler: async (args: any) => {
try {
const { anchor, depth_before = 10, depth_after = 10, project } = args;
let anchorEpoch: number;
let anchorId: string | number = anchor;
// Resolve anchor and get timeline data
let timeline;
if (typeof anchor === 'number') {
// Observation ID - use ID-based boundary detection
const obs = store.getObservationById(anchor);
if (!obs) {
return {
content: [{
type: 'text' as const,
text: `Observation #${anchor} not found`
}],
isError: true
};
}
anchorEpoch = obs.created_at_epoch;
timeline = store.getTimelineAroundObservation(anchor, anchorEpoch, depth_before, depth_after, project);
} else if (typeof anchor === 'string') {
// Session ID or ISO timestamp
if (anchor.startsWith('S') || anchor.startsWith('#S')) {
const sessionId = anchor.replace(/^#?S/, '');
const sessionNum = parseInt(sessionId, 10);
const sessions = store.getSessionSummariesByIds([sessionNum]);
if (sessions.length === 0) {
return {
content: [{
type: 'text' as const,
text: `Session #${sessionNum} not found`
}],
isError: true
};
}
anchorEpoch = sessions[0].created_at_epoch;
anchorId = `S${sessionNum}`;
timeline = store.getTimelineAroundTimestamp(anchorEpoch, depth_before, depth_after, project);
} else {
// ISO timestamp
const date = new Date(anchor);
if (isNaN(date.getTime())) {
return {
content: [{
type: 'text' as const,
text: `Invalid timestamp: ${anchor}`
}],
isError: true
};
}
anchorEpoch = date.getTime(); // Keep as milliseconds
timeline = store.getTimelineAroundTimestamp(anchorEpoch, depth_before, depth_after, project);
}
} else {
return {
content: [{
type: 'text' as const,
text: 'Invalid anchor: must be observation ID (number), session ID (e.g., "S123"), or ISO timestamp'
}],
isError: true
};
}
// Combine and sort all items chronologically
interface TimelineItem {
type: 'observation' | 'session' | 'prompt';
data: any;
epoch: number;
}
const items: TimelineItem[] = [
...timeline.observations.map(obs => ({ type: 'observation' as const, data: obs, epoch: obs.created_at_epoch })),
...timeline.sessions.map(sess => ({ type: 'session' as const, data: sess, epoch: sess.created_at_epoch })),
...timeline.prompts.map(prompt => ({ type: 'prompt' as const, data: prompt, epoch: prompt.created_at_epoch }))
];
items.sort((a, b) => a.epoch - b.epoch);
if (items.length === 0) {
const anchorDate = new Date(anchorEpoch).toLocaleString();
return {
content: [{
type: 'text' as const,
text: `No context found around ${anchorDate} (${depth_before} records before, ${depth_after} records after)`
}]
};
}
// Helper functions matching context-hook.ts
function formatDate(epochMs: number): string {
const date = new Date(epochMs);
return date.toLocaleString('en-US', {
month: 'short',
day: 'numeric',
year: 'numeric'
});
}
function formatTime(epochMs: number): string {
const date = new Date(epochMs);
return date.toLocaleString('en-US', {
hour: 'numeric',
minute: '2-digit',
hour12: true
});
}
function formatDateTime(epochMs: number): string {
const date = new Date(epochMs);
return date.toLocaleString('en-US', {
month: 'short',
day: 'numeric',
hour: 'numeric',
minute: '2-digit',
hour12: true
});
}
function estimateTokens(text: string | null): number {
if (!text) return 0;
return Math.ceil(text.length / 4);
}
// Format results matching context-hook.ts exactly
const lines: string[] = [];
// Header
lines.push(`# Timeline around anchor: ${anchorId}`);
lines.push(`**Window:** ${depth_before} records before → ${depth_after} records after | **Items:** ${items.length} (${timeline.observations.length} obs, ${timeline.sessions.length} sessions, ${timeline.prompts.length} prompts)`);
lines.push('');
// Legend
lines.push(`**Legend:** 🎯 session-request | 🔴 bugfix | 🟣 feature | 🔄 refactor | ✅ change | 🔵 discovery | 🧠 decision`);
lines.push('');
// Group by day
const dayMap = new Map<string, TimelineItem[]>();
for (const item of items) {
const day = formatDate(item.epoch);
if (!dayMap.has(day)) {
dayMap.set(day, []);
}
dayMap.get(day)!.push(item);
}
// Sort days chronologically
const sortedDays = Array.from(dayMap.entries()).sort((a, b) => {
const aDate = new Date(a[0]).getTime();
const bDate = new Date(b[0]).getTime();
return aDate - bDate;
});
// Render each day
for (const [day, dayItems] of sortedDays) {
lines.push(`### ${day}`);
lines.push('');
let currentFile: string | null = null;
let lastTime = '';
let tableOpen = false;
for (const item of dayItems) {
const isAnchor = (
(typeof anchorId === 'number' && item.type === 'observation' && item.data.id === anchorId) ||
(typeof anchorId === 'string' && anchorId.startsWith('S') && item.type === 'session' && `S${item.data.id}` === anchorId)
);
if (item.type === 'session') {
// Close any open table
if (tableOpen) {
lines.push('');
tableOpen = false;
currentFile = null;
lastTime = '';
}
// Render session
const sess = item.data;
const title = sess.request || 'Session summary';
const link = `claude-mem://session-summary/${sess.id}`;
const marker = isAnchor ? ' ← **ANCHOR**' : '';
lines.push(`**🎯 #S${sess.id}** ${title} (${formatDateTime(item.epoch)}) [→](${link})${marker}`);
lines.push('');
} else if (item.type === 'prompt') {
// Close any open table
if (tableOpen) {
lines.push('');
tableOpen = false;
currentFile = null;
lastTime = '';
}
// Render prompt
const prompt = item.data;
const truncated = prompt.prompt.length > 100 ? prompt.prompt.substring(0, 100) + '...' : prompt.prompt;
lines.push(`**💬 User Prompt #${prompt.prompt_number}** (${formatDateTime(item.epoch)})`);
lines.push(`> ${truncated}`);
lines.push('');
} else if (item.type === 'observation') {
// Render observation in table
const obs = item.data;
const file = 'General'; // Simplified for timeline view
// Check if we need a new file section
if (file !== currentFile) {
// Close previous table
if (tableOpen) {
lines.push('');
}
// File header
lines.push(`**${file}**`);
lines.push(`| ID | Time | T | Title | Tokens |`);
lines.push(`|----|------|---|-------|--------|`);
currentFile = file;
tableOpen = true;
lastTime = '';
}
// Map observation type to emoji
let icon = '•';
switch (obs.type) {
case 'bugfix': icon = '🔴'; break;
case 'feature': icon = '🟣'; break;
case 'refactor': icon = '🔄'; break;
case 'change': icon = '✅'; break;
case 'discovery': icon = '🔵'; break;
case 'decision': icon = '🧠'; break;
}
const time = formatTime(item.epoch);
const title = obs.title || 'Untitled';
const tokens = estimateTokens(obs.narrative);
const showTime = time !== lastTime;
const timeDisplay = showTime ? time : '″';
lastTime = time;
const anchorMarker = isAnchor ? ' ← **ANCHOR**' : '';
lines.push(`| #${obs.id} | ${timeDisplay} | ${icon} | ${title}${anchorMarker} | ~${tokens} |`);
}
}
// Close final table if open
if (tableOpen) {
lines.push('');
}
}
return {
content: [{
type: 'text' as const,
text: lines.join('\n')
}]
};
} catch (error: any) {
return {
content: [{
type: 'text' as const,
text: `Timeline query failed: ${error.message}`
}],
isError: true
};
}
}
}
];
+231
View File
@@ -616,6 +616,19 @@ export class SessionStore {
return stmt.all(sdkSessionId) as any[];
}
/**
* Get a single observation by ID
*/
getObservationById(id: number): any | null {
const stmt = this.db.prepare(`
SELECT *
FROM observations
WHERE id = ?
`);
return stmt.get(id) as any || null;
}
/**
* Get observations by array of IDs with ordering and limit
*/
@@ -1115,6 +1128,224 @@ export class SessionStore {
return result.changes;
}
/**
* Get session summaries by IDs (for hybrid Chroma search)
* Returns summaries in specified temporal order
*/
getSessionSummariesByIds(
ids: number[],
options: { orderBy?: 'date_desc' | 'date_asc'; limit?: number } = {}
): any[] {
if (ids.length === 0) return [];
const { orderBy = 'date_desc', limit } = options;
const orderClause = orderBy === 'date_asc' ? 'ASC' : 'DESC';
const limitClause = limit ? `LIMIT ${limit}` : '';
const placeholders = ids.map(() => '?').join(',');
const stmt = this.db.prepare(`
SELECT * FROM session_summaries
WHERE id IN (${placeholders})
ORDER BY created_at_epoch ${orderClause}
${limitClause}
`);
return stmt.all(...ids) as any[];
}
/**
* Get user prompts by IDs (for hybrid Chroma search)
* Returns prompts in specified temporal order
*/
getUserPromptsByIds(
ids: number[],
options: { orderBy?: 'date_desc' | 'date_asc'; limit?: number } = {}
): any[] {
if (ids.length === 0) return [];
const { orderBy = 'date_desc', limit } = options;
const orderClause = orderBy === 'date_asc' ? 'ASC' : 'DESC';
const limitClause = limit ? `LIMIT ${limit}` : '';
const placeholders = ids.map(() => '?').join(',');
const stmt = this.db.prepare(`
SELECT
up.*,
s.project,
s.sdk_session_id
FROM user_prompts up
JOIN sdk_sessions s ON up.claude_session_id = s.claude_session_id
WHERE up.id IN (${placeholders})
ORDER BY up.created_at_epoch ${orderClause}
${limitClause}
`);
return stmt.all(...ids) as any[];
}
/**
* Get a unified timeline of all records (observations, sessions, prompts) around an anchor point
* @param anchorEpoch The anchor timestamp (epoch milliseconds)
* @param depthBefore Number of records to retrieve before anchor (any type)
* @param depthAfter Number of records to retrieve after anchor (any type)
* @param project Optional project filter
* @returns Object containing observations, sessions, and prompts for the specified window
*/
getTimelineAroundTimestamp(
anchorEpoch: number,
depthBefore: number = 10,
depthAfter: number = 10,
project?: string
): {
observations: any[];
sessions: any[];
prompts: any[];
} {
return this.getTimelineAroundObservation(null, anchorEpoch, depthBefore, depthAfter, project);
}
/**
* Get timeline around a specific observation ID
* Uses observation ID offsets to determine time boundaries, then fetches all record types in that window
*/
getTimelineAroundObservation(
anchorObservationId: number | null,
anchorEpoch: number,
depthBefore: number = 10,
depthAfter: number = 10,
project?: string
): {
observations: any[];
sessions: any[];
prompts: any[];
} {
const projectFilter = project ? 'AND project = ?' : '';
const projectParams = project ? [project] : [];
let startEpoch: number;
let endEpoch: number;
if (anchorObservationId !== null) {
// Get boundary observations by ID offset
const beforeQuery = `
SELECT id, created_at_epoch
FROM observations
WHERE id <= ? ${projectFilter}
ORDER BY id DESC
LIMIT ?
`;
const afterQuery = `
SELECT id, created_at_epoch
FROM observations
WHERE id >= ? ${projectFilter}
ORDER BY id ASC
LIMIT ?
`;
try {
const beforeRecords = this.db.prepare(beforeQuery).all(anchorObservationId, ...projectParams, depthBefore + 1) as any[];
const afterRecords = this.db.prepare(afterQuery).all(anchorObservationId, ...projectParams, depthAfter + 1) as any[];
// Get the earliest and latest timestamps from boundary observations
if (beforeRecords.length === 0 && afterRecords.length === 0) {
return { observations: [], sessions: [], prompts: [] };
}
startEpoch = beforeRecords.length > 0 ? beforeRecords[beforeRecords.length - 1].created_at_epoch : anchorEpoch;
endEpoch = afterRecords.length > 0 ? afterRecords[afterRecords.length - 1].created_at_epoch : anchorEpoch;
} catch (err: any) {
console.error('[SessionStore] Error getting boundary observations:', err.message);
return { observations: [], sessions: [], prompts: [] };
}
} else {
// For timestamp-based anchors, use time-based boundaries
// Get observations to find the time window
const beforeQuery = `
SELECT created_at_epoch
FROM observations
WHERE created_at_epoch <= ? ${projectFilter}
ORDER BY created_at_epoch DESC
LIMIT ?
`;
const afterQuery = `
SELECT created_at_epoch
FROM observations
WHERE created_at_epoch >= ? ${projectFilter}
ORDER BY created_at_epoch ASC
LIMIT ?
`;
try {
const beforeRecords = this.db.prepare(beforeQuery).all(anchorEpoch, ...projectParams, depthBefore) as any[];
const afterRecords = this.db.prepare(afterQuery).all(anchorEpoch, ...projectParams, depthAfter + 1) as any[];
if (beforeRecords.length === 0 && afterRecords.length === 0) {
return { observations: [], sessions: [], prompts: [] };
}
startEpoch = beforeRecords.length > 0 ? beforeRecords[beforeRecords.length - 1].created_at_epoch : anchorEpoch;
endEpoch = afterRecords.length > 0 ? afterRecords[afterRecords.length - 1].created_at_epoch : anchorEpoch;
} catch (err: any) {
console.error('[SessionStore] Error getting boundary timestamps:', err.message);
return { observations: [], sessions: [], prompts: [] };
}
}
// Now query ALL record types within the time window
const obsQuery = `
SELECT *
FROM observations
WHERE created_at_epoch >= ? AND created_at_epoch <= ? ${projectFilter}
ORDER BY created_at_epoch ASC
`;
const sessQuery = `
SELECT *
FROM session_summaries
WHERE created_at_epoch >= ? AND created_at_epoch <= ? ${projectFilter}
ORDER BY created_at_epoch ASC
`;
const promptQuery = `
SELECT up.*, s.project, s.sdk_session_id
FROM user_prompts up
JOIN sdk_sessions s ON up.claude_session_id = s.claude_session_id
WHERE up.created_at_epoch >= ? AND up.created_at_epoch <= ? ${projectFilter.replace('project', 's.project')}
ORDER BY up.created_at_epoch ASC
`;
try {
const observations = this.db.prepare(obsQuery).all(startEpoch, endEpoch, ...projectParams) as any[];
const sessions = this.db.prepare(sessQuery).all(startEpoch, endEpoch, ...projectParams) as any[];
const prompts = this.db.prepare(promptQuery).all(startEpoch, endEpoch, ...projectParams) as any[];
return {
observations,
sessions: sessions.map(s => ({
id: s.id,
sdk_session_id: s.sdk_session_id,
project: s.project,
request: s.request,
completed: s.completed,
next_steps: s.next_steps,
created_at: s.created_at,
created_at_epoch: s.created_at_epoch
})),
prompts: prompts.map(p => ({
id: p.id,
claude_session_id: p.claude_session_id,
project: p.project,
prompt: p.prompt_text,
created_at: p.created_at,
created_at_epoch: p.created_at_epoch
}))
};
} catch (err: any) {
console.error('[SessionStore] Error querying timeline records:', err.message);
return { observations: [], sessions: [], prompts: [] };
}
}
/**
* Close the database connection
*/
+247 -10
View File
@@ -55,6 +55,17 @@ interface StoredSummary {
created_at_epoch: number;
}
interface StoredUserPrompt {
id: number;
claude_session_id: string;
prompt_number: number;
prompt_text: string;
created_at: string;
created_at_epoch: number;
sdk_session_id: string;
project: string;
}
export class ChromaSync {
private client: Client | null = null;
private connected: boolean = false;
@@ -404,27 +415,182 @@ export class ChromaSync {
await this.addDocuments(documents);
}
/**
* Format user prompt into Chroma document
* Each prompt becomes a single document (unlike observations/summaries which split by field)
*/
private formatUserPromptDoc(prompt: StoredUserPrompt): ChromaDocument {
return {
id: `prompt_${prompt.id}`,
document: prompt.prompt_text,
metadata: {
sqlite_id: prompt.id,
doc_type: 'user_prompt',
sdk_session_id: prompt.sdk_session_id,
project: prompt.project,
created_at_epoch: prompt.created_at_epoch,
prompt_number: prompt.prompt_number
}
};
}
/**
* Sync a single user prompt to Chroma
* Blocks until sync completes, throws on error
*/
async syncUserPrompt(
promptId: number,
sdkSessionId: string,
project: string,
promptText: string,
promptNumber: number,
createdAtEpoch: number
): Promise<void> {
// Create StoredUserPrompt format
const stored: StoredUserPrompt = {
id: promptId,
claude_session_id: '', // Not needed for Chroma sync
prompt_number: promptNumber,
prompt_text: promptText,
created_at: new Date(createdAtEpoch * 1000).toISOString(),
created_at_epoch: createdAtEpoch,
sdk_session_id: sdkSessionId,
project: project
};
const document = this.formatUserPromptDoc(stored);
logger.info('CHROMA_SYNC', 'Syncing user prompt', {
promptId,
project
});
await this.addDocuments([document]);
}
/**
* Fetch all existing document IDs from Chroma collection
* Returns Sets of SQLite IDs for observations, summaries, and prompts
*/
private async getExistingChromaIds(): Promise<{
observations: Set<number>;
summaries: Set<number>;
prompts: Set<number>;
}> {
await this.ensureConnection();
if (!this.client) {
throw new Error('Chroma client not initialized');
}
const observationIds = new Set<number>();
const summaryIds = new Set<number>();
const promptIds = new Set<number>();
let offset = 0;
const limit = 1000; // Large batches, metadata only = fast
logger.info('CHROMA_SYNC', 'Fetching existing Chroma document IDs...', { project: this.project });
while (true) {
try {
const result = await this.client.callTool({
name: 'chroma_get_documents',
arguments: {
collection_name: this.collectionName,
limit,
offset,
where: { project: this.project }, // Filter by project
include: ['metadatas']
}
});
const data = result.content[0];
if (data.type !== 'text') {
throw new Error('Unexpected response type from chroma_get_documents');
}
const parsed = JSON.parse(data.text);
const metadatas = parsed.metadatas || [];
if (metadatas.length === 0) {
break; // No more documents
}
// Extract SQLite IDs from metadata
for (const meta of metadatas) {
if (meta.sqlite_id) {
if (meta.doc_type === 'observation') {
observationIds.add(meta.sqlite_id);
} else if (meta.doc_type === 'summary') {
summaryIds.add(meta.sqlite_id);
} else if (meta.doc_type === 'prompt') {
promptIds.add(meta.sqlite_id);
}
}
}
offset += limit;
logger.debug('CHROMA_SYNC', 'Fetched batch of existing IDs', {
project: this.project,
offset,
batchSize: metadatas.length
});
} catch (error) {
logger.error('CHROMA_SYNC', 'Failed to fetch existing IDs', { project: this.project }, error as Error);
throw error;
}
}
logger.info('CHROMA_SYNC', 'Existing IDs fetched', {
project: this.project,
observations: observationIds.size,
summaries: summaryIds.size,
prompts: promptIds.size
});
return { observations: observationIds, summaries: summaryIds, prompts: promptIds };
}
/**
* Backfill: Sync all observations missing from Chroma
* Reads from SQLite and syncs in batches
* Throws error if backfill fails
*/
async ensureBackfilled(): Promise<void> {
logger.info('CHROMA_SYNC', 'Starting backfill', { project: this.project });
logger.info('CHROMA_SYNC', 'Starting smart backfill', { project: this.project });
await this.ensureCollection();
// Fetch existing IDs from Chroma (fast, metadata only)
const existing = await this.getExistingChromaIds();
const db = new SessionStore();
try {
// Get all observations for this project
// Build exclusion list for observations
const existingObsIds = Array.from(existing.observations);
const obsExclusionClause = existingObsIds.length > 0
? `AND id NOT IN (${existingObsIds.join(',')})`
: '';
// Get only observations missing from Chroma
const observations = db.db.prepare(`
SELECT * FROM observations WHERE project = ? ORDER BY id ASC
SELECT * FROM observations
WHERE project = ? ${obsExclusionClause}
ORDER BY id ASC
`).all(this.project) as StoredObservation[];
const totalObsCount = db.db.prepare(`
SELECT COUNT(*) as count FROM observations WHERE project = ?
`).get(this.project) as { count: number };
logger.info('CHROMA_SYNC', 'Backfilling observations', {
project: this.project,
count: observations.length
missing: observations.length,
existing: existing.observations.size,
total: totalObsCount.count
});
// Format all observation documents
@@ -444,14 +610,28 @@ export class ChromaSync {
});
}
// Get all summaries for this project
// Build exclusion list for summaries
const existingSummaryIds = Array.from(existing.summaries);
const summaryExclusionClause = existingSummaryIds.length > 0
? `AND id NOT IN (${existingSummaryIds.join(',')})`
: '';
// Get only summaries missing from Chroma
const summaries = db.db.prepare(`
SELECT * FROM session_summaries WHERE project = ? ORDER BY id ASC
SELECT * FROM session_summaries
WHERE project = ? ${summaryExclusionClause}
ORDER BY id ASC
`).all(this.project) as StoredSummary[];
const totalSummaryCount = db.db.prepare(`
SELECT COUNT(*) as count FROM session_summaries WHERE project = ?
`).get(this.project) as { count: number };
logger.info('CHROMA_SYNC', 'Backfilling summaries', {
project: this.project,
count: summaries.length
missing: summaries.length,
existing: existing.summaries.size,
total: totalSummaryCount.count
});
// Format all summary documents
@@ -471,10 +651,67 @@ export class ChromaSync {
});
}
logger.info('CHROMA_SYNC', 'Backfill complete', {
// Build exclusion list for prompts
const existingPromptIds = Array.from(existing.prompts);
const promptExclusionClause = existingPromptIds.length > 0
? `AND up.id NOT IN (${existingPromptIds.join(',')})`
: '';
// Get only user prompts missing from Chroma
const prompts = db.db.prepare(`
SELECT
up.*,
s.project,
s.sdk_session_id
FROM user_prompts up
JOIN sdk_sessions s ON up.claude_session_id = s.claude_session_id
WHERE s.project = ? ${promptExclusionClause}
ORDER BY up.id ASC
`).all(this.project) as StoredUserPrompt[];
const totalPromptCount = db.db.prepare(`
SELECT COUNT(*) as count
FROM user_prompts up
JOIN sdk_sessions s ON up.claude_session_id = s.claude_session_id
WHERE s.project = ?
`).get(this.project) as { count: number };
logger.info('CHROMA_SYNC', 'Backfilling user prompts', {
project: this.project,
observationDocs: allDocs.length,
summaryDocs: summaryDocs.length
missing: prompts.length,
existing: existing.prompts.size,
total: totalPromptCount.count
});
// Format all prompt documents
const promptDocs: ChromaDocument[] = [];
for (const prompt of prompts) {
promptDocs.push(this.formatUserPromptDoc(prompt));
}
// Sync in batches
for (let i = 0; i < promptDocs.length; i += this.BATCH_SIZE) {
const batch = promptDocs.slice(i, i + this.BATCH_SIZE);
await this.addDocuments(batch);
logger.info('CHROMA_SYNC', 'Backfill progress', {
project: this.project,
progress: `${Math.min(i + this.BATCH_SIZE, promptDocs.length)}/${promptDocs.length}`
});
}
logger.info('CHROMA_SYNC', 'Smart backfill complete', {
project: this.project,
synced: {
observationDocs: allDocs.length,
summaryDocs: summaryDocs.length,
promptDocs: promptDocs.length
},
skipped: {
observations: existing.observations.size,
summaries: existing.summaries.size,
prompts: existing.prompts.size
}
});
} catch (error) {
+29
View File
@@ -195,8 +195,37 @@ class WorkerService {
// Update port in database
db.setWorkerPort(sessionDbId, this.port!);
// Get the latest user_prompt for this session to sync to Chroma
const latestPrompt = db.db.prepare(`
SELECT
up.*,
s.sdk_session_id,
s.project
FROM user_prompts up
JOIN sdk_sessions s ON up.claude_session_id = s.claude_session_id
WHERE up.claude_session_id = ?
ORDER BY up.created_at_epoch DESC
LIMIT 1
`).get(claudeSessionId) as any;
db.close();
// Sync user prompt to Chroma (fire-and-forget, but crash on failure)
if (latestPrompt) {
this.chromaSync.syncUserPrompt(
latestPrompt.id,
latestPrompt.sdk_session_id,
latestPrompt.project,
latestPrompt.prompt_text,
latestPrompt.prompt_number,
latestPrompt.created_at_epoch
).catch(err => {
logger.failure('WORKER', 'Failed to sync user_prompt to Chroma', { promptId: latestPrompt.id }, err);
process.exit(1); // Fail fast - Chroma sync is critical
});
}
// Start SDK agent in background
session.generatorPromise = this.runSDKAgent(session).catch(err => {
logger.failure('WORKER', 'SDK agent error', { sessionId: sessionDbId }, err);