Files
claude-mem/experiment/chroma-sync-experiment.ts
T
Alex Newman 633f89a5fb feat: Implement user prompt syncing to Chroma and enhance timeline querying
- Added `getObservationById` method to retrieve observations by ID in SessionStore.
- Introduced `getSessionSummariesByIds` and `getUserPromptsByIds` methods for fetching session summaries and user prompts by IDs.
- Developed `getTimelineAroundTimestamp` and `getTimelineAroundObservation` methods to provide a unified timeline of observations, sessions, and prompts around a specified anchor point.
- Enhanced ChromaSync to format and sync user prompts, including a new `syncUserPrompt` method.
- Updated WorkerService to sync the latest user prompt to Chroma after updating the worker port.
- Created tests for timeline querying and MCP handler logic to ensure functionality.
- Documented the implementation plan for user prompts and timeline context tool in the Chroma search completion plan.
2025-11-03 16:55:33 -05:00

381 lines
12 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env node
/**
* Chroma MCP Sync Experiment
*
* This script tests syncing SQLite observations/summaries to ChromaDB
* via the existing Chroma MCP server (uvx chroma-mcp).
*
* NO PRODUCTION CODE CHANGES - Pure experiment.
*/
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
import { SessionStore } from '../src/services/sqlite/SessionStore.js';
import path from 'path';
import os from 'os';
/**
 * One record queued for upload to a Chroma collection.
 *
 * In this script the three members are sent as the parallel `ids`,
 * `documents` and `metadatas` arrays of a `chroma_add_documents` call.
 */
interface ChromaDocument {
  /** Identifier of the record inside the collection (e.g. `obs_12_fact_0`). */
  id: string;

  /** The text stored for this record. */
  document: string;

  /** Flat key/value pairs attached to the record; values are strings or numbers. */
  metadata: { [key: string]: string | number };
}
/** Number of documents sent to Chroma per `chroma_add_documents` call. */
const SYNC_BATCH_SIZE = 100;

/** Text columns of a `session_summaries` row that each become their own document. */
const SUMMARY_FIELDS = [
  'request',
  'investigated',
  'learned',
  'completed',
  'next_steps',
  'notes'
] as const;

type SummaryField = (typeof SUMMARY_FIELDS)[number];

/** Value resolved by `Client.callTool`. */
type ToolResult = Awaited<ReturnType<Client['callTool']>>;

/** Metadata candidates before absent values are stripped (see `toMetadata`). */
type MetadataInput = Record<string, string | number | null | undefined>;

// NOTE(review): the row shapes below are inferred from the columns this script
// reads; nullability is an assumption — confirm against the SQLite schema.

/** Columns read from an `observations` row. */
interface ObservationRow {
  id: number;
  sdk_session_id: string | null;
  project: string | null;
  created_at_epoch: number | null;
  type: string | null;
  title: string | null;
  subtitle: string | null;
  narrative: string | null;
  text: string | null;
  /** JSON-encoded array of strings. */
  facts: string | null;
  /** JSON-encoded array of strings. */
  concepts: string | null;
  /** JSON-encoded array of strings. */
  files_read: string | null;
  /** JSON-encoded array of strings. */
  files_modified: string | null;
}

/** Columns read from a `session_summaries` row. */
interface SummaryRow extends Record<SummaryField, string | null> {
  id: number;
  sdk_session_id: string | null;
  project: string | null;
  created_at_epoch: number | null;
  prompt_number: number | null;
}

/** Columns read from a `user_prompts` row joined with its `sdk_sessions` row. */
interface PromptRow {
  id: number;
  prompt_text: string;
  prompt_number: number | null;
  created_at_epoch: number | null;
  sdk_session_id: string | null;
  project: string | null;
}

/**
 * Calls a tool on the Chroma MCP server and rejects when the tool reports failure.
 *
 * MCP servers report tool-execution failures through `isError` on the result
 * instead of rejecting the request, so the flag has to be checked explicitly;
 * otherwise a failed call is indistinguishable from a successful one.
 *
 * @param client Connected MCP client.
 * @param name Tool name, e.g. `chroma_add_documents`.
 * @param args Arguments forwarded to the tool.
 * @returns The tool result when `isError` is not set.
 * @throws Error when the result carries `isError`.
 */
async function callChromaTool(
  client: Client,
  name: string,
  args: Record<string, unknown>
): Promise<ToolResult> {
  const result = await client.callTool({ name, arguments: args });
  if (result.isError) {
    throw new Error(`Chroma tool "${name}" failed: ${JSON.stringify(result.content)}`);
  }
  return result;
}

/** Returns the first content item of a tool result (or the raw content if it is not a list). */
function firstContent(result: ToolResult): unknown {
  const { content } = result;
  return Array.isArray(content) ? content[0] : content;
}

/**
 * Decodes a JSON column that is expected to hold an array of strings.
 *
 * Malformed JSON or a non-array value yields `[]` (with a warning) so that one
 * bad row does not abort the whole sync. Non-string items are dropped because
 * they are later used as document text or joined into metadata strings.
 *
 * @param raw Column value; empty/null is treated as "no items".
 * @param context Human-readable location used in the warning message.
 */
function parseStringArray(raw: string | null | undefined, context: string): string[] {
  if (!raw) {
    return [];
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    console.warn(`⚠️ Ignoring malformed JSON in ${context}`);
    return [];
  }
  if (!Array.isArray(parsed)) {
    console.warn(`⚠️ Ignoring non-array JSON in ${context}`);
    return [];
  }
  return parsed.filter((item): item is string => typeof item === 'string');
}

/**
 * Builds a metadata record, omitting keys whose value is `null` or `undefined`.
 *
 * `ChromaDocument.metadata` only admits string and number values, so absent
 * column values are left out rather than forwarded as `null`.
 */
function toMetadata(fields: MetadataInput): Record<string, string | number> {
  const metadata: Record<string, string | number> = {};
  for (const [key, value] of Object.entries(fields)) {
    if (value !== null && value !== undefined) {
      metadata[key] = value;
    }
  }
  return metadata;
}

/**
 * Expands observation rows into documents: one for the narrative, one for the
 * text, and one per fact. Every document of a row shares the same base metadata.
 */
function buildObservationDocuments(rows: readonly ObservationRow[]): ChromaDocument[] {
  const documents: ChromaDocument[] = [];

  for (const obs of rows) {
    const facts = parseStringArray(obs.facts, `observation ${obs.id} facts`);
    const concepts = parseStringArray(obs.concepts, `observation ${obs.id} concepts`);
    const filesRead = parseStringArray(obs.files_read, `observation ${obs.id} files_read`);
    const filesModified = parseStringArray(obs.files_modified, `observation ${obs.id} files_modified`);

    const base: MetadataInput = {
      sqlite_id: obs.id,
      doc_type: 'observation',
      sdk_session_id: obs.sdk_session_id,
      project: obs.project,
      created_at_epoch: obs.created_at_epoch,
      // `||` on purpose: an empty string should also fall back to the default.
      type: obs.type || 'discovery',
      title: obs.title || 'Untitled',
      // Optional keys: `undefined` entries are removed by toMetadata.
      subtitle: obs.subtitle || undefined,
      concepts: concepts.length ? concepts.join(',') : undefined,
      files_read: filesRead.length ? filesRead.join(',') : undefined,
      files_modified: filesModified.length ? filesModified.join(',') : undefined
    };

    if (obs.narrative) {
      documents.push({
        id: `obs_${obs.id}_narrative`,
        document: obs.narrative,
        metadata: toMetadata({ ...base, field_type: 'narrative' })
      });
    }

    if (obs.text) {
      documents.push({
        id: `obs_${obs.id}_text`,
        document: obs.text,
        metadata: toMetadata({ ...base, field_type: 'text' })
      });
    }

    facts.forEach((fact, index) => {
      documents.push({
        id: `obs_${obs.id}_fact_${index}`,
        document: fact,
        metadata: toMetadata({ ...base, field_type: 'fact', fact_index: index })
      });
    });
  }

  return documents;
}

/** Expands session summary rows into one document per non-empty summary field. */
function buildSummaryDocuments(rows: readonly SummaryRow[]): ChromaDocument[] {
  const documents: ChromaDocument[] = [];

  for (const summary of rows) {
    const base: MetadataInput = {
      sqlite_id: summary.id,
      doc_type: 'session_summary',
      sdk_session_id: summary.sdk_session_id,
      project: summary.project,
      created_at_epoch: summary.created_at_epoch,
      prompt_number: summary.prompt_number ?? 0
    };

    for (const field of SUMMARY_FIELDS) {
      const text = summary[field];
      if (text) {
        documents.push({
          id: `summary_${summary.id}_${field}`,
          document: text,
          metadata: toMetadata({ ...base, field_type: field })
        });
      }
    }
  }

  return documents;
}

/** Maps each user prompt row to a single document. */
function buildPromptDocuments(rows: readonly PromptRow[]): ChromaDocument[] {
  return rows.map(prompt => ({
    id: `prompt_${prompt.id}`,
    document: prompt.prompt_text,
    metadata: toMetadata({
      sqlite_id: prompt.id,
      doc_type: 'user_prompt',
      sdk_session_id: prompt.sdk_session_id,
      project: prompt.project,
      created_at_epoch: prompt.created_at_epoch,
      prompt_number: prompt.prompt_number ?? 0
    })
  }));
}

/**
 * Uploads documents to a collection in batches of `SYNC_BATCH_SIZE`, printing
 * one progress line per batch and a summary line at the end.
 *
 * Batches are awaited one at a time so the progress output stays in order.
 *
 * @param client Connected MCP client.
 * @param collectionName Target Chroma collection.
 * @param docs Documents to upload; an empty list only prints the summary line.
 * @param labels Wording used in the "Syncing …" heading and the "Synced N …" summary.
 * @throws Error when any `chroma_add_documents` call fails.
 */
async function syncDocuments(
  client: Client,
  collectionName: string,
  docs: readonly ChromaDocument[],
  labels: { syncing: string; synced: string }
): Promise<void> {
  console.log(`⬆️ Syncing ${labels.syncing} to ChromaDB...`);

  const totalBatches = Math.ceil(docs.length / SYNC_BATCH_SIZE);
  const startTime = Date.now();
  const elapsedSeconds = (): string => ((Date.now() - startTime) / 1000).toFixed(1);

  for (let start = 0; start < docs.length; start += SYNC_BATCH_SIZE) {
    const batch = docs.slice(start, start + SYNC_BATCH_SIZE);
    const batchNumber = Math.floor(start / SYNC_BATCH_SIZE) + 1;
    const progress = Math.round((batchNumber / totalBatches) * 100);
    const lastDoc = start + batch.length;

    process.stdout.write(
      ` [${batchNumber}/${totalBatches}] ${progress}% - Syncing docs ${start + 1}-${lastDoc}/${docs.length} (${elapsedSeconds()}s elapsed)...`
    );
    await callChromaTool(client, 'chroma_add_documents', {
      collection_name: collectionName,
      documents: batch.map(d => d.document),
      ids: batch.map(d => d.id),
      metadatas: batch.map(d => d.metadata)
    });
    console.log(' ✓');
  }

  console.log(`✅ Synced ${docs.length} ${labels.synced} in ${elapsedSeconds()}s\n`);
}

/**
 * Runs the sync experiment end to end.
 *
 * Steps: connect to the Chroma MCP server over stdio, list its tools, drop and
 * recreate the `cm__claude-mem` collection, then read observations, session
 * summaries and user prompts from SQLite and upload them as documents, and
 * finally print the collection info and document count.
 *
 * The existing collection is deleted first, so every run rebuilds it from scratch.
 * The MCP client is closed whether or not the run succeeds.
 *
 * @throws Error when connecting, a SQLite query, or any Chroma tool call other
 *   than the initial best-effort delete fails.
 */
async function main(): Promise<void> {
  console.log('🧪 Chroma MCP Sync Experiment\n');

  // Initialize MCP client to Chroma server
  console.log('📡 Connecting to Chroma MCP server...');
  const transport = new StdioClientTransport({
    command: 'uvx',
    args: [
      'chroma-mcp',
      '--client-type', 'persistent',
      '--data-dir', path.join(os.homedir(), '.claude-mem', 'vector-db')
    ]
  });
  const client = new Client({
    name: 'chroma-sync-experiment',
    version: '1.0.0'
  }, {
    capabilities: {}
  });
  await client.connect(transport);

  try {
    console.log('✅ Connected to Chroma MCP\n');

    // List available tools
    const { tools } = await client.listTools();
    console.log('🔧 Available MCP tools:');
    for (const tool of tools) {
      console.log(` - ${tool.name}`);
    }
    console.log();

    // SessionStore is constructed without arguments, so it chooses its own
    // database location.
    // NOTE(review): the store is never closed here; confirm whether SessionStore
    // exposes a close method that should be called in the `finally` below.
    const store = new SessionStore();

    // Project name drives the collection name.
    const project = 'claude-mem';
    const collectionName = `cm__${project}`;

    console.log(`🗑️ Deleting existing collection: ${collectionName}`);
    try {
      await callChromaTool(client, 'chroma_delete_collection', {
        collection_name: collectionName
      });
      console.log('✅ Collection deleted\n');
    } catch (error: unknown) {
      // Best effort: the usual cause is a first run with no collection yet, but
      // the reason is printed so that other failures are not hidden.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`ℹ️ Collection does not exist (first run): ${reason}\n`);
    }

    console.log(`📚 Creating collection: ${collectionName}`);
    const createResult = await callChromaTool(client, 'chroma_create_collection', {
      collection_name: collectionName,
      embedding_function_name: 'default'
    });
    console.log('✅ Collection created:', firstContent(createResult));
    console.log();

    // Observations: each semantic chunk (narrative, text, fact) is its own document.
    console.log('📖 Reading observations from SQLite...');
    const observations = store.db.prepare(`
SELECT * FROM observations WHERE project = ? ORDER BY created_at_epoch DESC
`).all(project) as ObservationRow[];
    console.log(`Found ${observations.length} observations\n`);

    const documents = buildObservationDocuments(observations);
    console.log(`Created ${documents.length} observation field documents (narratives, texts, facts)\n`);
    await syncDocuments(client, collectionName, documents, {
      syncing: 'observation fields',
      synced: 'observation documents'
    });

    // Session summaries: each non-empty field is its own document.
    console.log('📖 Reading session summaries from SQLite...');
    const summaries = store.db.prepare(`
SELECT * FROM session_summaries WHERE project = ? ORDER BY created_at_epoch DESC LIMIT 100
`).all(project) as SummaryRow[];
    console.log(`Found ${summaries.length} session summaries`);

    const sessionDocs = buildSummaryDocuments(summaries);
    console.log(`Created ${sessionDocs.length} session field documents\n`);
    await syncDocuments(client, collectionName, sessionDocs, {
      syncing: 'session fields',
      synced: 'session documents'
    });

    // User prompts: one document per prompt.
    console.log('📖 Reading user prompts from SQLite...');
    const prompts = store.db.prepare(`
SELECT
up.*,
s.project,
s.sdk_session_id
FROM user_prompts up
JOIN sdk_sessions s ON up.claude_session_id = s.claude_session_id
WHERE s.project = ?
ORDER BY up.created_at_epoch DESC
LIMIT 1000
`).all(project) as PromptRow[];
    console.log(`Found ${prompts.length} user prompts`);

    const promptDocs = buildPromptDocuments(prompts);
    console.log(`Created ${promptDocs.length} user prompt documents\n`);
    await syncDocuments(client, collectionName, promptDocs, {
      syncing: 'user prompts',
      synced: 'user prompt documents'
    });

    // Get collection info
    const infoResult = await callChromaTool(client, 'chroma_get_collection_info', {
      collection_name: collectionName
    });
    console.log('📊 Collection Info:');
    console.log(firstContent(infoResult));
    console.log();

    // Get count
    const countResult = await callChromaTool(client, 'chroma_get_collection_count', {
      collection_name: collectionName
    });
    console.log('📊 Total Documents:', firstContent(countResult));
    console.log();

    console.log('✅ Sync experiment complete!\n');
    console.log('Next: Run chroma-search-test.ts to test semantic search');
  } finally {
    // Close the client on failure as well as on success.
    await client.close();
  }
}
/**
 * Rejection handler for the script's entry point: prints the failure to
 * stderr and terminates the process with exit status 1.
 */
const reportFatal = (failure: unknown): void => {
  console.error('❌ Experiment failed:', failure);
  process.exit(1);
};

// Start the experiment; only a rejected run needs handling.
main().then(undefined, reportFatal);