feat: replace WASM embeddings with persistent chroma-mcp MCP connection (#1176)
* feat: replace WASM embeddings with persistent chroma-mcp MCP connection Replace ChromaServerManager (npx chroma run + chromadb npm + ONNX/WASM) with ChromaMcpManager, a singleton stdio MCP client that communicates with chroma-mcp via uvx. This eliminates native binary issues, segfaults, and WASM embedding failures that plagued cross-platform installs. Key changes: - Add ChromaMcpManager: singleton MCP client with lazy connect, auto-reconnect, connection lock, and Zscaler SSL cert support - Rewrite ChromaSync to use MCP tool calls instead of chromadb npm client - Handle chroma-mcp's non-JSON responses (plain text success/error messages) - Treat "collection already exists" as idempotent success - Wire ChromaMcpManager into GracefulShutdown for clean subprocess teardown - Delete ChromaServerManager (no longer needed) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: address PR review — connection guard leak, timer leak, async reset - Clear connecting guard in finally block to prevent permanent reconnection block - Clear timeout after successful connection to prevent timer leak - Make reset() async to await stop() before nullifying instance - Delete obsolete chroma-server-manager test (imports deleted class) - Update graceful-shutdown test to use chromaMcpManager property name Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: prevent chroma-mcp spawn storm — zombie cleanup, stale onclose guard, reconnect backoff Three bugs caused chroma-mcp processes to accumulate (92+ observed): 1. Zombie on timeout: failed connections left subprocess alive because only the timer was cleared, not the transport. Now catch block explicitly closes transport+client before rethrowing. 2. Stale onclose race: old transport's onclose handler captured `this` and overwrote the current connection reference after reconnect, orphaning the new subprocess. Now guarded with reference check. 3. No backoff: every failure triggered immediate reconnect. With backfill doing hundreds of MCP calls, this created rapid-fire spawning. Added 10s backoff on both connection failure and unexpected process death. Also includes ChromaSync fixes from PR review: - queryChroma deduplication now preserves index-aligned arrays - SQL injection guard on backfill ID exclusion lists Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -18,7 +18,7 @@ import { HOOK_TIMEOUTS } from '../shared/hook-constants.js';
|
||||
import { SettingsDefaultsManager } from '../shared/SettingsDefaultsManager.js';
|
||||
import { getAuthMethodDescription } from '../shared/EnvManager.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
import { ChromaServerManager } from './sync/ChromaServerManager.js';
|
||||
import { ChromaMcpManager } from './sync/ChromaMcpManager.js';
|
||||
import { ChromaSync } from './sync/ChromaSync.js';
|
||||
|
||||
// Windows: avoid repeated spawn popups when startup fails (issue #921)
|
||||
@@ -166,8 +166,8 @@ export class WorkerService {
|
||||
// Route handlers
|
||||
private searchRoutes: SearchRoutes | null = null;
|
||||
|
||||
// Chroma server (local mode)
|
||||
private chromaServer: ChromaServerManager | null = null;
|
||||
// Chroma MCP manager (lazy - connects on first use)
|
||||
private chromaMcpManager: ChromaMcpManager | null = null;
|
||||
|
||||
// Initialization tracking
|
||||
private initializationComplete: Promise<void>;
|
||||
@@ -373,31 +373,12 @@ export class WorkerService {
|
||||
const { ModeManager } = await import('./domain/ModeManager.js');
|
||||
const { SettingsDefaultsManager } = await import('../shared/SettingsDefaultsManager.js');
|
||||
const { USER_SETTINGS_PATH } = await import('../shared/paths.js');
|
||||
const os = await import('os');
|
||||
|
||||
const settings = SettingsDefaultsManager.loadFromFile(USER_SETTINGS_PATH);
|
||||
|
||||
// Start Chroma server if in local mode
|
||||
const chromaMode = settings.CLAUDE_MEM_CHROMA_MODE || 'local';
|
||||
if (chromaMode === 'local') {
|
||||
logger.info('SYSTEM', 'Starting local Chroma server...');
|
||||
this.chromaServer = ChromaServerManager.getInstance({
|
||||
dataDir: path.join(os.homedir(), '.claude-mem', 'vector-db'),
|
||||
host: settings.CLAUDE_MEM_CHROMA_HOST || '127.0.0.1',
|
||||
port: parseInt(settings.CLAUDE_MEM_CHROMA_PORT || '8000', 10)
|
||||
});
|
||||
|
||||
const ready = await this.chromaServer.start(60000);
|
||||
|
||||
if (ready) {
|
||||
logger.success('SYSTEM', 'Chroma server ready');
|
||||
} else {
|
||||
logger.warn('SYSTEM', 'Chroma server failed to start - vector search disabled');
|
||||
this.chromaServer = null;
|
||||
}
|
||||
} else {
|
||||
logger.info('SYSTEM', 'Chroma remote mode - skipping local server');
|
||||
}
|
||||
// Initialize ChromaMcpManager (lazy - connects on first use via ChromaSync)
|
||||
this.chromaMcpManager = ChromaMcpManager.getInstance();
|
||||
logger.info('SYSTEM', 'ChromaMcpManager initialized (lazy - connects on first use)');
|
||||
|
||||
const modeId = settings.CLAUDE_MEM_MODE;
|
||||
ModeManager.getInstance().loadMode(modeId);
|
||||
@@ -428,7 +409,7 @@ export class WorkerService {
|
||||
logger.info('WORKER', 'SearchManager initialized and search routes registered');
|
||||
|
||||
// Auto-backfill Chroma for all projects if out of sync with SQLite (fire-and-forget)
|
||||
if (this.chromaServer !== null || chromaMode !== 'local') {
|
||||
if (this.chromaMcpManager) {
|
||||
ChromaSync.backfillAllProjects().then(() => {
|
||||
logger.info('CHROMA_SYNC', 'Backfill check complete for all projects');
|
||||
}).catch(error => {
|
||||
@@ -855,7 +836,7 @@ export class WorkerService {
|
||||
sessionManager: this.sessionManager,
|
||||
mcpClient: this.mcpClient,
|
||||
dbManager: this.dbManager,
|
||||
chromaServer: this.chromaServer || undefined
|
||||
chromaMcpManager: this.chromaMcpManager || undefined
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user