Merge pull request #1122 from thedotmack/claude/friendly-pascal

fix: resolve orphaned subprocesses and Chroma HTTP regressions
2026-02-15 23:31:10 -05:00
parent 685d54f2cb 055888e181
commit 2dea824cc0
6 changed files with 299 additions and 257 deletions
@@ -97,8 +97,8 @@
  },
  "dependencies": {
    "@anthropic-ai/claude-agent-sdk": "^0.1.76",
    "@chroma-core/default-embed": "^0.1.9",
    "@modelcontextprotocol/sdk": "^1.25.1",
    "@chroma-core/default-embed": "^0.1.9",
    "ansi-to-html": "^0.7.2",
    "chromadb": "^3.2.2",
    "dompurify": "^3.3.1",
@@ -11,7 +11,7 @@
 import { spawn, ChildProcess, execSync } from 'child_process';
 import path from 'path';
 import os from 'os';
-import fs from 'fs';
+import fs, { existsSync } from 'fs';
 import { logger } from '../../utils/logger.js';
 export interface ChromaServerConfig {
@@ -108,14 +108,35 @@ export class ChromaServerManager {
    // Cross-platform: use npx.cmd on Windows
    const isWindows = process.platform === 'win32';
    const command = isWindows ? 'npx.cmd' : 'npx';
-    const args = [
+    // Resolve chroma binary absolutely — npx fails when spawned from cache dirs (#1120)
-      'chroma', 'run',
+    let command: string;
-      '--path', this.config.dataDir,
+    let args: string[];
-      '--host', this.config.host,
+    try {
-      '--port', String(this.config.port)
+      // chromadb package installs a 'chroma' bin entry
-    ];
+      const chromaBinDir = path.dirname(require.resolve('chromadb/package.json'));
      // Check project-level .bin first (most common npm/bun installation layout)
      const projectBin = path.join(chromaBinDir, '..', '.bin', isWindows ? 'chroma.cmd' : 'chroma');
      // Fallback: nested node_modules .bin (rare — pnpm or workspace hoisting)
      const nestedBin = path.join(chromaBinDir, 'node_modules', '.bin', isWindows ? 'chroma.cmd' : 'chroma');
      if (existsSync(projectBin)) {
        command = projectBin;
      } else if (existsSync(nestedBin)) {
        command = nestedBin;
      } else {
        // Last resort: npx with explicit cwd
        command = isWindows ? 'npx.cmd' : 'npx';
      }
    } catch {
      command = isWindows ? 'npx.cmd' : 'npx';
    }
    // Choose the argument list based on which launcher was selected above.
    // Only the bare npx launcher needs the 'chroma' package name as its first
    // argument; a directly resolved chroma binary takes the subcommand straight away.
    // Compare against the exact launcher names instead of a substring match:
    // a resolved absolute binary path can itself contain "npx" (for example
    // when the package lives under an npx cache directory like ~/.npm/_npx/...),
    // and a substring test would then invoke it as `chroma chroma run ...`.
    if (command === 'npx' || command === 'npx.cmd') {
      args = ['chroma', 'run', '--path', this.config.dataDir, '--host', this.config.host, '--port', String(this.config.port)];
    } else {
      args = ['run', '--path', this.config.dataDir, '--host', this.config.host, '--port', String(this.config.port)];
    }
    logger.info('CHROMA_SERVER', 'Starting Chroma server', {
      command,
@@ -125,11 +146,20 @@ export class ChromaServerManager {
    const spawnEnv = this.getSpawnEnv();
    // Resolve cwd for npx fallback — ensures node_modules is findable (#1120)
    let spawnCwd: string | undefined;
    try {
      spawnCwd = path.dirname(require.resolve('chromadb/package.json'));
    } catch {
      // If chromadb isn't resolvable, omit cwd and let npx handle it
    }
    this.serverProcess = spawn(command, args, {
      stdio: ['ignore', 'pipe', 'pipe'],
      detached: !isWindows,  // Don't detach on Windows (no process groups)
      windowsHide: true,     // Hide console window on Windows
-      env: spawnEnv
+      env: spawnEnv,
      ...(spawnCwd && { cwd: spawnCwd })
    });
    // Log server output for debugging
@@ -189,17 +189,20 @@ export class ChromaSync {
    }
    try {
-      // getOrCreateCollection handles both cases
+      // Use WASM backend to avoid native ONNX binary issues (#1104, #1105, #1110).
-      // Lazy-load DefaultEmbeddingFunction to avoid eagerly pulling in
+      // Same model (all-MiniLM-L6-v2), same embeddings, but runs in WASM —
-      // @huggingface/transformers → sharp native binaries at bundle startup
+      // no native binary loading, no segfaults, no ENOENT errors.
      const { DefaultEmbeddingFunction } = await import('@chroma-core/default-embed');
-      const embeddingFunction = new DefaultEmbeddingFunction();
+      const embeddingFunction = new DefaultEmbeddingFunction({ wasm: true });
      this.collection = await this.chromaClient.getOrCreateCollection({
        name: this.collectionName,
        embeddingFunction
      });
-      logger.debug('CHROMA_SYNC', 'Collection ready', { collection: this.collectionName });
+      logger.debug('CHROMA_SYNC', 'Collection ready', {
        collection: this.collectionName
      });
    } catch (error) {
      logger.error('CHROMA_SYNC', 'Failed to get/create collection', { collection: this.collectionName }, error as Error);
      throw new Error(`Collection setup failed: ${error instanceof Error ? error.message : String(error)}`);
@@ -141,134 +141,143 @@ export class SDKAgent {
      }
    });
-    // Process SDK messages
+    // Process SDK messages — cleanup in finally ensures subprocess termination
-    for await (const message of queryResult) {
+    // even if the loop throws (e.g., context overflow, invalid API key)
-      // Capture or update memory session ID from SDK message
+    try {
-      // IMPORTANT: The SDK may return a DIFFERENT session_id on resume than what we sent!
+      for await (const message of queryResult) {
-      // We must always sync the DB to match what the SDK actually uses.
+        // Capture or update memory session ID from SDK message
-      //
+        // IMPORTANT: The SDK may return a DIFFERENT session_id on resume than what we sent!
-      // MULTI-TERMINAL COLLISION FIX (FK constraint bug):
+        // We must always sync the DB to match what the SDK actually uses.
-      // Use ensureMemorySessionIdRegistered() instead of updateMemorySessionId() because:
+        //
-      // 1. It's idempotent - safe to call multiple times
+        // MULTI-TERMINAL COLLISION FIX (FK constraint bug):
-      // 2. It verifies the update happened (SELECT before UPDATE)
+        // Use ensureMemorySessionIdRegistered() instead of updateMemorySessionId() because:
-      // 3. Consistent with ResponseProcessor's usage pattern
+        // 1. It's idempotent - safe to call multiple times
-      // This ensures FK constraint compliance BEFORE any observations are stored.
+        // 2. It verifies the update happened (SELECT before UPDATE)
-      if (message.session_id && message.session_id !== session.memorySessionId) {
+        // 3. Consistent with ResponseProcessor's usage pattern
-        const previousId = session.memorySessionId;
+        // This ensures FK constraint compliance BEFORE any observations are stored.
-        session.memorySessionId = message.session_id;
+        if (message.session_id && message.session_id !== session.memorySessionId) {
-        // Persist to database IMMEDIATELY for FK constraint compliance
+          const previousId = session.memorySessionId;
-        // This must happen BEFORE any observations referencing this ID are stored
+          session.memorySessionId = message.session_id;
-        this.dbManager.getSessionStore().ensureMemorySessionIdRegistered(
+          // Persist to database IMMEDIATELY for FK constraint compliance
-          session.sessionDbId,
+          // This must happen BEFORE any observations referencing this ID are stored
-          message.session_id
+          this.dbManager.getSessionStore().ensureMemorySessionIdRegistered(
-        );
+            session.sessionDbId,
-        // Verify the update by reading back from DB
+            message.session_id
-        const verification = this.dbManager.getSessionStore().getSessionById(session.sessionDbId);
+          );
-        const dbVerified = verification?.memory_session_id === message.session_id;
+          // Verify the update by reading back from DB
-        const logMessage = previousId
+          const verification = this.dbManager.getSessionStore().getSessionById(session.sessionDbId);
-          ? `MEMORY_ID_CHANGED | sessionDbId=${session.sessionDbId} | from=${previousId} | to=${message.session_id} | dbVerified=${dbVerified}`
+          const dbVerified = verification?.memory_session_id === message.session_id;
-          : `MEMORY_ID_CAPTURED | sessionDbId=${session.sessionDbId} | memorySessionId=${message.session_id} | dbVerified=${dbVerified}`;
+          const logMessage = previousId
-        logger.info('SESSION', logMessage, {
+            ? `MEMORY_ID_CHANGED | sessionDbId=${session.sessionDbId} | from=${previousId} | to=${message.session_id} | dbVerified=${dbVerified}`
-          sessionId: session.sessionDbId,
+            : `MEMORY_ID_CAPTURED | sessionDbId=${session.sessionDbId} | memorySessionId=${message.session_id} | dbVerified=${dbVerified}`;
-          memorySessionId: message.session_id,
+          logger.info('SESSION', logMessage, {
-          previousId
+            sessionId: session.sessionDbId,
-        });
+            memorySessionId: message.session_id,
-        if (!dbVerified) {
+            previousId
          logger.error('SESSION', `MEMORY_ID_MISMATCH | sessionDbId=${session.sessionDbId} | expected=${message.session_id} | got=${verification?.memory_session_id}`, {
            sessionId: session.sessionDbId
          });
-        }
+          if (!dbVerified) {
-        // Debug-level alignment log for detailed tracing
+            logger.error('SESSION', `MEMORY_ID_MISMATCH | sessionDbId=${session.sessionDbId} | expected=${message.session_id} | got=${verification?.memory_session_id}`, {
-        logger.debug('SDK', `[ALIGNMENT] ${previousId ? 'Updated' : 'Captured'} | contentSessionId=${session.contentSessionId} → memorySessionId=${message.session_id} | Future prompts will resume with this ID`);
+              sessionId: session.sessionDbId
-      }
+            });
-
+          }
-      // Handle assistant messages
+          // Debug-level alignment log for detailed tracing
-      if (message.type === 'assistant') {
+          logger.debug('SDK', `[ALIGNMENT] ${previousId ? 'Updated' : 'Captured'} | contentSessionId=${session.contentSessionId} → memorySessionId=${message.session_id} | Future prompts will resume with this ID`);
        const content = message.message.content;
        const textContent = Array.isArray(content)
          ? content.filter((c: any) => c.type === 'text').map((c: any) => c.text).join('\n')
          : typeof content === 'string' ? content : '';
        // Check for context overflow - prevents infinite retry loops
        if (textContent.includes('prompt is too long') ||
            textContent.includes('context window')) {
          logger.error('SDK', 'Context overflow detected - terminating session');
          session.abortController.abort();
          return;
        }
-        const responseSize = textContent.length;
+        // Handle assistant messages
        if (message.type === 'assistant') {
          const content = message.message.content;
          const textContent = Array.isArray(content)
            ? content.filter((c: any) => c.type === 'text').map((c: any) => c.text).join('\n')
            : typeof content === 'string' ? content : '';
-        // Capture token state BEFORE updating (for delta calculation)
+          // Check for context overflow - prevents infinite retry loops
-        const tokensBeforeResponse = session.cumulativeInputTokens + session.cumulativeOutputTokens;
+          if (textContent.includes('prompt is too long') ||
-
+              textContent.includes('context window')) {
-        // Extract and track token usage
+            logger.error('SDK', 'Context overflow detected - terminating session');
-        const usage = message.message.usage;
+            session.abortController.abort();
-        if (usage) {
+            return;
          session.cumulativeInputTokens += usage.input_tokens || 0;
          session.cumulativeOutputTokens += usage.output_tokens || 0;
          // Cache creation counts as discovery, cache read doesn't
          if (usage.cache_creation_input_tokens) {
            session.cumulativeInputTokens += usage.cache_creation_input_tokens;
          }
-          logger.debug('SDK', 'Token usage captured', {
+          const responseSize = textContent.length;
-            sessionId: session.sessionDbId,
+
-            inputTokens: usage.input_tokens,
+          // Capture token state BEFORE updating (for delta calculation)
-            outputTokens: usage.output_tokens,
+          const tokensBeforeResponse = session.cumulativeInputTokens + session.cumulativeOutputTokens;
-            cacheCreation: usage.cache_creation_input_tokens || 0,
+
-            cacheRead: usage.cache_read_input_tokens || 0,
+          // Extract and track token usage
-            cumulativeInput: session.cumulativeInputTokens,
+          const usage = message.message.usage;
-            cumulativeOutput: session.cumulativeOutputTokens
+          if (usage) {
-          });
+            session.cumulativeInputTokens += usage.input_tokens || 0;
            session.cumulativeOutputTokens += usage.output_tokens || 0;
            // Cache creation counts as discovery, cache read doesn't
            if (usage.cache_creation_input_tokens) {
              session.cumulativeInputTokens += usage.cache_creation_input_tokens;
            }
            logger.debug('SDK', 'Token usage captured', {
              sessionId: session.sessionDbId,
              inputTokens: usage.input_tokens,
              outputTokens: usage.output_tokens,
              cacheCreation: usage.cache_creation_input_tokens || 0,
              cacheRead: usage.cache_read_input_tokens || 0,
              cumulativeInput: session.cumulativeInputTokens,
              cumulativeOutput: session.cumulativeOutputTokens
            });
          }
          // Calculate discovery tokens (delta for this response only)
          const discoveryTokens = (session.cumulativeInputTokens + session.cumulativeOutputTokens) - tokensBeforeResponse;
          // Process response (empty or not) and mark messages as processed
          // Capture earliest timestamp BEFORE processing (will be cleared after)
          const originalTimestamp = session.earliestPendingTimestamp;
          if (responseSize > 0) {
            const truncatedResponse = responseSize > 100
              ? textContent.substring(0, 100) + '...'
              : textContent;
            logger.dataOut('SDK', `Response received (${responseSize} chars)`, {
              sessionId: session.sessionDbId,
              promptNumber: session.lastPromptNumber
            }, truncatedResponse);
          }
          // Detect fatal context overflow and terminate gracefully (issue #870)
          if (typeof textContent === 'string' && textContent.includes('Prompt is too long')) {
            throw new Error('Claude session context overflow: prompt is too long');
          }
          // Detect invalid API key — SDK returns this as response text, not an error.
          // Throw so it surfaces in health endpoint and prevents silent failures.
          if (typeof textContent === 'string' && textContent.includes('Invalid API key')) {
            throw new Error('Invalid API key: check your API key configuration in ~/.claude-mem/settings.json or ~/.claude-mem/.env');
          }
          // Parse and process response using shared ResponseProcessor
          await processAgentResponse(
            textContent,
            session,
            this.dbManager,
            this.sessionManager,
            worker,
            discoveryTokens,
            originalTimestamp,
            'SDK',
            cwdTracker.lastCwd
          );
        }
-        // Calculate discovery tokens (delta for this response only)
+        // Log result messages
-        const discoveryTokens = (session.cumulativeInputTokens + session.cumulativeOutputTokens) - tokensBeforeResponse;
+        if (message.type === 'result' && message.subtype === 'success') {
-
+          // Usage telemetry is captured at SDK level
        // Process response (empty or not) and mark messages as processed
        // Capture earliest timestamp BEFORE processing (will be cleared after)
        const originalTimestamp = session.earliestPendingTimestamp;
        if (responseSize > 0) {
          const truncatedResponse = responseSize > 100
            ? textContent.substring(0, 100) + '...'
            : textContent;
          logger.dataOut('SDK', `Response received (${responseSize} chars)`, {
            sessionId: session.sessionDbId,
            promptNumber: session.lastPromptNumber
          }, truncatedResponse);
        }
        // Detect fatal context overflow and terminate gracefully (issue #870)
        if (typeof textContent === 'string' && textContent.includes('Prompt is too long')) {
          throw new Error('Claude session context overflow: prompt is too long');
        }
        // Detect invalid API key — SDK returns this as response text, not an error.
        // Throw so it surfaces in health endpoint and prevents silent failures.
        if (typeof textContent === 'string' && textContent.includes('Invalid API key')) {
          throw new Error('Invalid API key: check your API key configuration in ~/.claude-mem/settings.json or ~/.claude-mem/.env');
        }
        // Parse and process response using shared ResponseProcessor
        await processAgentResponse(
          textContent,
          session,
          this.dbManager,
          this.sessionManager,
          worker,
          discoveryTokens,
          originalTimestamp,
          'SDK',
          cwdTracker.lastCwd
        );
      }
-
+    } finally {
-      // Log result messages
+      // Ensure subprocess is terminated after query completes (or on error)
-      if (message.type === 'result' && message.subtype === 'success') {
+      const tracked = getProcessBySession(session.sessionDbId);
-        // Usage telemetry is captured at SDK level
+      if (tracked && !tracked.process.killed && tracked.process.exitCode === null) {
        await ensureProcessExit(tracked, 5000);
      }
    }