feat: semantic context injection via Chroma on UserPromptSubmit (#1568)

* feat: semantic context injection via Chroma on every UserPromptSubmit

  On each prompt, queries ChromaDB for the top-N most relevant past observations and injects them as additionalContext. Replaces the recency-based "last N observations" approach with relevance-based semantic search.

  Changes:
  - session-init.ts: After session init, query /api/context/semantic with user's prompt text. If results found, return as hookSpecificOutput with hookEventName 'UserPromptSubmit'.
  - SearchRoutes.ts: New GET /api/context/semantic endpoint that queries SearchManager with format='json' and formats results as markdown.
  - SettingsDefaultsManager.ts: New settings CLAUDE_MEM_SEMANTIC_INJECT (default: true) and CLAUDE_MEM_SEMANTIC_INJECT_LIMIT (default: 5).

  Key behaviors:
  - Fires on every UserPromptSubmit (not just SessionStart)
  - Minimum prompt length: 20 chars (skips "ok", "yes", etc.)
  - Skips media-only prompts
  - Graceful degradation: if worker/Chroma unavailable, no injection
  - Survives /clear: re-injects on next prompt (not session-bound)
  - Uses workerHttpRequest (v10.6.3 API, not raw fetch)

  Production data (23 days, 3,400+ observations):
  - Before: 8 most recent observations (often irrelevant to current topic)
  - After: 5 most relevant observations (semantic match)
  - Token cost: ~1800 → ~800-1200 per injection

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix: address CodeRabbit review on PR #1568

  - session-init: don't skip semantic injection when contextInjected=true (only skip agent re-init, semantic lookup must run every prompt)
  - session-init: normalize SEMANTIC_INJECT toggle via String().toLowerCase()
  - semantic endpoint: change from GET to POST to avoid URL-length limits and prompt exposure in access logs. Handler accepts both body and query for backwards compatibility.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Alessandro Costa <alessandro@claudio.dev> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-04 19:16:46 -03:00
parent 64cce2bf10
commit 876cc4d837
3 changed files with 105 additions and 5 deletions
@@ -87,17 +87,18 @@ export const sessionInitHandler: EventHandler = {

    // Skip SDK agent re-initialization if context was already injected for this session (#1079)
    // The prompt was already saved to the database by /api/sessions/init above —
-    // no need to re-start the SDK agent on every turn
-    if (initResult.contextInjected) {
+    // no need to re-start the SDK agent on every turn.
+    // Note: we do NOT return here — semantic injection below must run on every prompt.
+    const skipAgentInit = Boolean(initResult.contextInjected);
+    if (skipAgentInit) {
      logger.info('HOOK', `INIT_COMPLETE | sessionDbId=${sessionDbId} | promptNumber=${promptNumber} | skipped_agent_init=true | reason=context_already_injected`, {
        sessionId: sessionDbId
      });
-      return { continue: true, suppressOutput: true };
    }

    // Only initialize SDK agent for Claude Code (not Cursor)
    // Cursor doesn't use the SDK agent - it only needs session/observation storage
-    if (input.platform !== 'cursor' && sessionDbId) {
+    if (!skipAgentInit && input.platform !== 'cursor' && sessionDbId) {
      // Strip leading slash from commands for memory agent
      // /review 101 -> review 101 (more semantic for observations)
      const cleanedPrompt = prompt.startsWith('/') ? prompt.substring(1) : prompt;
@@ -115,14 +116,58 @@ export const sessionInitHandler: EventHandler = {
        // Log but don't throw - SDK agent failure should not block the user's prompt
        logger.failure('HOOK', `SDK agent start failed: ${response.status}`, { sessionDbId, promptNumber });
      }
-    } else if (input.platform === 'cursor') {
+    } else if (!skipAgentInit && input.platform === 'cursor') {
      logger.debug('HOOK', 'session-init: Skipping SDK agent init for Cursor platform', { sessionDbId, promptNumber });
    }

+    // Semantic context injection: query Chroma for relevant past observations
+    // and inject as additionalContext so Claude receives relevant memory each prompt.
+    // Controlled by CLAUDE_MEM_SEMANTIC_INJECT setting (default: true).
+    const semanticInject =
+      String(settings.CLAUDE_MEM_SEMANTIC_INJECT).toLowerCase() === 'true';
+    let additionalContext = '';
+
+    if (semanticInject && prompt && prompt.length >= 20 && prompt !== '[media prompt]') {
+      try {
+        const limit = settings.CLAUDE_MEM_SEMANTIC_INJECT_LIMIT || '5';
+        const semanticRes = await workerHttpRequest('/api/context/semantic', {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({ q: prompt, project, limit })
+        });
+        if (semanticRes.ok) {
+          const data = await semanticRes.json() as { context: string; count: number };
+          if (data.context) {
+            additionalContext = data.context;
+            logger.debug('HOOK', `Semantic injection: ${data.count} observations for prompt`, {
+              sessionId: sessionDbId, count: data.count
+            });
+          }
+        }
+      } catch (e) {
+        // Graceful degradation — semantic injection is optional
+        logger.debug('HOOK', 'Semantic injection unavailable', {
+          error: e instanceof Error ? e.message : String(e)
+        });
+      }
+    }
+
    logger.info('HOOK', `INIT_COMPLETE | sessionDbId=${sessionDbId} | promptNumber=${promptNumber} | project=${project}`, {
      sessionId: sessionDbId
    });

+    // Return with semantic context if available
+    if (additionalContext) {
+      return {
+        continue: true,
+        suppressOutput: true,
+        hookSpecificOutput: {
+          hookEventName: 'UserPromptSubmit',
+          additionalContext
+        }
+      };
+    }
+
    return { continue: true, suppressOutput: true };
  }
 };