feat: semantic context injection via Chroma on UserPromptSubmit (#1568)
* feat: semantic context injection via Chroma on every UserPromptSubmit On each prompt, queries ChromaDB for the top-N most relevant past observations and injects them as additionalContext. Replaces the recency-based "last N observations" approach with relevance-based semantic search. Changes: - session-init.ts: After session init, query /api/context/semantic with user's prompt text. If results found, return as hookSpecificOutput with hookEventName 'UserPromptSubmit'. - SearchRoutes.ts: New GET /api/context/semantic endpoint that queries SearchManager with format='json' and formats results as markdown. - SettingsDefaultsManager.ts: New settings CLAUDE_MEM_SEMANTIC_INJECT (default: true) and CLAUDE_MEM_SEMANTIC_INJECT_LIMIT (default: 5). Key behaviors: - Fires on every UserPromptSubmit (not just SessionStart) - Minimum prompt length: 20 chars (skips "ok", "yes", etc.) - Skips media-only prompts - Graceful degradation: if worker/Chroma unavailable, no injection - Survives /clear: re-injects on next prompt (not session-bound) - Uses workerHttpRequest (v10.6.3 API, not raw fetch) Production data (23 days, 3,400+ observations): - Before: 8 most recent observations (often irrelevant to current topic) - After: 5 most relevant observations (semantic match) - Token cost: ~1800 → ~800-1200 per injection Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: address CodeRabbit review on PR #1568 - session-init: don't skip semantic injection when contextInjected=true (only skip agent re-init, semantic lookup must run every prompt) - session-init: normalize SEMANTIC_INJECT toggle via String().toLowerCase() - semantic endpoint: change from GET to POST to avoid URL-length limits and prompt exposure in access logs. Handler accepts both body and query for backwards compatibility. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Alessandro Costa <alessandro@claudio.dev> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -87,17 +87,18 @@ export const sessionInitHandler: EventHandler = {
|
||||
|
||||
// Skip SDK agent re-initialization if context was already injected for this session (#1079)
|
||||
// The prompt was already saved to the database by /api/sessions/init above —
|
||||
// no need to re-start the SDK agent on every turn
|
||||
if (initResult.contextInjected) {
|
||||
// no need to re-start the SDK agent on every turn.
|
||||
// Note: we do NOT return here — semantic injection below must run on every prompt.
|
||||
const skipAgentInit = Boolean(initResult.contextInjected);
|
||||
if (skipAgentInit) {
|
||||
logger.info('HOOK', `INIT_COMPLETE | sessionDbId=${sessionDbId} | promptNumber=${promptNumber} | skipped_agent_init=true | reason=context_already_injected`, {
|
||||
sessionId: sessionDbId
|
||||
});
|
||||
return { continue: true, suppressOutput: true };
|
||||
}
|
||||
|
||||
// Only initialize SDK agent for Claude Code (not Cursor)
|
||||
// Cursor doesn't use the SDK agent - it only needs session/observation storage
|
||||
if (input.platform !== 'cursor' && sessionDbId) {
|
||||
if (!skipAgentInit && input.platform !== 'cursor' && sessionDbId) {
|
||||
// Strip leading slash from commands for memory agent
|
||||
// /review 101 -> review 101 (more semantic for observations)
|
||||
const cleanedPrompt = prompt.startsWith('/') ? prompt.substring(1) : prompt;
|
||||
@@ -115,14 +116,58 @@ export const sessionInitHandler: EventHandler = {
|
||||
// Log but don't throw - SDK agent failure should not block the user's prompt
|
||||
logger.failure('HOOK', `SDK agent start failed: ${response.status}`, { sessionDbId, promptNumber });
|
||||
}
|
||||
} else if (input.platform === 'cursor') {
|
||||
} else if (!skipAgentInit && input.platform === 'cursor') {
|
||||
logger.debug('HOOK', 'session-init: Skipping SDK agent init for Cursor platform', { sessionDbId, promptNumber });
|
||||
}
|
||||
|
||||
// Semantic context injection: query Chroma for relevant past observations
|
||||
// and inject as additionalContext so Claude receives relevant memory each prompt.
|
||||
// Controlled by CLAUDE_MEM_SEMANTIC_INJECT setting (default: true).
|
||||
const semanticInject =
|
||||
String(settings.CLAUDE_MEM_SEMANTIC_INJECT).toLowerCase() === 'true';
|
||||
let additionalContext = '';
|
||||
|
||||
if (semanticInject && prompt && prompt.length >= 20 && prompt !== '[media prompt]') {
|
||||
try {
|
||||
const limit = settings.CLAUDE_MEM_SEMANTIC_INJECT_LIMIT || '5';
|
||||
const semanticRes = await workerHttpRequest('/api/context/semantic', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ q: prompt, project, limit })
|
||||
});
|
||||
if (semanticRes.ok) {
|
||||
const data = await semanticRes.json() as { context: string; count: number };
|
||||
if (data.context) {
|
||||
additionalContext = data.context;
|
||||
logger.debug('HOOK', `Semantic injection: ${data.count} observations for prompt`, {
|
||||
sessionId: sessionDbId, count: data.count
|
||||
});
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
// Graceful degradation — semantic injection is optional
|
||||
logger.debug('HOOK', 'Semantic injection unavailable', {
|
||||
error: e instanceof Error ? e.message : String(e)
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
logger.info('HOOK', `INIT_COMPLETE | sessionDbId=${sessionDbId} | promptNumber=${promptNumber} | project=${project}`, {
|
||||
sessionId: sessionDbId
|
||||
});
|
||||
|
||||
// Return with semantic context if available
|
||||
if (additionalContext) {
|
||||
return {
|
||||
continue: true,
|
||||
suppressOutput: true,
|
||||
hookSpecificOutput: {
|
||||
hookEventName: 'UserPromptSubmit',
|
||||
additionalContext
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
return { continue: true, suppressOutput: true };
|
||||
}
|
||||
};
|
||||
|
||||
@@ -38,6 +38,7 @@ export class SearchRoutes extends BaseRouteHandler {
|
||||
app.get('/api/context/timeline', this.handleGetContextTimeline.bind(this));
|
||||
app.get('/api/context/preview', this.handleContextPreview.bind(this));
|
||||
app.get('/api/context/inject', this.handleContextInject.bind(this));
|
||||
app.post('/api/context/semantic', this.handleSemanticContext.bind(this));
|
||||
|
||||
// Timeline and help endpoints
|
||||
app.get('/api/timeline/by-query', this.handleGetTimelineByQuery.bind(this));
|
||||
@@ -246,6 +247,54 @@ export class SearchRoutes extends BaseRouteHandler {
|
||||
res.send(contextText);
|
||||
});
|
||||
|
||||
/**
|
||||
* Semantic context search for per-prompt injection
|
||||
* GET /api/context/semantic?q=<prompt>&project=<project>&limit=5
|
||||
*
|
||||
* Queries Chroma for observations semantically similar to the user's prompt.
|
||||
* Returns compact markdown for injection as additionalContext.
|
||||
*/
|
||||
private handleSemanticContext = this.wrapHandler(async (req: Request, res: Response): Promise<void> => {
|
||||
const query = (req.body?.q || req.query.q) as string;
|
||||
const project = (req.body?.project || req.query.project) as string;
|
||||
const limit = parseInt(String(req.body?.limit || req.query.limit || '5'), 10);
|
||||
|
||||
if (!query || query.length < 20) {
|
||||
res.json({ context: '', count: 0 });
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await this.searchManager.search({
|
||||
query,
|
||||
type: 'observations',
|
||||
project,
|
||||
limit: String(limit),
|
||||
format: 'json'
|
||||
});
|
||||
|
||||
const observations = (result as any)?.observations || [];
|
||||
if (!observations.length) {
|
||||
res.json({ context: '', count: 0 });
|
||||
return;
|
||||
}
|
||||
|
||||
// Format as compact markdown for context injection
|
||||
const lines: string[] = ['## Relevant Past Work (semantic match)\n'];
|
||||
for (const obs of observations.slice(0, limit)) {
|
||||
const date = obs.created_at?.slice(0, 10) || '';
|
||||
lines.push(`### ${obs.title || 'Observation'} (${date})`);
|
||||
if (obs.narrative) lines.push(obs.narrative);
|
||||
lines.push('');
|
||||
}
|
||||
|
||||
res.json({ context: lines.join('\n'), count: observations.length });
|
||||
} catch (error) {
|
||||
logger.error('SEARCH', 'Semantic context query failed', {}, error as Error);
|
||||
res.json({ context: '', count: 0 });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* Get timeline by query (search first, then get timeline around best match)
|
||||
* GET /api/timeline/by-query?query=...&mode=auto&depth_before=10&depth_after=10
|
||||
|
||||
@@ -54,6 +54,9 @@ export interface SettingsDefaults {
|
||||
// Exclusion Settings
|
||||
CLAUDE_MEM_EXCLUDED_PROJECTS: string; // Comma-separated glob patterns for excluded project paths
|
||||
CLAUDE_MEM_FOLDER_MD_EXCLUDE: string; // JSON array of folder paths to exclude from CLAUDE.md generation
|
||||
// Semantic Context Injection (per-prompt via Chroma)
|
||||
CLAUDE_MEM_SEMANTIC_INJECT: string; // 'true' | 'false' - inject relevant observations on each prompt
|
||||
CLAUDE_MEM_SEMANTIC_INJECT_LIMIT: string; // Max observations to inject per prompt
|
||||
// Chroma Vector Database Configuration
|
||||
CLAUDE_MEM_CHROMA_ENABLED: string; // 'true' | 'false' - set to 'false' for SQLite-only mode
|
||||
CLAUDE_MEM_CHROMA_MODE: string; // 'local' | 'remote'
|
||||
@@ -113,6 +116,9 @@ export class SettingsDefaultsManager {
|
||||
// Exclusion Settings
|
||||
CLAUDE_MEM_EXCLUDED_PROJECTS: '', // Comma-separated glob patterns for excluded project paths
|
||||
CLAUDE_MEM_FOLDER_MD_EXCLUDE: '[]', // JSON array of folder paths to exclude from CLAUDE.md generation
|
||||
// Semantic Context Injection (per-prompt via Chroma vector search)
|
||||
CLAUDE_MEM_SEMANTIC_INJECT: 'true', // Inject relevant past observations on every UserPromptSubmit
|
||||
CLAUDE_MEM_SEMANTIC_INJECT_LIMIT: '5', // Top-N most relevant observations to inject per prompt
|
||||
// Chroma Vector Database Configuration
|
||||
CLAUDE_MEM_CHROMA_ENABLED: 'true', // Set to 'false' to disable Chroma and use SQLite-only search
|
||||
CLAUDE_MEM_CHROMA_MODE: 'local', // 'local' uses persistent chroma-mcp via uvx, 'remote' connects to existing server
|
||||
|
||||
Reference in New Issue
Block a user