diff --git a/src/services/sqlite/PendingMessageStore.ts b/src/services/sqlite/PendingMessageStore.ts index bb2d33ef..b61a9ea8 100644 --- a/src/services/sqlite/PendingMessageStore.ts +++ b/src/services/sqlite/PendingMessageStore.ts @@ -397,6 +397,19 @@ export class PendingMessageStore { return result.count; } + /** + * Peek at pending message types for a session (for tier routing). + * Returns list of { message_type, tool_name } without claiming. + */ + peekPendingTypes(sessionDbId: number): Array<{ message_type: string; tool_name: string | null }> { + const stmt = this.db.prepare(` + SELECT message_type, tool_name FROM pending_messages + WHERE session_db_id = ? AND status IN ('pending', 'processing') + ORDER BY id ASC + `); + return stmt.all(sessionDbId) as Array<{ message_type: string; tool_name: string | null }>; + } + /** * Check if any session has pending work. * Excludes 'processing' messages stuck for >5 minutes (resets them to 'pending' as a side effect). diff --git a/src/services/sqlite/migrations.ts b/src/services/sqlite/migrations.ts index ad0f877b..5be21f6a 100644 --- a/src/services/sqlite/migrations.ts +++ b/src/services/sqlite/migrations.ts @@ -509,6 +509,38 @@ export const migration007: Migration = { }; +/** + * All migrations in order + */ +/** + * Migration 008: Observation feedback table for tracking observation usage + * + * Tracks how observations are used (semantic injection hits, search access, + * explicit retrieval). Foundation for future Thompson Sampling optimization. + */ +export const migration008: Migration = { + version: 25, + up: (db: Database) => { + db.run(` + CREATE TABLE IF NOT EXISTS observation_feedback ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + observation_id INTEGER NOT NULL, + signal_type TEXT NOT NULL, + session_db_id INTEGER, + created_at_epoch INTEGER NOT NULL, + metadata TEXT, + FOREIGN KEY (observation_id) REFERENCES observations(id) ON DELETE CASCADE + ) + `); + db.run(`CREATE INDEX IF NOT EXISTS idx_feedback_observation ON observation_feedback(observation_id)`); + db.run(`CREATE INDEX IF NOT EXISTS idx_feedback_signal ON observation_feedback(signal_type)`); + console.log('✅ Created observation_feedback table for usage tracking'); + }, + down: (db: Database) => { + db.run(`DROP TABLE IF EXISTS observation_feedback`); + } +}; + /** * All migrations in order */ @@ -519,5 +551,6 @@ export const migrations: Migration[] = [ migration004, migration005, migration006, - migration007 + migration007, + migration008 ]; \ No newline at end of file diff --git a/src/services/sqlite/migrations/runner.ts b/src/services/sqlite/migrations/runner.ts index 51c8a257..aff03d6e 100644 --- a/src/services/sqlite/migrations/runner.ts +++ b/src/services/sqlite/migrations/runner.ts @@ -34,6 +34,7 @@ export class MigrationRunner { this.addOnUpdateCascadeToForeignKeys(); this.addObservationContentHashColumn(); this.addSessionCustomTitleColumn(); + this.createObservationFeedbackTable(); } /** @@ -863,4 +864,31 @@ export class MigrationRunner { this.db.prepare('INSERT OR IGNORE INTO schema_versions (version, applied_at) VALUES (?, ?)').run(23, new Date().toISOString()); } + + /** + * Create observation_feedback table for tracking observation usage signals. + * Foundation for tier routing optimization and future Thompson Sampling. + * Schema version 24. + */ + private createObservationFeedbackTable(): void { + const applied = this.db.query('SELECT 1 FROM schema_versions WHERE version = 24').get(); + if (applied) return; + + this.db.run(` + CREATE TABLE IF NOT EXISTS observation_feedback ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + observation_id INTEGER NOT NULL, + signal_type TEXT NOT NULL, + session_db_id INTEGER, + created_at_epoch INTEGER NOT NULL, + metadata TEXT, + FOREIGN KEY (observation_id) REFERENCES observations(id) ON DELETE CASCADE + ) + `); + this.db.run('CREATE INDEX IF NOT EXISTS idx_feedback_observation ON observation_feedback(observation_id)'); + this.db.run('CREATE INDEX IF NOT EXISTS idx_feedback_signal ON observation_feedback(signal_type)'); + + this.db.prepare('INSERT OR IGNORE INTO schema_versions (version, applied_at) VALUES (?, ?)').run(24, new Date().toISOString()); + logger.debug('DB', 'Created observation_feedback table for usage tracking'); + } } diff --git a/src/services/worker-types.ts b/src/services/worker-types.ts index c4fd89eb..5962bfa4 100644 --- a/src/services/worker-types.ts +++ b/src/services/worker-types.ts @@ -40,6 +40,8 @@ export interface ActiveSession { // CLAIM-CONFIRM FIX: Track IDs of messages currently being processed // These IDs will be confirmed (deleted) after successful storage processingMessageIds: number[]; + // Tier routing: model override per session based on queue complexity + modelOverride?: string; } export interface PendingMessage { diff --git a/src/services/worker/SDKAgent.ts b/src/services/worker/SDKAgent.ts index 2af48068..5a9b5866 100644 --- a/src/services/worker/SDKAgent.ts +++ b/src/services/worker/SDKAgent.ts @@ -49,8 +49,8 @@ export class SDKAgent { // Find Claude executable const claudePath = this.findClaudeExecutable(); - // Get model ID and disallowed tools - const modelId = this.getModelId(); + // Get model ID (tier routing override takes precedence) + const modelId = session.modelOverride || this.getModelId(); // Memory agent is OBSERVER ONLY - no tools allowed const disallowedTools = [ 'Bash', // Prevent infinite loops diff --git a/src/services/worker/http/routes/SessionRoutes.ts b/src/services/worker/http/routes/SessionRoutes.ts index 2f32efb5..9db84dc2 100644 --- a/src/services/worker/http/routes/SessionRoutes.ts +++ b/src/services/worker/http/routes/SessionRoutes.ts @@ -106,6 +106,8 @@ export class SessionRoutes extends BaseRouteHandler { // Start generator if not running if (!session.generatorPromise) { + // Apply tier routing before starting the generator + this.applyTierRouting(session); this.spawnInProgress.set(sessionDbId, true); this.startGeneratorWithProvider(session, selectedProvider, source); return; @@ -126,6 +128,7 @@ export class SessionRoutes extends BaseRouteHandler { session.abortController = new AbortController(); session.lastGeneratorActivity = Date.now(); // Start a fresh generator + this.applyTierRouting(session); this.spawnInProgress.set(sessionDbId, true); this.startGeneratorWithProvider(session, selectedProvider, 'stale-recovery'); return; @@ -283,6 +286,7 @@ export class SessionRoutes extends BaseRouteHandler { this.crashRecoveryScheduled.delete(sessionDbId); const stillExists = this.sessionManager.getSession(sessionDbId); if (stillExists && !stillExists.generatorPromise) { + this.applyTierRouting(stillExists); this.startGeneratorWithProvider(stillExists, this.getSelectedProvider(), 'crash-recovery'); } }, backoffMs); @@ -813,4 +817,60 @@ export class SessionRoutes extends BaseRouteHandler { contextInjected }); }); + + // Simple tool names that produce low-complexity observations + private static readonly SIMPLE_TOOLS = new Set([ + 'Read', 'Glob', 'Grep', 'LS', 'ListMcpResourcesTool' + ]); + + /** + * Apply tier routing: select model based on pending queue complexity. + * - Summarize in queue → summary model (e.g., Opus) + * - All simple tools → simple model (e.g., Haiku) + * - Otherwise → default model (no override) + */ + private applyTierRouting(session: NonNullable>): void { + const settings = SettingsDefaultsManager.loadFromFile(USER_SETTINGS_PATH); + if (settings.CLAUDE_MEM_TIER_ROUTING_ENABLED === 'false') { + session.modelOverride = undefined; + return; + } + + // Clear stale override before re-evaluating — prevents previous tier + // from persisting when queue composition changes between spawns. + session.modelOverride = undefined; + + const pendingStore = this.sessionManager.getPendingMessageStore(); + const pending = pendingStore.peekPendingTypes(session.sessionDbId); + + if (pending.length === 0) { + session.modelOverride = undefined; + return; + } + + const hasSummarize = pending.some(m => m.message_type === 'summarize'); + const allSimple = pending.every(m => + m.message_type === 'observation' && m.tool_name && SessionRoutes.SIMPLE_TOOLS.has(m.tool_name) + ); + + if (hasSummarize) { + const summaryModel = settings.CLAUDE_MEM_TIER_SUMMARY_MODEL; + if (summaryModel) { + session.modelOverride = summaryModel; + logger.debug('SESSION', `Tier routing: summary model`, { + sessionId: session.sessionDbId, model: summaryModel + }); + } + } else if (allSimple) { + const simpleModel = settings.CLAUDE_MEM_TIER_SIMPLE_MODEL; + if (simpleModel) { + session.modelOverride = simpleModel; + logger.debug('SESSION', `Tier routing: simple model`, { + sessionId: session.sessionDbId, model: simpleModel + }); + } + } else { + session.modelOverride = undefined; + } + } } diff --git a/src/shared/SettingsDefaultsManager.ts b/src/shared/SettingsDefaultsManager.ts index faa4d2ad..6d347f5a 100644 --- a/src/shared/SettingsDefaultsManager.ts +++ b/src/shared/SettingsDefaultsManager.ts @@ -57,6 +57,10 @@ export interface SettingsDefaults { // Semantic Context Injection (per-prompt via Chroma) CLAUDE_MEM_SEMANTIC_INJECT: string; // 'true' | 'false' - inject relevant observations on each prompt CLAUDE_MEM_SEMANTIC_INJECT_LIMIT: string; // Max observations to inject per prompt + // Tier Routing (model selection by queue complexity) + CLAUDE_MEM_TIER_ROUTING_ENABLED: string; // 'true' | 'false' - enable model tier routing + CLAUDE_MEM_TIER_SIMPLE_MODEL: string; // Tier alias or model ID for simple tool observations (Read, Glob, Grep) + CLAUDE_MEM_TIER_SUMMARY_MODEL: string; // Tier alias or model ID for session summaries // Chroma Vector Database Configuration CLAUDE_MEM_CHROMA_ENABLED: string; // 'true' | 'false' - set to 'false' for SQLite-only mode CLAUDE_MEM_CHROMA_MODE: string; // 'local' | 'remote' @@ -119,6 +123,10 @@ export class SettingsDefaultsManager { // Semantic Context Injection (per-prompt via Chroma vector search) CLAUDE_MEM_SEMANTIC_INJECT: 'true', // Inject relevant past observations on every UserPromptSubmit CLAUDE_MEM_SEMANTIC_INJECT_LIMIT: '5', // Top-N most relevant observations to inject per prompt + // Tier Routing (model selection by queue complexity) + CLAUDE_MEM_TIER_ROUTING_ENABLED: 'true', // Route observations to models by complexity + CLAUDE_MEM_TIER_SIMPLE_MODEL: 'haiku', // Portable tier alias — works across Direct API, Bedrock, Vertex, Azure (see #1463) + CLAUDE_MEM_TIER_SUMMARY_MODEL: '', // Empty = use default model for summaries // Chroma Vector Database Configuration CLAUDE_MEM_CHROMA_ENABLED: 'true', // Set to 'false' to disable Chroma and use SQLite-only search CLAUDE_MEM_CHROMA_MODE: 'local', // 'local' uses persistent chroma-mcp via uvx, 'remote' connects to existing server