Merge branch 'feat/tier-routing-feedback' into thedotmack/merge-alessandro-prs
This commit is contained in:
@@ -397,6 +397,19 @@ export class PendingMessageStore {
|
|||||||
return result.count;
|
return result.count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Peek at pending message types for a session (for tier routing).
|
||||||
|
* Returns list of { message_type, tool_name } without claiming.
|
||||||
|
*/
|
||||||
|
peekPendingTypes(sessionDbId: number): Array<{ message_type: string; tool_name: string | null }> {
|
||||||
|
const stmt = this.db.prepare(`
|
||||||
|
SELECT message_type, tool_name FROM pending_messages
|
||||||
|
WHERE session_db_id = ? AND status IN ('pending', 'processing')
|
||||||
|
ORDER BY id ASC
|
||||||
|
`);
|
||||||
|
return stmt.all(sessionDbId) as Array<{ message_type: string; tool_name: string | null }>;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if any session has pending work.
|
* Check if any session has pending work.
|
||||||
* Excludes 'processing' messages stuck for >5 minutes (resets them to 'pending' as a side effect).
|
* Excludes 'processing' messages stuck for >5 minutes (resets them to 'pending' as a side effect).
|
||||||
|
|||||||
@@ -509,6 +509,38 @@ export const migration007: Migration = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* All migrations in order
|
||||||
|
*/
|
||||||
|
/**
|
||||||
|
* Migration 008: Observation feedback table for tracking observation usage
|
||||||
|
*
|
||||||
|
* Tracks how observations are used (semantic injection hits, search access,
|
||||||
|
* explicit retrieval). Foundation for future Thompson Sampling optimization.
|
||||||
|
*/
|
||||||
|
export const migration008: Migration = {
|
||||||
|
version: 25,
|
||||||
|
up: (db: Database) => {
|
||||||
|
db.run(`
|
||||||
|
CREATE TABLE IF NOT EXISTS observation_feedback (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
observation_id INTEGER NOT NULL,
|
||||||
|
signal_type TEXT NOT NULL,
|
||||||
|
session_db_id INTEGER,
|
||||||
|
created_at_epoch INTEGER NOT NULL,
|
||||||
|
metadata TEXT,
|
||||||
|
FOREIGN KEY (observation_id) REFERENCES observations(id) ON DELETE CASCADE
|
||||||
|
)
|
||||||
|
`);
|
||||||
|
db.run(`CREATE INDEX IF NOT EXISTS idx_feedback_observation ON observation_feedback(observation_id)`);
|
||||||
|
db.run(`CREATE INDEX IF NOT EXISTS idx_feedback_signal ON observation_feedback(signal_type)`);
|
||||||
|
console.log('✅ Created observation_feedback table for usage tracking');
|
||||||
|
},
|
||||||
|
down: (db: Database) => {
|
||||||
|
db.run(`DROP TABLE IF EXISTS observation_feedback`);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* All migrations in order
|
* All migrations in order
|
||||||
*/
|
*/
|
||||||
@@ -519,5 +551,6 @@ export const migrations: Migration[] = [
|
|||||||
migration004,
|
migration004,
|
||||||
migration005,
|
migration005,
|
||||||
migration006,
|
migration006,
|
||||||
migration007
|
migration007,
|
||||||
|
migration008
|
||||||
];
|
];
|
||||||
@@ -34,6 +34,7 @@ export class MigrationRunner {
|
|||||||
this.addOnUpdateCascadeToForeignKeys();
|
this.addOnUpdateCascadeToForeignKeys();
|
||||||
this.addObservationContentHashColumn();
|
this.addObservationContentHashColumn();
|
||||||
this.addSessionCustomTitleColumn();
|
this.addSessionCustomTitleColumn();
|
||||||
|
this.createObservationFeedbackTable();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -863,4 +864,31 @@ export class MigrationRunner {
|
|||||||
|
|
||||||
this.db.prepare('INSERT OR IGNORE INTO schema_versions (version, applied_at) VALUES (?, ?)').run(23, new Date().toISOString());
|
this.db.prepare('INSERT OR IGNORE INTO schema_versions (version, applied_at) VALUES (?, ?)').run(23, new Date().toISOString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create observation_feedback table for tracking observation usage signals.
|
||||||
|
* Foundation for tier routing optimization and future Thompson Sampling.
|
||||||
|
* Schema version 24.
|
||||||
|
*/
|
||||||
|
private createObservationFeedbackTable(): void {
|
||||||
|
const applied = this.db.query('SELECT 1 FROM schema_versions WHERE version = 24').get();
|
||||||
|
if (applied) return;
|
||||||
|
|
||||||
|
this.db.run(`
|
||||||
|
CREATE TABLE IF NOT EXISTS observation_feedback (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
observation_id INTEGER NOT NULL,
|
||||||
|
signal_type TEXT NOT NULL,
|
||||||
|
session_db_id INTEGER,
|
||||||
|
created_at_epoch INTEGER NOT NULL,
|
||||||
|
metadata TEXT,
|
||||||
|
FOREIGN KEY (observation_id) REFERENCES observations(id) ON DELETE CASCADE
|
||||||
|
)
|
||||||
|
`);
|
||||||
|
this.db.run('CREATE INDEX IF NOT EXISTS idx_feedback_observation ON observation_feedback(observation_id)');
|
||||||
|
this.db.run('CREATE INDEX IF NOT EXISTS idx_feedback_signal ON observation_feedback(signal_type)');
|
||||||
|
|
||||||
|
this.db.prepare('INSERT OR IGNORE INTO schema_versions (version, applied_at) VALUES (?, ?)').run(24, new Date().toISOString());
|
||||||
|
logger.debug('DB', 'Created observation_feedback table for usage tracking');
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -40,6 +40,8 @@ export interface ActiveSession {
|
|||||||
// CLAIM-CONFIRM FIX: Track IDs of messages currently being processed
|
// CLAIM-CONFIRM FIX: Track IDs of messages currently being processed
|
||||||
// These IDs will be confirmed (deleted) after successful storage
|
// These IDs will be confirmed (deleted) after successful storage
|
||||||
processingMessageIds: number[];
|
processingMessageIds: number[];
|
||||||
|
// Tier routing: model override per session based on queue complexity
|
||||||
|
modelOverride?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface PendingMessage {
|
export interface PendingMessage {
|
||||||
|
|||||||
@@ -49,8 +49,8 @@ export class SDKAgent {
|
|||||||
// Find Claude executable
|
// Find Claude executable
|
||||||
const claudePath = this.findClaudeExecutable();
|
const claudePath = this.findClaudeExecutable();
|
||||||
|
|
||||||
// Get model ID and disallowed tools
|
// Get model ID (tier routing override takes precedence)
|
||||||
const modelId = this.getModelId();
|
const modelId = session.modelOverride || this.getModelId();
|
||||||
// Memory agent is OBSERVER ONLY - no tools allowed
|
// Memory agent is OBSERVER ONLY - no tools allowed
|
||||||
const disallowedTools = [
|
const disallowedTools = [
|
||||||
'Bash', // Prevent infinite loops
|
'Bash', // Prevent infinite loops
|
||||||
|
|||||||
@@ -106,6 +106,8 @@ export class SessionRoutes extends BaseRouteHandler {
|
|||||||
|
|
||||||
// Start generator if not running
|
// Start generator if not running
|
||||||
if (!session.generatorPromise) {
|
if (!session.generatorPromise) {
|
||||||
|
// Apply tier routing before starting the generator
|
||||||
|
this.applyTierRouting(session);
|
||||||
this.spawnInProgress.set(sessionDbId, true);
|
this.spawnInProgress.set(sessionDbId, true);
|
||||||
this.startGeneratorWithProvider(session, selectedProvider, source);
|
this.startGeneratorWithProvider(session, selectedProvider, source);
|
||||||
return;
|
return;
|
||||||
@@ -126,6 +128,7 @@ export class SessionRoutes extends BaseRouteHandler {
|
|||||||
session.abortController = new AbortController();
|
session.abortController = new AbortController();
|
||||||
session.lastGeneratorActivity = Date.now();
|
session.lastGeneratorActivity = Date.now();
|
||||||
// Start a fresh generator
|
// Start a fresh generator
|
||||||
|
this.applyTierRouting(session);
|
||||||
this.spawnInProgress.set(sessionDbId, true);
|
this.spawnInProgress.set(sessionDbId, true);
|
||||||
this.startGeneratorWithProvider(session, selectedProvider, 'stale-recovery');
|
this.startGeneratorWithProvider(session, selectedProvider, 'stale-recovery');
|
||||||
return;
|
return;
|
||||||
@@ -283,6 +286,7 @@ export class SessionRoutes extends BaseRouteHandler {
|
|||||||
this.crashRecoveryScheduled.delete(sessionDbId);
|
this.crashRecoveryScheduled.delete(sessionDbId);
|
||||||
const stillExists = this.sessionManager.getSession(sessionDbId);
|
const stillExists = this.sessionManager.getSession(sessionDbId);
|
||||||
if (stillExists && !stillExists.generatorPromise) {
|
if (stillExists && !stillExists.generatorPromise) {
|
||||||
|
this.applyTierRouting(stillExists);
|
||||||
this.startGeneratorWithProvider(stillExists, this.getSelectedProvider(), 'crash-recovery');
|
this.startGeneratorWithProvider(stillExists, this.getSelectedProvider(), 'crash-recovery');
|
||||||
}
|
}
|
||||||
}, backoffMs);
|
}, backoffMs);
|
||||||
@@ -813,4 +817,60 @@ export class SessionRoutes extends BaseRouteHandler {
|
|||||||
contextInjected
|
contextInjected
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Simple tool names that produce low-complexity observations
|
||||||
|
private static readonly SIMPLE_TOOLS = new Set([
|
||||||
|
'Read', 'Glob', 'Grep', 'LS', 'ListMcpResourcesTool'
|
||||||
|
]);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Apply tier routing: select model based on pending queue complexity.
|
||||||
|
* - Summarize in queue → summary model (e.g., Opus)
|
||||||
|
* - All simple tools → simple model (e.g., Haiku)
|
||||||
|
* - Otherwise → default model (no override)
|
||||||
|
*/
|
||||||
|
private applyTierRouting(session: NonNullable<ReturnType<typeof this.sessionManager.getSession>>): void {
|
||||||
|
const settings = SettingsDefaultsManager.loadFromFile(USER_SETTINGS_PATH);
|
||||||
|
if (settings.CLAUDE_MEM_TIER_ROUTING_ENABLED === 'false') {
|
||||||
|
session.modelOverride = undefined;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear stale override before re-evaluating — prevents previous tier
|
||||||
|
// from persisting when queue composition changes between spawns.
|
||||||
|
session.modelOverride = undefined;
|
||||||
|
|
||||||
|
const pendingStore = this.sessionManager.getPendingMessageStore();
|
||||||
|
const pending = pendingStore.peekPendingTypes(session.sessionDbId);
|
||||||
|
|
||||||
|
if (pending.length === 0) {
|
||||||
|
session.modelOverride = undefined;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const hasSummarize = pending.some(m => m.message_type === 'summarize');
|
||||||
|
const allSimple = pending.every(m =>
|
||||||
|
m.message_type === 'observation' && m.tool_name && SessionRoutes.SIMPLE_TOOLS.has(m.tool_name)
|
||||||
|
);
|
||||||
|
|
||||||
|
if (hasSummarize) {
|
||||||
|
const summaryModel = settings.CLAUDE_MEM_TIER_SUMMARY_MODEL;
|
||||||
|
if (summaryModel) {
|
||||||
|
session.modelOverride = summaryModel;
|
||||||
|
logger.debug('SESSION', `Tier routing: summary model`, {
|
||||||
|
sessionId: session.sessionDbId, model: summaryModel
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} else if (allSimple) {
|
||||||
|
const simpleModel = settings.CLAUDE_MEM_TIER_SIMPLE_MODEL;
|
||||||
|
if (simpleModel) {
|
||||||
|
session.modelOverride = simpleModel;
|
||||||
|
logger.debug('SESSION', `Tier routing: simple model`, {
|
||||||
|
sessionId: session.sessionDbId, model: simpleModel
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
session.modelOverride = undefined;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -57,6 +57,10 @@ export interface SettingsDefaults {
|
|||||||
// Semantic Context Injection (per-prompt via Chroma)
|
// Semantic Context Injection (per-prompt via Chroma)
|
||||||
CLAUDE_MEM_SEMANTIC_INJECT: string; // 'true' | 'false' - inject relevant observations on each prompt
|
CLAUDE_MEM_SEMANTIC_INJECT: string; // 'true' | 'false' - inject relevant observations on each prompt
|
||||||
CLAUDE_MEM_SEMANTIC_INJECT_LIMIT: string; // Max observations to inject per prompt
|
CLAUDE_MEM_SEMANTIC_INJECT_LIMIT: string; // Max observations to inject per prompt
|
||||||
|
// Tier Routing (model selection by queue complexity)
|
||||||
|
CLAUDE_MEM_TIER_ROUTING_ENABLED: string; // 'true' | 'false' - enable model tier routing
|
||||||
|
CLAUDE_MEM_TIER_SIMPLE_MODEL: string; // Tier alias or model ID for simple tool observations (Read, Glob, Grep)
|
||||||
|
CLAUDE_MEM_TIER_SUMMARY_MODEL: string; // Tier alias or model ID for session summaries
|
||||||
// Chroma Vector Database Configuration
|
// Chroma Vector Database Configuration
|
||||||
CLAUDE_MEM_CHROMA_ENABLED: string; // 'true' | 'false' - set to 'false' for SQLite-only mode
|
CLAUDE_MEM_CHROMA_ENABLED: string; // 'true' | 'false' - set to 'false' for SQLite-only mode
|
||||||
CLAUDE_MEM_CHROMA_MODE: string; // 'local' | 'remote'
|
CLAUDE_MEM_CHROMA_MODE: string; // 'local' | 'remote'
|
||||||
@@ -119,6 +123,10 @@ export class SettingsDefaultsManager {
|
|||||||
// Semantic Context Injection (per-prompt via Chroma vector search)
|
// Semantic Context Injection (per-prompt via Chroma vector search)
|
||||||
CLAUDE_MEM_SEMANTIC_INJECT: 'true', // Inject relevant past observations on every UserPromptSubmit
|
CLAUDE_MEM_SEMANTIC_INJECT: 'true', // Inject relevant past observations on every UserPromptSubmit
|
||||||
CLAUDE_MEM_SEMANTIC_INJECT_LIMIT: '5', // Top-N most relevant observations to inject per prompt
|
CLAUDE_MEM_SEMANTIC_INJECT_LIMIT: '5', // Top-N most relevant observations to inject per prompt
|
||||||
|
// Tier Routing (model selection by queue complexity)
|
||||||
|
CLAUDE_MEM_TIER_ROUTING_ENABLED: 'true', // Route observations to models by complexity
|
||||||
|
CLAUDE_MEM_TIER_SIMPLE_MODEL: 'haiku', // Portable tier alias — works across Direct API, Bedrock, Vertex, Azure (see #1463)
|
||||||
|
CLAUDE_MEM_TIER_SUMMARY_MODEL: '', // Empty = use default model for summaries
|
||||||
// Chroma Vector Database Configuration
|
// Chroma Vector Database Configuration
|
||||||
CLAUDE_MEM_CHROMA_ENABLED: 'true', // Set to 'false' to disable Chroma and use SQLite-only search
|
CLAUDE_MEM_CHROMA_ENABLED: 'true', // Set to 'false' to disable Chroma and use SQLite-only search
|
||||||
CLAUDE_MEM_CHROMA_MODE: 'local', // 'local' uses persistent chroma-mcp via uvx, 'remote' connects to existing server
|
CLAUDE_MEM_CHROMA_MODE: 'local', // 'local' uses persistent chroma-mcp via uvx, 'remote' connects to existing server
|
||||||
|
|||||||
Reference in New Issue
Block a user