6382d6f9c7
Add restart limit (max 3 consecutive restarts) with exponential backoff to prevent infinite generator restart loops. Also add defensive memorySessionId checks in GeminiAgent and OpenRouterAgent before expensive LLM calls to fail fast when session ID hasn't been captured. Based on PR #693 by @ajbmachon (applied to current main). Co-Authored-By: Andre Machon <ajbmachon2@gmail.com> Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
463 lines
17 KiB
TypeScript
463 lines
17 KiB
TypeScript
/**
|
|
* OpenRouterAgent: OpenRouter-based observation extraction
|
|
*
|
|
* Alternative to SDKAgent that uses OpenRouter's unified API
|
|
* for accessing 100+ models from different providers.
|
|
*
|
|
* Responsibility:
|
|
* - Call OpenRouter REST API for observation extraction
|
|
* - Parse XML responses (same format as Claude/Gemini)
|
|
* - Sync to database and Chroma
|
|
* - Support dynamic model selection across providers
|
|
*/
|
|
|
|
import { DatabaseManager } from './DatabaseManager.js';
|
|
import { SessionManager } from './SessionManager.js';
|
|
import { logger } from '../../utils/logger.js';
|
|
import { buildInitPrompt, buildObservationPrompt, buildSummaryPrompt, buildContinuationPrompt } from '../../sdk/prompts.js';
|
|
import { SettingsDefaultsManager } from '../../shared/SettingsDefaultsManager.js';
|
|
import { USER_SETTINGS_PATH } from '../../shared/paths.js';
|
|
import { getCredential } from '../../shared/EnvManager.js';
|
|
import type { ActiveSession, ConversationMessage } from '../worker-types.js';
|
|
import { ModeManager } from '../domain/ModeManager.js';
|
|
import {
|
|
processAgentResponse,
|
|
shouldFallbackToClaude,
|
|
isAbortError,
|
|
type WorkerRef,
|
|
type FallbackAgent
|
|
} from './agents/index.js';
|
|
|
|
// OpenRouter API endpoint
|
|
const OPENROUTER_API_URL = 'https://openrouter.ai/api/v1/chat/completions';
|
|
|
|
// Context window management constants (defaults, overridable via settings)
|
|
const DEFAULT_MAX_CONTEXT_MESSAGES = 20; // Maximum messages to keep in conversation history
|
|
const DEFAULT_MAX_ESTIMATED_TOKENS = 100000; // ~100k tokens max context (safety limit)
|
|
const CHARS_PER_TOKEN_ESTIMATE = 4; // Conservative estimate: 1 token = 4 chars
|
|
|
|
// OpenAI-compatible message format
|
|
interface OpenAIMessage {
|
|
role: 'user' | 'assistant' | 'system';
|
|
content: string;
|
|
}
|
|
|
|
interface OpenRouterResponse {
|
|
choices?: Array<{
|
|
message?: {
|
|
role?: string;
|
|
content?: string;
|
|
};
|
|
finish_reason?: string;
|
|
}>;
|
|
usage?: {
|
|
prompt_tokens?: number;
|
|
completion_tokens?: number;
|
|
total_tokens?: number;
|
|
};
|
|
error?: {
|
|
message?: string;
|
|
code?: string;
|
|
};
|
|
}
|
|
|
|
export class OpenRouterAgent {
|
|
private dbManager: DatabaseManager;
|
|
private sessionManager: SessionManager;
|
|
private fallbackAgent: FallbackAgent | null = null;
|
|
|
|
constructor(dbManager: DatabaseManager, sessionManager: SessionManager) {
|
|
this.dbManager = dbManager;
|
|
this.sessionManager = sessionManager;
|
|
}
|
|
|
|
/**
|
|
* Set the fallback agent (Claude SDK) for when OpenRouter API fails
|
|
* Must be set after construction to avoid circular dependency
|
|
*/
|
|
setFallbackAgent(agent: FallbackAgent): void {
|
|
this.fallbackAgent = agent;
|
|
}
|
|
|
|
/**
|
|
* Start OpenRouter agent for a session
|
|
* Uses multi-turn conversation to maintain context across messages
|
|
*/
|
|
async startSession(session: ActiveSession, worker?: WorkerRef): Promise<void> {
|
|
try {
|
|
// Get OpenRouter configuration
|
|
const { apiKey, model, siteUrl, appName } = this.getOpenRouterConfig();
|
|
|
|
if (!apiKey) {
|
|
throw new Error('OpenRouter API key not configured. Set CLAUDE_MEM_OPENROUTER_API_KEY in settings or OPENROUTER_API_KEY environment variable.');
|
|
}
|
|
|
|
// Load active mode
|
|
const mode = ModeManager.getInstance().getActiveMode();
|
|
|
|
// Build initial prompt
|
|
const initPrompt = session.lastPromptNumber === 1
|
|
? buildInitPrompt(session.project, session.contentSessionId, session.userPrompt, mode)
|
|
: buildContinuationPrompt(session.userPrompt, session.lastPromptNumber, session.contentSessionId, mode);
|
|
|
|
// Add to conversation history and query OpenRouter with full context
|
|
session.conversationHistory.push({ role: 'user', content: initPrompt });
|
|
const initResponse = await this.queryOpenRouterMultiTurn(session.conversationHistory, apiKey, model, siteUrl, appName);
|
|
|
|
if (initResponse.content) {
|
|
// Add response to conversation history
|
|
session.conversationHistory.push({ role: 'assistant', content: initResponse.content });
|
|
|
|
// Track token usage
|
|
const tokensUsed = initResponse.tokensUsed || 0;
|
|
session.cumulativeInputTokens += Math.floor(tokensUsed * 0.7); // Rough estimate
|
|
session.cumulativeOutputTokens += Math.floor(tokensUsed * 0.3);
|
|
|
|
// Process response using shared ResponseProcessor (no original timestamp for init - not from queue)
|
|
await processAgentResponse(
|
|
initResponse.content,
|
|
session,
|
|
this.dbManager,
|
|
this.sessionManager,
|
|
worker,
|
|
tokensUsed,
|
|
null,
|
|
'OpenRouter',
|
|
undefined // No lastCwd yet - before message processing
|
|
);
|
|
} else {
|
|
logger.error('SDK', 'Empty OpenRouter init response - session may lack context', {
|
|
sessionId: session.sessionDbId,
|
|
model
|
|
});
|
|
}
|
|
|
|
// Track lastCwd from messages for CLAUDE.md generation
|
|
let lastCwd: string | undefined;
|
|
|
|
// Process pending messages
|
|
for await (const message of this.sessionManager.getMessageIterator(session.sessionDbId)) {
|
|
// Capture cwd from messages for proper worktree support
|
|
if (message.cwd) {
|
|
lastCwd = message.cwd;
|
|
}
|
|
// Capture earliest timestamp BEFORE processing (will be cleared after)
|
|
const originalTimestamp = session.earliestPendingTimestamp;
|
|
|
|
if (message.type === 'observation') {
|
|
// Update last prompt number
|
|
if (message.prompt_number !== undefined) {
|
|
session.lastPromptNumber = message.prompt_number;
|
|
}
|
|
|
|
// CRITICAL: Check memorySessionId BEFORE making expensive LLM call
|
|
// This prevents wasting tokens when we won't be able to store the result anyway
|
|
if (!session.memorySessionId) {
|
|
throw new Error('Cannot process observations: memorySessionId not yet captured. This session may need to be reinitialized.');
|
|
}
|
|
|
|
// Build observation prompt
|
|
const obsPrompt = buildObservationPrompt({
|
|
id: 0,
|
|
tool_name: message.tool_name!,
|
|
tool_input: JSON.stringify(message.tool_input),
|
|
tool_output: JSON.stringify(message.tool_response),
|
|
created_at_epoch: originalTimestamp ?? Date.now(),
|
|
cwd: message.cwd
|
|
});
|
|
|
|
// Add to conversation history and query OpenRouter with full context
|
|
session.conversationHistory.push({ role: 'user', content: obsPrompt });
|
|
const obsResponse = await this.queryOpenRouterMultiTurn(session.conversationHistory, apiKey, model, siteUrl, appName);
|
|
|
|
let tokensUsed = 0;
|
|
if (obsResponse.content) {
|
|
// Add response to conversation history
|
|
session.conversationHistory.push({ role: 'assistant', content: obsResponse.content });
|
|
|
|
tokensUsed = obsResponse.tokensUsed || 0;
|
|
session.cumulativeInputTokens += Math.floor(tokensUsed * 0.7);
|
|
session.cumulativeOutputTokens += Math.floor(tokensUsed * 0.3);
|
|
}
|
|
|
|
// Process response using shared ResponseProcessor
|
|
await processAgentResponse(
|
|
obsResponse.content || '',
|
|
session,
|
|
this.dbManager,
|
|
this.sessionManager,
|
|
worker,
|
|
tokensUsed,
|
|
originalTimestamp,
|
|
'OpenRouter',
|
|
lastCwd
|
|
);
|
|
|
|
} else if (message.type === 'summarize') {
|
|
// CRITICAL: Check memorySessionId BEFORE making expensive LLM call
|
|
if (!session.memorySessionId) {
|
|
throw new Error('Cannot process summary: memorySessionId not yet captured. This session may need to be reinitialized.');
|
|
}
|
|
|
|
// Build summary prompt
|
|
const summaryPrompt = buildSummaryPrompt({
|
|
id: session.sessionDbId,
|
|
memory_session_id: session.memorySessionId,
|
|
project: session.project,
|
|
user_prompt: session.userPrompt,
|
|
last_assistant_message: message.last_assistant_message || ''
|
|
}, mode);
|
|
|
|
// Add to conversation history and query OpenRouter with full context
|
|
session.conversationHistory.push({ role: 'user', content: summaryPrompt });
|
|
const summaryResponse = await this.queryOpenRouterMultiTurn(session.conversationHistory, apiKey, model, siteUrl, appName);
|
|
|
|
let tokensUsed = 0;
|
|
if (summaryResponse.content) {
|
|
// Add response to conversation history
|
|
session.conversationHistory.push({ role: 'assistant', content: summaryResponse.content });
|
|
|
|
tokensUsed = summaryResponse.tokensUsed || 0;
|
|
session.cumulativeInputTokens += Math.floor(tokensUsed * 0.7);
|
|
session.cumulativeOutputTokens += Math.floor(tokensUsed * 0.3);
|
|
}
|
|
|
|
// Process response using shared ResponseProcessor
|
|
await processAgentResponse(
|
|
summaryResponse.content || '',
|
|
session,
|
|
this.dbManager,
|
|
this.sessionManager,
|
|
worker,
|
|
tokensUsed,
|
|
originalTimestamp,
|
|
'OpenRouter',
|
|
lastCwd
|
|
);
|
|
}
|
|
}
|
|
|
|
// Mark session complete
|
|
const sessionDuration = Date.now() - session.startTime;
|
|
logger.success('SDK', 'OpenRouter agent completed', {
|
|
sessionId: session.sessionDbId,
|
|
duration: `${(sessionDuration / 1000).toFixed(1)}s`,
|
|
historyLength: session.conversationHistory.length,
|
|
model
|
|
});
|
|
|
|
} catch (error: unknown) {
|
|
if (isAbortError(error)) {
|
|
logger.warn('SDK', 'OpenRouter agent aborted', { sessionId: session.sessionDbId });
|
|
throw error;
|
|
}
|
|
|
|
// Check if we should fall back to Claude
|
|
if (shouldFallbackToClaude(error) && this.fallbackAgent) {
|
|
logger.warn('SDK', 'OpenRouter API failed, falling back to Claude SDK', {
|
|
sessionDbId: session.sessionDbId,
|
|
error: error instanceof Error ? error.message : String(error),
|
|
historyLength: session.conversationHistory.length
|
|
});
|
|
|
|
// Fall back to Claude - it will use the same session with shared conversationHistory
|
|
// Note: With claim-and-delete queue pattern, messages are already deleted on claim
|
|
return this.fallbackAgent.startSession(session, worker);
|
|
}
|
|
|
|
logger.failure('SDK', 'OpenRouter agent error', { sessionDbId: session.sessionDbId }, error as Error);
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Estimate token count from text (conservative estimate)
|
|
*/
|
|
private estimateTokens(text: string): number {
|
|
return Math.ceil(text.length / CHARS_PER_TOKEN_ESTIMATE);
|
|
}
|
|
|
|
/**
|
|
* Truncate conversation history to prevent runaway context costs
|
|
* Keeps most recent messages within token budget
|
|
*/
|
|
private truncateHistory(history: ConversationMessage[]): ConversationMessage[] {
|
|
const settings = SettingsDefaultsManager.loadFromFile(USER_SETTINGS_PATH);
|
|
|
|
const MAX_CONTEXT_MESSAGES = parseInt(settings.CLAUDE_MEM_OPENROUTER_MAX_CONTEXT_MESSAGES) || DEFAULT_MAX_CONTEXT_MESSAGES;
|
|
const MAX_ESTIMATED_TOKENS = parseInt(settings.CLAUDE_MEM_OPENROUTER_MAX_TOKENS) || DEFAULT_MAX_ESTIMATED_TOKENS;
|
|
|
|
if (history.length <= MAX_CONTEXT_MESSAGES) {
|
|
// Check token count even if message count is ok
|
|
const totalTokens = history.reduce((sum, m) => sum + this.estimateTokens(m.content), 0);
|
|
if (totalTokens <= MAX_ESTIMATED_TOKENS) {
|
|
return history;
|
|
}
|
|
}
|
|
|
|
// Sliding window: keep most recent messages within limits
|
|
const truncated: ConversationMessage[] = [];
|
|
let tokenCount = 0;
|
|
|
|
// Process messages in reverse (most recent first)
|
|
for (let i = history.length - 1; i >= 0; i--) {
|
|
const msg = history[i];
|
|
const msgTokens = this.estimateTokens(msg.content);
|
|
|
|
if (truncated.length >= MAX_CONTEXT_MESSAGES || tokenCount + msgTokens > MAX_ESTIMATED_TOKENS) {
|
|
logger.warn('SDK', 'Context window truncated to prevent runaway costs', {
|
|
originalMessages: history.length,
|
|
keptMessages: truncated.length,
|
|
droppedMessages: i + 1,
|
|
estimatedTokens: tokenCount,
|
|
tokenLimit: MAX_ESTIMATED_TOKENS
|
|
});
|
|
break;
|
|
}
|
|
|
|
truncated.unshift(msg); // Add to beginning
|
|
tokenCount += msgTokens;
|
|
}
|
|
|
|
return truncated;
|
|
}
|
|
|
|
/**
|
|
* Convert shared ConversationMessage array to OpenAI-compatible message format
|
|
*/
|
|
private conversationToOpenAIMessages(history: ConversationMessage[]): OpenAIMessage[] {
|
|
return history.map(msg => ({
|
|
role: msg.role === 'assistant' ? 'assistant' : 'user',
|
|
content: msg.content
|
|
}));
|
|
}
|
|
|
|
/**
|
|
* Query OpenRouter via REST API with full conversation history (multi-turn)
|
|
* Sends the entire conversation context for coherent responses
|
|
*/
|
|
private async queryOpenRouterMultiTurn(
|
|
history: ConversationMessage[],
|
|
apiKey: string,
|
|
model: string,
|
|
siteUrl?: string,
|
|
appName?: string
|
|
): Promise<{ content: string; tokensUsed?: number }> {
|
|
// Truncate history to prevent runaway costs
|
|
const truncatedHistory = this.truncateHistory(history);
|
|
const messages = this.conversationToOpenAIMessages(truncatedHistory);
|
|
const totalChars = truncatedHistory.reduce((sum, m) => sum + m.content.length, 0);
|
|
const estimatedTokens = this.estimateTokens(truncatedHistory.map(m => m.content).join(''));
|
|
|
|
logger.debug('SDK', `Querying OpenRouter multi-turn (${model})`, {
|
|
turns: truncatedHistory.length,
|
|
totalChars,
|
|
estimatedTokens
|
|
});
|
|
|
|
const response = await fetch(OPENROUTER_API_URL, {
|
|
method: 'POST',
|
|
headers: {
|
|
'Authorization': `Bearer ${apiKey}`,
|
|
'HTTP-Referer': siteUrl || 'https://github.com/thedotmack/claude-mem',
|
|
'X-Title': appName || 'claude-mem',
|
|
'Content-Type': 'application/json',
|
|
},
|
|
body: JSON.stringify({
|
|
model,
|
|
messages,
|
|
temperature: 0.3, // Lower temperature for structured extraction
|
|
max_tokens: 4096,
|
|
}),
|
|
});
|
|
|
|
if (!response.ok) {
|
|
const errorText = await response.text();
|
|
throw new Error(`OpenRouter API error: ${response.status} - ${errorText}`);
|
|
}
|
|
|
|
const data = await response.json() as OpenRouterResponse;
|
|
|
|
// Check for API error in response body
|
|
if (data.error) {
|
|
throw new Error(`OpenRouter API error: ${data.error.code} - ${data.error.message}`);
|
|
}
|
|
|
|
if (!data.choices?.[0]?.message?.content) {
|
|
logger.error('SDK', 'Empty response from OpenRouter');
|
|
return { content: '' };
|
|
}
|
|
|
|
const content = data.choices[0].message.content;
|
|
const tokensUsed = data.usage?.total_tokens;
|
|
|
|
// Log actual token usage for cost tracking
|
|
if (tokensUsed) {
|
|
const inputTokens = data.usage?.prompt_tokens || 0;
|
|
const outputTokens = data.usage?.completion_tokens || 0;
|
|
// Token usage (cost varies by model - many OpenRouter models are free)
|
|
const estimatedCost = (inputTokens / 1000000 * 3) + (outputTokens / 1000000 * 15);
|
|
|
|
logger.info('SDK', 'OpenRouter API usage', {
|
|
model,
|
|
inputTokens,
|
|
outputTokens,
|
|
totalTokens: tokensUsed,
|
|
estimatedCostUSD: estimatedCost.toFixed(4),
|
|
messagesInContext: truncatedHistory.length
|
|
});
|
|
|
|
// Warn if costs are getting high
|
|
if (tokensUsed > 50000) {
|
|
logger.warn('SDK', 'High token usage detected - consider reducing context', {
|
|
totalTokens: tokensUsed,
|
|
estimatedCost: estimatedCost.toFixed(4)
|
|
});
|
|
}
|
|
}
|
|
|
|
return { content, tokensUsed };
|
|
}
|
|
|
|
/**
|
|
* Get OpenRouter configuration from settings or environment
|
|
* Issue #733: Uses centralized ~/.claude-mem/.env for credentials, not random project .env files
|
|
*/
|
|
private getOpenRouterConfig(): { apiKey: string; model: string; siteUrl?: string; appName?: string } {
|
|
const settingsPath = USER_SETTINGS_PATH;
|
|
const settings = SettingsDefaultsManager.loadFromFile(settingsPath);
|
|
|
|
// API key: check settings first, then centralized claude-mem .env (NOT process.env)
|
|
// This prevents Issue #733 where random project .env files could interfere
|
|
const apiKey = settings.CLAUDE_MEM_OPENROUTER_API_KEY || getCredential('OPENROUTER_API_KEY') || '';
|
|
|
|
// Model: from settings or default
|
|
const model = settings.CLAUDE_MEM_OPENROUTER_MODEL || 'xiaomi/mimo-v2-flash:free';
|
|
|
|
// Optional analytics headers
|
|
const siteUrl = settings.CLAUDE_MEM_OPENROUTER_SITE_URL || '';
|
|
const appName = settings.CLAUDE_MEM_OPENROUTER_APP_NAME || 'claude-mem';
|
|
|
|
return { apiKey, model, siteUrl, appName };
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Check if OpenRouter is available (has API key configured)
|
|
* Issue #733: Uses centralized ~/.claude-mem/.env, not random project .env files
|
|
*/
|
|
export function isOpenRouterAvailable(): boolean {
|
|
const settingsPath = USER_SETTINGS_PATH;
|
|
const settings = SettingsDefaultsManager.loadFromFile(settingsPath);
|
|
return !!(settings.CLAUDE_MEM_OPENROUTER_API_KEY || getCredential('OPENROUTER_API_KEY'));
|
|
}
|
|
|
|
/**
|
|
* Check if OpenRouter is the selected provider
|
|
*/
|
|
export function isOpenRouterSelected(): boolean {
|
|
const settingsPath = USER_SETTINGS_PATH;
|
|
const settings = SettingsDefaultsManager.loadFromFile(settingsPath);
|
|
return settings.CLAUDE_MEM_PROVIDER === 'openrouter';
|
|
}
|