/**
 * GeminiAgent: Gemini-based observation extraction
 *
 * Alternative to SDKAgent that uses Google's Gemini API directly
 * for extracting observations from tool usage.
 *
 * Responsibility:
 * - Call Gemini REST API for observation extraction
 * - Parse XML responses (same format as Claude)
 * - Sync to database and Chroma
 */

import path from 'path';
import { homedir } from 'os';
import { DatabaseManager } from './DatabaseManager.js';
import { SessionManager } from './SessionManager.js';
import { logger } from '../../utils/logger.js';
import {
  buildInitPrompt,
  buildObservationPrompt,
  buildSummaryPrompt,
  buildContinuationPrompt
} from '../../sdk/prompts.js';
import { SettingsDefaultsManager } from '../../shared/SettingsDefaultsManager.js';
import { getCredential } from '../../shared/EnvManager.js';
import { USER_SETTINGS_PATH } from '../../shared/paths.js';
import { estimateTokens } from '../../shared/timeline-formatting.js';
import type { ActiveSession, ConversationMessage } from '../worker-types.js';
import { ModeManager } from '../domain/ModeManager.js';
import {
  processAgentResponse,
  shouldFallbackToClaude,
  isAbortError,
  type WorkerRef,
  type FallbackAgent
} from './agents/index.js';

// Gemini API endpoint: use v1 (stable), not v1beta.
// v1beta does not support newer models like gemini-3-flash.
const GEMINI_API_URL = 'https://generativelanguage.googleapis.com/v1/models';

// Gemini model types (available via API)
export type GeminiModel =
  | 'gemini-2.5-flash-lite'
  | 'gemini-2.5-flash'
  | 'gemini-2.5-pro'
  | 'gemini-2.0-flash'
  | 'gemini-2.0-flash-lite'
  | 'gemini-3-flash'
  | 'gemini-3-flash-preview';

// Free tier RPM limits by model (requests per minute)
const GEMINI_RPM_LIMITS: Record<GeminiModel, number> = {
  'gemini-2.5-flash-lite': 10,
  'gemini-2.5-flash': 10,
  'gemini-2.5-pro': 5,
  'gemini-2.0-flash': 15,
  'gemini-2.0-flash-lite': 30,
  'gemini-3-flash': 10,
  'gemini-3-flash-preview': 5,
};

// Track last request time for rate limiting
let lastRequestTime = 0;

// Context window limits (prevents O(N²) token cost growth)
const DEFAULT_MAX_CONTEXT_MESSAGES = 20;     // Maximum messages to keep in conversation history
const DEFAULT_MAX_ESTIMATED_TOKENS = 100000; // ~100k tokens max context (safety limit)

/**
 * Enforce the RPM rate limit for the Gemini free tier.
 * Waits the required time between requests based on the model's RPM limit plus a 100ms safety buffer.
 * Skipped entirely if rate limiting is disabled (billing users with 1000+ RPM available).
 */
async function enforceRateLimitForModel(model: GeminiModel, rateLimitingEnabled: boolean): Promise<void> {
  // Skip rate limiting if disabled (billing users with 1000+ RPM)
  if (!rateLimitingEnabled) {
    return;
  }

  const rpm = GEMINI_RPM_LIMITS[model] || 5;
  const minimumDelayMs = Math.ceil(60000 / rpm) + 100; // (60s / RPM) + 100ms safety buffer

  const now = Date.now();
  const timeSinceLastRequest = now - lastRequestTime;

  if (timeSinceLastRequest < minimumDelayMs) {
    const waitTime = minimumDelayMs - timeSinceLastRequest;
    logger.debug('SDK', `Rate limiting: waiting ${waitTime}ms before Gemini request`, { model, rpm });
    await new Promise(resolve => setTimeout(resolve, waitTime));
  }

  lastRequestTime = Date.now();
}
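// Worked examples of the spacing formula above (assumes the free-tier quotas in
// GEMINI_RPM_LIMITS are current; Google may change them at any time):
//   gemini-2.5-pro   at  5 RPM → ceil(60000 / 5)  + 100 = 12100ms minimum gap
//   gemini-2.0-flash at 15 RPM → ceil(60000 / 15) + 100 =  4100ms minimum gap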
interface GeminiResponse {
  candidates?: Array<{
    content?: {
      parts?: Array<{
        text?: string;
      }>;
    };
  }>;
  usageMetadata?: {
    promptTokenCount?: number;
    candidatesTokenCount?: number;
    totalTokenCount?: number;
  };
}

/**
 * Gemini content message format
 * role: "user" or "model" (Gemini uses "model", not "assistant")
 */
interface GeminiContent {
  role: 'user' | 'model';
  parts: Array<{ text: string }>;
}
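// Illustrative wire shapes for these types (a sketch of what queryGeminiMultiTurn
// below sends and receives, not the full API surface):
// request:  { "contents": [ { "role": "user",  "parts": [{ "text": "..." }] },
//                           { "role": "model", "parts": [{ "text": "..." }] } ],
//             "generationConfig": { "temperature": 0.3, "maxOutputTokens": 4096 } }
// response: { "candidates": [ { "content": { "parts": [{ "text": "..." }] } } ],
//             "usageMetadata": { "totalTokenCount": 1234 } }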
export class GeminiAgent {
  private dbManager: DatabaseManager;
  private sessionManager: SessionManager;
  private fallbackAgent: FallbackAgent | null = null;

  constructor(dbManager: DatabaseManager, sessionManager: SessionManager) {
    this.dbManager = dbManager;
    this.sessionManager = sessionManager;
  }

  /**
   * Set the fallback agent (Claude SDK) for when the Gemini API fails.
   * Must be set after construction to avoid a circular dependency.
   */
  setFallbackAgent(agent: FallbackAgent): void {
    this.fallbackAgent = agent;
  }

  /**
   * Start the Gemini agent for a session.
   * Uses a multi-turn conversation to maintain context across messages.
   */
  async startSession(session: ActiveSession, worker?: WorkerRef): Promise<void> {
    try {
      // Get Gemini configuration
      const { apiKey, model, rateLimitingEnabled } = this.getGeminiConfig();
      if (!apiKey) {
        throw new Error('Gemini API key not configured. Set CLAUDE_MEM_GEMINI_API_KEY in settings or GEMINI_API_KEY environment variable.');
      }

      // Generate a synthetic memorySessionId (Gemini is stateless and doesn't return session IDs)
      if (!session.memorySessionId) {
        const syntheticMemorySessionId = `gemini-${session.contentSessionId}-${Date.now()}`;
        session.memorySessionId = syntheticMemorySessionId;
        this.dbManager.getSessionStore().updateMemorySessionId(session.sessionDbId, syntheticMemorySessionId);
        logger.info('SESSION', `MEMORY_ID_GENERATED | sessionDbId=${session.sessionDbId} | provider=Gemini`);
      }

      // Load active mode
      const mode = ModeManager.getInstance().getActiveMode();

      // Build initial prompt
      const initPrompt = session.lastPromptNumber === 1
        ? buildInitPrompt(session.project, session.contentSessionId, session.userPrompt, mode)
        : buildContinuationPrompt(session.userPrompt, session.lastPromptNumber, session.contentSessionId, mode);

      // Add to conversation history and query Gemini with full context
      session.conversationHistory.push({ role: 'user', content: initPrompt });
      const initResponse = await this.queryGeminiMultiTurn(session.conversationHistory, apiKey, model, rateLimitingEnabled);

      if (initResponse.content) {
        // Add response to conversation history
        session.conversationHistory.push({ role: 'assistant', content: initResponse.content });

        // Track token usage
        const tokensUsed = initResponse.tokensUsed || 0;
        session.cumulativeInputTokens += Math.floor(tokensUsed * 0.7); // Rough estimate
        session.cumulativeOutputTokens += Math.floor(tokensUsed * 0.3);

        // Process response using shared ResponseProcessor (no original timestamp for init - not from queue)
        await processAgentResponse(
          initResponse.content,
          session,
          this.dbManager,
          this.sessionManager,
          worker,
          tokensUsed,
          null,
          'Gemini',
          undefined,
          model
        );
      } else {
        logger.error('SDK', 'Empty Gemini init response - session may lack context', {
          sessionId: session.sessionDbId,
          model
        });
      }

      // Process pending messages.
      // Track cwd from messages for CLAUDE.md generation.
      let lastCwd: string | undefined;

      for await (const message of this.sessionManager.getMessageIterator(session.sessionDbId)) {
        // CLAIM-CONFIRM: Track the message ID for confirmProcessed() after successful storage.
        // The message stays in 'processing' status in the DB until ResponseProcessor calls confirmProcessed().
        session.processingMessageIds.push(message._persistentId);

        // Capture cwd from each message for worktree support
        if (message.cwd) {
          lastCwd = message.cwd;
        }

        // Capture the earliest timestamp BEFORE processing (it is cleared afterwards).
        // This ensures backlog messages get their original timestamps, not the current time.
        const originalTimestamp = session.earliestPendingTimestamp;

        if (message.type === 'observation') {
          // Update last prompt number
          if (message.prompt_number !== undefined) {
            session.lastPromptNumber = message.prompt_number;
          }

          // CRITICAL: Check memorySessionId BEFORE making an expensive LLM call.
          // This prevents wasting tokens when we couldn't store the result anyway.
          if (!session.memorySessionId) {
            throw new Error('Cannot process observations: memorySessionId not yet captured. This session may need to be reinitialized.');
          }

          // Build observation prompt
          const obsPrompt = buildObservationPrompt({
            id: 0,
            tool_name: message.tool_name!,
            tool_input: JSON.stringify(message.tool_input),
            tool_output: JSON.stringify(message.tool_response),
            created_at_epoch: originalTimestamp ?? Date.now(),
            cwd: message.cwd
          });
          // Add to conversation history and query Gemini with full context
          session.conversationHistory.push({ role: 'user', content: obsPrompt });
          const obsResponse = await this.queryGeminiMultiTurn(session.conversationHistory, apiKey, model, rateLimitingEnabled);

          let tokensUsed = 0;
          if (obsResponse.content) {
            // Add response to conversation history
            session.conversationHistory.push({ role: 'assistant', content: obsResponse.content });
            tokensUsed = obsResponse.tokensUsed || 0;
            session.cumulativeInputTokens += Math.floor(tokensUsed * 0.7);
            session.cumulativeOutputTokens += Math.floor(tokensUsed * 0.3);
          }

          // Process response using shared ResponseProcessor
          if (obsResponse.content) {
            await processAgentResponse(
              obsResponse.content,
              session,
              this.dbManager,
              this.sessionManager,
              worker,
              tokensUsed,
              originalTimestamp,
              'Gemini',
              lastCwd,
              model
            );
          } else {
            logger.warn('SDK', 'Empty Gemini observation response, skipping processing to preserve message', {
              sessionId: session.sessionDbId,
              messageId: session.processingMessageIds[session.processingMessageIds.length - 1]
            });
            // Don't confirm - leave the message for stale recovery
          }
        } else if (message.type === 'summarize') {
          // CRITICAL: Check memorySessionId BEFORE making an expensive LLM call
          if (!session.memorySessionId) {
            throw new Error('Cannot process summary: memorySessionId not yet captured. This session may need to be reinitialized.');
          }

          // Build summary prompt
          const summaryPrompt = buildSummaryPrompt({
            id: session.sessionDbId,
            memory_session_id: session.memorySessionId,
            project: session.project,
            user_prompt: session.userPrompt,
            last_assistant_message: message.last_assistant_message || ''
          }, mode);

          // Add to conversation history and query Gemini with full context
          session.conversationHistory.push({ role: 'user', content: summaryPrompt });
          const summaryResponse = await this.queryGeminiMultiTurn(session.conversationHistory, apiKey, model, rateLimitingEnabled);

          let tokensUsed = 0;
          if (summaryResponse.content) {
            // Add response to conversation history
            session.conversationHistory.push({ role: 'assistant', content: summaryResponse.content });
            tokensUsed = summaryResponse.tokensUsed || 0;
            session.cumulativeInputTokens += Math.floor(tokensUsed * 0.7);
            session.cumulativeOutputTokens += Math.floor(tokensUsed * 0.3);
          }

          // Process response using shared ResponseProcessor
          if (summaryResponse.content) {
            await processAgentResponse(
              summaryResponse.content,
              session,
              this.dbManager,
              this.sessionManager,
              worker,
              tokensUsed,
              originalTimestamp,
              'Gemini',
              lastCwd,
              model
            );
          } else {
            logger.warn('SDK', 'Empty Gemini summary response, skipping processing to preserve message', {
              sessionId: session.sessionDbId,
              messageId: session.processingMessageIds[session.processingMessageIds.length - 1]
            });
            // Don't confirm - leave the message for stale recovery
          }
        }
      }

      // Mark session complete
      const sessionDuration = Date.now() - session.startTime;
      logger.success('SDK', 'Gemini agent completed', {
        sessionId: session.sessionDbId,
        duration: `${(sessionDuration / 1000).toFixed(1)}s`,
        historyLength: session.conversationHistory.length
      });
    } catch (error: unknown) {
      if (isAbortError(error)) {
        logger.warn('SDK', 'Gemini agent aborted', { sessionId: session.sessionDbId });
        throw error;
      }

      // Check if we should fall back to Claude
      if (shouldFallbackToClaude(error) && this.fallbackAgent) {
        logger.warn('SDK', 'Gemini API failed, falling back to Claude SDK', {
          sessionDbId: session.sessionDbId,
          error: error instanceof Error ? error.message : String(error),
          historyLength: session.conversationHistory.length
        });

        // Fall back to Claude - it will use the same session with the shared conversationHistory.
        // Note: with the claim-and-delete queue pattern, messages are already deleted on claim.
        return this.fallbackAgent.startSession(session, worker);
      }

      logger.failure('SDK', 'Gemini agent error', { sessionDbId: session.sessionDbId }, error as Error);
      throw error;
    }
  }
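  // Message lifecycle recap for startSession (restating the CLAIM-CONFIRM comments above):
  //   1. getMessageIterator() claims a message and marks it 'processing'
  //   2. processAgentResponse() stores the result and calls confirmProcessed()
  //   3. an empty LLM response leaves the claim unconfirmed so stale recovery can retry it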
  /**
   * Truncate conversation history to prevent runaway context costs.
   * Keeps the most recent messages within both the message count and token budget.
   * Returns a new array; never mutates the original history.
   */
  private truncateHistory(history: ConversationMessage[]): ConversationMessage[] {
    const settings = SettingsDefaultsManager.loadFromFile(USER_SETTINGS_PATH);
    const MAX_CONTEXT_MESSAGES = parseInt(settings.CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES) || DEFAULT_MAX_CONTEXT_MESSAGES;
    const MAX_ESTIMATED_TOKENS = parseInt(settings.CLAUDE_MEM_GEMINI_MAX_TOKENS) || DEFAULT_MAX_ESTIMATED_TOKENS;

    if (history.length <= MAX_CONTEXT_MESSAGES) {
      // Check the token count even if the message count is ok
      const totalTokens = history.reduce((sum, m) => sum + estimateTokens(m.content), 0);
      if (totalTokens <= MAX_ESTIMATED_TOKENS) {
        return history;
      }
    }

    // Sliding window: keep the most recent messages within limits
    const truncated: ConversationMessage[] = [];
    let tokenCount = 0;

    // Process messages in reverse (most recent first)
    for (let i = history.length - 1; i >= 0; i--) {
      const msg = history[i];
      const msgTokens = estimateTokens(msg.content);

      // Always include at least the newest message: an empty contents array
      // would cause a hard Gemini API error, which is worse than an oversized request.
      if (truncated.length > 0 &&
          (truncated.length >= MAX_CONTEXT_MESSAGES || tokenCount + msgTokens > MAX_ESTIMATED_TOKENS)) {
        logger.warn('SDK', 'Context window truncated to prevent runaway costs', {
          originalMessages: history.length,
          keptMessages: truncated.length,
          droppedMessages: i + 1,
          estimatedTokens: tokenCount,
          tokenLimit: MAX_ESTIMATED_TOKENS
        });
        break;
      }

      truncated.unshift(msg); // Add to beginning
      tokenCount += msgTokens;
    }

    return truncated;
  }
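  // Illustration with hypothetical numbers: given MAX_CONTEXT_MESSAGES = 20 and a
  // 30-message history, the 10 oldest messages are dropped. The token budget can
  // shrink the window further, but the newest message is always kept even if it
  // alone exceeds MAX_ESTIMATED_TOKENS.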
  /**
   * Convert a shared ConversationMessage array to Gemini's contents format.
   * Maps the 'assistant' role to 'model' for Gemini API compatibility.
   */
  private conversationToGeminiContents(history: ConversationMessage[]): GeminiContent[] {
    return history.map(msg => ({
      role: msg.role === 'assistant' ? 'model' : 'user',
      parts: [{ text: msg.content }]
    }));
  }

  /**
   * Query Gemini via the REST API with truncated conversation history (multi-turn).
   * Truncates history to prevent O(N²) token cost growth, then sends it for coherent responses.
   */
  private async queryGeminiMultiTurn(
    history: ConversationMessage[],
    apiKey: string,
    model: GeminiModel,
    rateLimitingEnabled: boolean
  ): Promise<{ content: string; tokensUsed?: number }> {
    const truncatedHistory = this.truncateHistory(history);
    const contents = this.conversationToGeminiContents(truncatedHistory);
    const totalChars = truncatedHistory.reduce((sum, m) => sum + m.content.length, 0);

    logger.debug('SDK', `Querying Gemini multi-turn (${model})`, {
      turns: truncatedHistory.length,
      totalTurns: history.length,
      totalChars
    });

    const url = `${GEMINI_API_URL}/${model}:generateContent?key=${apiKey}`;

    // Enforce the RPM rate limit for the free tier (skipped if rate limiting is disabled)
    await enforceRateLimitForModel(model, rateLimitingEnabled);

    const response = await fetch(url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        contents,
        generationConfig: {
          temperature: 0.3, // Lower temperature for structured extraction
          maxOutputTokens: 4096,
        },
      }),
    });

    if (!response.ok) {
      const error = await response.text();
      throw new Error(`Gemini API error: ${response.status} - ${error}`);
    }

    const data = await response.json() as GeminiResponse;

    if (!data.candidates?.[0]?.content?.parts?.[0]?.text) {
      logger.error('SDK', 'Empty response from Gemini');
      return { content: '' };
    }

    const content = data.candidates[0].content.parts[0].text;
    const tokensUsed = data.usageMetadata?.totalTokenCount;

    return { content, tokensUsed };
  }

  /**
   * Get Gemini configuration from settings or environment.
   * Issue #733: Uses the centralized ~/.claude-mem/.env for credentials, not random project .env files.
   */
  private getGeminiConfig(): { apiKey: string; model: GeminiModel; rateLimitingEnabled: boolean } {
    const settingsPath = path.join(homedir(), '.claude-mem', 'settings.json');
    const settings = SettingsDefaultsManager.loadFromFile(settingsPath);

    // API key: check settings first, then the centralized claude-mem .env (NOT process.env).
    // This prevents Issue #733, where random project .env files could interfere.
    const apiKey = settings.CLAUDE_MEM_GEMINI_API_KEY || getCredential('GEMINI_API_KEY') || '';

    // Model: from settings or default, with validation
    const defaultModel: GeminiModel = 'gemini-2.5-flash';
    const configuredModel = settings.CLAUDE_MEM_GEMINI_MODEL || defaultModel;
    const validModels: GeminiModel[] = [
      'gemini-2.5-flash-lite',
      'gemini-2.5-flash',
      'gemini-2.5-pro',
      'gemini-2.0-flash',
      'gemini-2.0-flash-lite',
      'gemini-3-flash',
      'gemini-3-flash-preview',
    ];

    let model: GeminiModel;
    if (validModels.includes(configuredModel as GeminiModel)) {
      model = configuredModel as GeminiModel;
    } else {
      logger.warn('SDK', `Invalid Gemini model "${configuredModel}", falling back to ${defaultModel}`, {
        configured: configuredModel,
        validModels,
      });
      model = defaultModel;
    }

    // Rate limiting: enabled by default for free tier users
    const rateLimitingEnabled = settings.CLAUDE_MEM_GEMINI_RATE_LIMITING_ENABLED !== 'false';

    return { apiKey, model, rateLimitingEnabled };
  }
}
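// Usage sketch (illustrative wiring; `claudeAgent` is assumed to implement FallbackAgent):
//
//   const agent = new GeminiAgent(dbManager, sessionManager);
//   agent.setFallbackAgent(claudeAgent);       // set after construction (circular dependency)
//   await agent.startSession(session, worker); // init prompt, then queued messages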
/**
 * Check whether Gemini is available (has an API key configured).
 * Issue #733: Uses the centralized ~/.claude-mem/.env, not random project .env files.
 */
export function isGeminiAvailable(): boolean {
  const settingsPath = path.join(homedir(), '.claude-mem', 'settings.json');
  const settings = SettingsDefaultsManager.loadFromFile(settingsPath);
  return !!(settings.CLAUDE_MEM_GEMINI_API_KEY || getCredential('GEMINI_API_KEY'));
}

/**
 * Check whether Gemini is the selected provider.
 */
export function isGeminiSelected(): boolean {
  const settingsPath = path.join(homedir(), '.claude-mem', 'settings.json');
  const settings = SettingsDefaultsManager.loadFromFile(settingsPath);
  return settings.CLAUDE_MEM_PROVIDER === 'gemini';
}
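// Example provider dispatch built on these helpers (a sketch; the caller and the
// `claudeAgent` fallback are assumptions, not code from this module):
//
//   if (isGeminiSelected() && isGeminiAvailable()) {
//     await geminiAgent.startSession(session, worker);
//   } else {
//     await claudeAgent.startSession(session, worker);
//   }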