Files
claude-mem/src/services/worker/GeminiAgent.ts
T
Alex Newman d2e926fbf7 fix: post-merge breakage (Gemini, idle timeout, sharp cache) (#1138)
* fix: add gemini-3-flash to validModels array

The model was defined in the type union and RPM limits but missing from
the runtime validModels array, causing silent fallback to gemini-2.5-flash.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: skip processing when Gemini returns empty observation response

Empty responses were silently consuming messages from the queue via
processAgentResponse. Now skips processing on empty content, leaving
the message in processing status for stale recovery.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: prevent idle timeout from triggering infinite restart loop

When a session hits the 3-minute idle timeout, the finally block was
seeing stale processing messages and restarting the generator endlessly.
Now tracks idle timeout as a distinct exit reason via session flag,
resets stale messages, and skips restart.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: clear stale Bun native module cache on update

Bun's global cache retains sharp/libvips native binaries with broken
dylib references after version upgrades. Clear ~/.bun/install/cache/@img/
before install in both the end-user (smart-install) and dev (sync-marketplace)
paths to prevent ERR_DLOPEN_FAILED errors in Chroma sync.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: address PR review feedback (empty summary response, session-scoped reset, shell injection)

- Apply same empty-response guard to summary path as observation path in GeminiAgent
- Add optional sessionDbId param to resetStaleProcessingMessages for session-scoped resets
- Use JSON.stringify for gitignore pattern escaping, filter negation patterns

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 17:46:30 -05:00

472 lines
17 KiB
TypeScript

/**
* GeminiAgent: Gemini-based observation extraction
*
* Alternative to SDKAgent that uses Google's Gemini API directly
* for extracting observations from tool usage.
*
* Responsibility:
* - Call Gemini REST API for observation extraction
* - Parse XML responses (same format as Claude)
* - Sync to database and Chroma
*/
import path from 'path';
import { homedir } from 'os';
import { DatabaseManager } from './DatabaseManager.js';
import { SessionManager } from './SessionManager.js';
import { logger } from '../../utils/logger.js';
import { buildInitPrompt, buildObservationPrompt, buildSummaryPrompt, buildContinuationPrompt } from '../../sdk/prompts.js';
import { SettingsDefaultsManager } from '../../shared/SettingsDefaultsManager.js';
import { getCredential } from '../../shared/EnvManager.js';
import type { ActiveSession, ConversationMessage } from '../worker-types.js';
import { ModeManager } from '../domain/ModeManager.js';
import {
processAgentResponse,
shouldFallbackToClaude,
isAbortError,
type WorkerRef,
type FallbackAgent
} from './agents/index.js';
// Gemini API endpoint — use v1 (stable), not v1beta.
// v1beta does not support newer models like gemini-3-flash.
const GEMINI_API_URL = 'https://generativelanguage.googleapis.com/v1/models';
// Gemini model types (available via API)
export type GeminiModel =
| 'gemini-2.5-flash-lite'
| 'gemini-2.5-flash'
| 'gemini-2.5-pro'
| 'gemini-2.0-flash'
| 'gemini-2.0-flash-lite'
| 'gemini-3-flash'
| 'gemini-3-flash-preview';
// Free tier RPM limits by model (requests per minute)
const GEMINI_RPM_LIMITS: Record<GeminiModel, number> = {
'gemini-2.5-flash-lite': 10,
'gemini-2.5-flash': 10,
'gemini-2.5-pro': 5,
'gemini-2.0-flash': 15,
'gemini-2.0-flash-lite': 30,
'gemini-3-flash': 10,
'gemini-3-flash-preview': 5,
};
// Track last request time for rate limiting
let lastRequestTime = 0;
/**
* Enforce RPM rate limit for Gemini free tier.
* Waits the required time between requests based on model's RPM limit + 100ms safety buffer.
* Skipped entirely if rate limiting is disabled (billing users with 1000+ RPM available).
*/
async function enforceRateLimitForModel(model: GeminiModel, rateLimitingEnabled: boolean): Promise<void> {
// Skip rate limiting if disabled (billing users with 1000+ RPM)
if (!rateLimitingEnabled) {
return;
}
const rpm = GEMINI_RPM_LIMITS[model] || 5;
const minimumDelayMs = Math.ceil(60000 / rpm) + 100; // (60s / RPM) + 100ms safety buffer
const now = Date.now();
const timeSinceLastRequest = now - lastRequestTime;
if (timeSinceLastRequest < minimumDelayMs) {
const waitTime = minimumDelayMs - timeSinceLastRequest;
logger.debug('SDK', `Rate limiting: waiting ${waitTime}ms before Gemini request`, { model, rpm });
await new Promise(resolve => setTimeout(resolve, waitTime));
}
lastRequestTime = Date.now();
}
interface GeminiResponse {
candidates?: Array<{
content?: {
parts?: Array<{
text?: string;
}>;
};
}>;
usageMetadata?: {
promptTokenCount?: number;
candidatesTokenCount?: number;
totalTokenCount?: number;
};
}
/**
* Gemini content message format
* role: "user" or "model" (Gemini uses "model" not "assistant")
*/
interface GeminiContent {
role: 'user' | 'model';
parts: Array<{ text: string }>;
}
export class GeminiAgent {
private dbManager: DatabaseManager;
private sessionManager: SessionManager;
private fallbackAgent: FallbackAgent | null = null;
constructor(dbManager: DatabaseManager, sessionManager: SessionManager) {
this.dbManager = dbManager;
this.sessionManager = sessionManager;
}
/**
* Set the fallback agent (Claude SDK) for when Gemini API fails
* Must be set after construction to avoid circular dependency
*/
setFallbackAgent(agent: FallbackAgent): void {
this.fallbackAgent = agent;
}
/**
* Start Gemini agent for a session
* Uses multi-turn conversation to maintain context across messages
*/
async startSession(session: ActiveSession, worker?: WorkerRef): Promise<void> {
try {
// Get Gemini configuration
const { apiKey, model, rateLimitingEnabled } = this.getGeminiConfig();
if (!apiKey) {
throw new Error('Gemini API key not configured. Set CLAUDE_MEM_GEMINI_API_KEY in settings or GEMINI_API_KEY environment variable.');
}
// Generate synthetic memorySessionId (Gemini is stateless, doesn't return session IDs)
if (!session.memorySessionId) {
const syntheticMemorySessionId = `gemini-${session.contentSessionId}-${Date.now()}`;
session.memorySessionId = syntheticMemorySessionId;
this.dbManager.getSessionStore().updateMemorySessionId(session.sessionDbId, syntheticMemorySessionId);
logger.info('SESSION', `MEMORY_ID_GENERATED | sessionDbId=${session.sessionDbId} | provider=Gemini`);
}
// Load active mode
const mode = ModeManager.getInstance().getActiveMode();
// Build initial prompt
const initPrompt = session.lastPromptNumber === 1
? buildInitPrompt(session.project, session.contentSessionId, session.userPrompt, mode)
: buildContinuationPrompt(session.userPrompt, session.lastPromptNumber, session.contentSessionId, mode);
// Add to conversation history and query Gemini with full context
session.conversationHistory.push({ role: 'user', content: initPrompt });
const initResponse = await this.queryGeminiMultiTurn(session.conversationHistory, apiKey, model, rateLimitingEnabled);
if (initResponse.content) {
// Add response to conversation history
session.conversationHistory.push({ role: 'assistant', content: initResponse.content });
// Track token usage
const tokensUsed = initResponse.tokensUsed || 0;
session.cumulativeInputTokens += Math.floor(tokensUsed * 0.7); // Rough estimate
session.cumulativeOutputTokens += Math.floor(tokensUsed * 0.3);
// Process response using shared ResponseProcessor (no original timestamp for init - not from queue)
await processAgentResponse(
initResponse.content,
session,
this.dbManager,
this.sessionManager,
worker,
tokensUsed,
null,
'Gemini'
);
} else {
logger.error('SDK', 'Empty Gemini init response - session may lack context', {
sessionId: session.sessionDbId,
model
});
}
// Process pending messages
// Track cwd from messages for CLAUDE.md generation
let lastCwd: string | undefined;
for await (const message of this.sessionManager.getMessageIterator(session.sessionDbId)) {
// CLAIM-CONFIRM: Track message ID for confirmProcessed() after successful storage
// The message is now in 'processing' status in DB until ResponseProcessor calls confirmProcessed()
session.processingMessageIds.push(message._persistentId);
// Capture cwd from each message for worktree support
if (message.cwd) {
lastCwd = message.cwd;
}
// Capture earliest timestamp BEFORE processing (will be cleared after)
// This ensures backlog messages get their original timestamps, not current time
const originalTimestamp = session.earliestPendingTimestamp;
if (message.type === 'observation') {
// Update last prompt number
if (message.prompt_number !== undefined) {
session.lastPromptNumber = message.prompt_number;
}
// CRITICAL: Check memorySessionId BEFORE making expensive LLM call
// This prevents wasting tokens when we won't be able to store the result anyway
if (!session.memorySessionId) {
throw new Error('Cannot process observations: memorySessionId not yet captured. This session may need to be reinitialized.');
}
// Build observation prompt
const obsPrompt = buildObservationPrompt({
id: 0,
tool_name: message.tool_name!,
tool_input: JSON.stringify(message.tool_input),
tool_output: JSON.stringify(message.tool_response),
created_at_epoch: originalTimestamp ?? Date.now(),
cwd: message.cwd
});
// Add to conversation history and query Gemini with full context
session.conversationHistory.push({ role: 'user', content: obsPrompt });
const obsResponse = await this.queryGeminiMultiTurn(session.conversationHistory, apiKey, model, rateLimitingEnabled);
let tokensUsed = 0;
if (obsResponse.content) {
// Add response to conversation history
session.conversationHistory.push({ role: 'assistant', content: obsResponse.content });
tokensUsed = obsResponse.tokensUsed || 0;
session.cumulativeInputTokens += Math.floor(tokensUsed * 0.7);
session.cumulativeOutputTokens += Math.floor(tokensUsed * 0.3);
}
// Process response using shared ResponseProcessor
if (obsResponse.content) {
await processAgentResponse(
obsResponse.content,
session,
this.dbManager,
this.sessionManager,
worker,
tokensUsed,
originalTimestamp,
'Gemini',
lastCwd
);
} else {
logger.warn('SDK', 'Empty Gemini observation response, skipping processing to preserve message', {
sessionId: session.sessionDbId,
messageId: session.processingMessageIds[session.processingMessageIds.length - 1]
});
// Don't confirm - leave message for stale recovery
}
} else if (message.type === 'summarize') {
// CRITICAL: Check memorySessionId BEFORE making expensive LLM call
if (!session.memorySessionId) {
throw new Error('Cannot process summary: memorySessionId not yet captured. This session may need to be reinitialized.');
}
// Build summary prompt
const summaryPrompt = buildSummaryPrompt({
id: session.sessionDbId,
memory_session_id: session.memorySessionId,
project: session.project,
user_prompt: session.userPrompt,
last_assistant_message: message.last_assistant_message || ''
}, mode);
// Add to conversation history and query Gemini with full context
session.conversationHistory.push({ role: 'user', content: summaryPrompt });
const summaryResponse = await this.queryGeminiMultiTurn(session.conversationHistory, apiKey, model, rateLimitingEnabled);
let tokensUsed = 0;
if (summaryResponse.content) {
// Add response to conversation history
session.conversationHistory.push({ role: 'assistant', content: summaryResponse.content });
tokensUsed = summaryResponse.tokensUsed || 0;
session.cumulativeInputTokens += Math.floor(tokensUsed * 0.7);
session.cumulativeOutputTokens += Math.floor(tokensUsed * 0.3);
}
// Process response using shared ResponseProcessor
if (summaryResponse.content) {
await processAgentResponse(
summaryResponse.content,
session,
this.dbManager,
this.sessionManager,
worker,
tokensUsed,
originalTimestamp,
'Gemini',
lastCwd
);
} else {
logger.warn('SDK', 'Empty Gemini summary response, skipping processing to preserve message', {
sessionId: session.sessionDbId,
messageId: session.processingMessageIds[session.processingMessageIds.length - 1]
});
// Don't confirm - leave message for stale recovery
}
}
}
// Mark session complete
const sessionDuration = Date.now() - session.startTime;
logger.success('SDK', 'Gemini agent completed', {
sessionId: session.sessionDbId,
duration: `${(sessionDuration / 1000).toFixed(1)}s`,
historyLength: session.conversationHistory.length
});
} catch (error: unknown) {
if (isAbortError(error)) {
logger.warn('SDK', 'Gemini agent aborted', { sessionId: session.sessionDbId });
throw error;
}
// Check if we should fall back to Claude
if (shouldFallbackToClaude(error) && this.fallbackAgent) {
logger.warn('SDK', 'Gemini API failed, falling back to Claude SDK', {
sessionDbId: session.sessionDbId,
error: error instanceof Error ? error.message : String(error),
historyLength: session.conversationHistory.length
});
// Fall back to Claude - it will use the same session with shared conversationHistory
// Note: With claim-and-delete queue pattern, messages are already deleted on claim
return this.fallbackAgent.startSession(session, worker);
}
logger.failure('SDK', 'Gemini agent error', { sessionDbId: session.sessionDbId }, error as Error);
throw error;
}
}
/**
* Convert shared ConversationMessage array to Gemini's contents format
* Maps 'assistant' role to 'model' for Gemini API compatibility
*/
private conversationToGeminiContents(history: ConversationMessage[]): GeminiContent[] {
return history.map(msg => ({
role: msg.role === 'assistant' ? 'model' : 'user',
parts: [{ text: msg.content }]
}));
}
/**
* Query Gemini via REST API with full conversation history (multi-turn)
* Sends the entire conversation context for coherent responses
*/
private async queryGeminiMultiTurn(
history: ConversationMessage[],
apiKey: string,
model: GeminiModel,
rateLimitingEnabled: boolean
): Promise<{ content: string; tokensUsed?: number }> {
const contents = this.conversationToGeminiContents(history);
const totalChars = history.reduce((sum, m) => sum + m.content.length, 0);
logger.debug('SDK', `Querying Gemini multi-turn (${model})`, {
turns: history.length,
totalChars
});
const url = `${GEMINI_API_URL}/${model}:generateContent?key=${apiKey}`;
// Enforce RPM rate limit for free tier (skipped if rate limiting disabled)
await enforceRateLimitForModel(model, rateLimitingEnabled);
const response = await fetch(url, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
contents,
generationConfig: {
temperature: 0.3, // Lower temperature for structured extraction
maxOutputTokens: 4096,
},
}),
});
if (!response.ok) {
const error = await response.text();
throw new Error(`Gemini API error: ${response.status} - ${error}`);
}
const data = await response.json() as GeminiResponse;
if (!data.candidates?.[0]?.content?.parts?.[0]?.text) {
logger.error('SDK', 'Empty response from Gemini');
return { content: '' };
}
const content = data.candidates[0].content.parts[0].text;
const tokensUsed = data.usageMetadata?.totalTokenCount;
return { content, tokensUsed };
}
/**
* Get Gemini configuration from settings or environment
* Issue #733: Uses centralized ~/.claude-mem/.env for credentials, not random project .env files
*/
private getGeminiConfig(): { apiKey: string; model: GeminiModel; rateLimitingEnabled: boolean } {
const settingsPath = path.join(homedir(), '.claude-mem', 'settings.json');
const settings = SettingsDefaultsManager.loadFromFile(settingsPath);
// API key: check settings first, then centralized claude-mem .env (NOT process.env)
// This prevents Issue #733 where random project .env files could interfere
const apiKey = settings.CLAUDE_MEM_GEMINI_API_KEY || getCredential('GEMINI_API_KEY') || '';
// Model: from settings or default, with validation
const defaultModel: GeminiModel = 'gemini-2.5-flash';
const configuredModel = settings.CLAUDE_MEM_GEMINI_MODEL || defaultModel;
const validModels: GeminiModel[] = [
'gemini-2.5-flash-lite',
'gemini-2.5-flash',
'gemini-2.5-pro',
'gemini-2.0-flash',
'gemini-2.0-flash-lite',
'gemini-3-flash',
'gemini-3-flash-preview',
];
let model: GeminiModel;
if (validModels.includes(configuredModel as GeminiModel)) {
model = configuredModel as GeminiModel;
} else {
logger.warn('SDK', `Invalid Gemini model "${configuredModel}", falling back to ${defaultModel}`, {
configured: configuredModel,
validModels,
});
model = defaultModel;
}
// Rate limiting: enabled by default for free tier users
const rateLimitingEnabled = settings.CLAUDE_MEM_GEMINI_RATE_LIMITING_ENABLED !== 'false';
return { apiKey, model, rateLimitingEnabled };
}
}
/**
* Check if Gemini is available (has API key configured)
* Issue #733: Uses centralized ~/.claude-mem/.env, not random project .env files
*/
export function isGeminiAvailable(): boolean {
const settingsPath = path.join(homedir(), '.claude-mem', 'settings.json');
const settings = SettingsDefaultsManager.loadFromFile(settingsPath);
return !!(settings.CLAUDE_MEM_GEMINI_API_KEY || getCredential('GEMINI_API_KEY'));
}
/**
* Check if Gemini is the selected provider
*/
export function isGeminiSelected(): boolean {
const settingsPath = path.join(homedir(), '.claude-mem', 'settings.json');
const settings = SettingsDefaultsManager.loadFromFile(settingsPath);
return settings.CLAUDE_MEM_PROVIDER === 'gemini';
}