// NOTE(review): viewer/extraction residue converted to comments — this text is
// not TypeScript. Preserved provenance:
//   commit c3cb8f81ed
//   Conflicts: plugin/scripts/context-generator.cjs, plugin/scripts/mcp-server.cjs,
//              plugin/scripts/worker-service.cjs, plugin/ui/viewer-bundle.js
/**
 * GeminiAgent: Gemini-based observation extraction
 *
 * Alternative to SDKAgent that uses Google's Gemini API directly
 * for extracting observations from tool usage.
 *
 * Responsibility:
 * - Call Gemini REST API for observation extraction
 * - Parse XML responses (same format as Claude)
 * - Sync to database and Chroma
 */

import path from 'path';
|
|
import { homedir } from 'os';
|
|
import { DatabaseManager } from './DatabaseManager.js';
|
|
import { SessionManager } from './SessionManager.js';
|
|
import { logger } from '../../utils/logger.js';
|
|
import { buildInitPrompt, buildObservationPrompt, buildSummaryPrompt, buildContinuationPrompt } from '../../sdk/prompts.js';
|
|
import { SettingsDefaultsManager } from '../../shared/SettingsDefaultsManager.js';
|
|
import { getCredential } from '../../shared/EnvManager.js';
|
|
import { USER_SETTINGS_PATH } from '../../shared/paths.js';
|
|
import { estimateTokens } from '../../shared/timeline-formatting.js';
|
|
import type { ActiveSession, ConversationMessage } from '../worker-types.js';
|
|
import { ModeManager } from '../domain/ModeManager.js';
|
|
import {
|
|
processAgentResponse,
|
|
shouldFallbackToClaude,
|
|
isAbortError,
|
|
type WorkerRef,
|
|
type FallbackAgent
|
|
} from './agents/index.js';
|
|
|
|
// Gemini API endpoint — use v1 (stable), not v1beta.
// v1beta does not support newer models like gemini-3-flash.
const GEMINI_API_URL = 'https://generativelanguage.googleapis.com/v1/models';

// Gemini model types (available via API).
// NOTE(review): keep this union, GEMINI_RPM_LIMITS below, and the validModels
// list in getGeminiConfig() in sync when adding or removing models.
export type GeminiModel =
  | 'gemini-2.5-flash-lite'
  | 'gemini-2.5-flash'
  | 'gemini-2.5-pro'
  | 'gemini-2.0-flash'
  | 'gemini-2.0-flash-lite'
  | 'gemini-3-flash'
  | 'gemini-3-flash-preview';

// Free tier RPM limits by model (requests per minute).
// Consumed by enforceRateLimitForModel() to space out API calls.
const GEMINI_RPM_LIMITS: Record<GeminiModel, number> = {
  'gemini-2.5-flash-lite': 10,
  'gemini-2.5-flash': 10,
  'gemini-2.5-pro': 5,
  'gemini-2.0-flash': 15,
  'gemini-2.0-flash-lite': 30,
  'gemini-3-flash': 10,
  'gemini-3-flash-preview': 5,
};

// Track last request time for rate limiting.
// Module-level, so the limit is shared by every GeminiAgent in this process.
let lastRequestTime = 0;

// Context window limits (prevents O(N²) token cost growth)
const DEFAULT_MAX_CONTEXT_MESSAGES = 20; // Maximum messages to keep in conversation history
const DEFAULT_MAX_ESTIMATED_TOKENS = 100000; // ~100k tokens max context (safety limit)
/**
|
|
* Enforce RPM rate limit for Gemini free tier.
|
|
* Waits the required time between requests based on model's RPM limit + 100ms safety buffer.
|
|
* Skipped entirely if rate limiting is disabled (billing users with 1000+ RPM available).
|
|
*/
|
|
async function enforceRateLimitForModel(model: GeminiModel, rateLimitingEnabled: boolean): Promise<void> {
|
|
// Skip rate limiting if disabled (billing users with 1000+ RPM)
|
|
if (!rateLimitingEnabled) {
|
|
return;
|
|
}
|
|
|
|
const rpm = GEMINI_RPM_LIMITS[model] || 5;
|
|
const minimumDelayMs = Math.ceil(60000 / rpm) + 100; // (60s / RPM) + 100ms safety buffer
|
|
|
|
const now = Date.now();
|
|
const timeSinceLastRequest = now - lastRequestTime;
|
|
|
|
if (timeSinceLastRequest < minimumDelayMs) {
|
|
const waitTime = minimumDelayMs - timeSinceLastRequest;
|
|
logger.debug('SDK', `Rate limiting: waiting ${waitTime}ms before Gemini request`, { model, rpm });
|
|
await new Promise(resolve => setTimeout(resolve, waitTime));
|
|
}
|
|
|
|
lastRequestTime = Date.now();
|
|
}
|
|
|
|
// Shape of a Gemini generateContent REST response — only the fields this
// module actually reads. Everything is optional because the API may omit
// candidates (e.g. on safety blocks) or usage metadata.
interface GeminiResponse {
  candidates?: Array<{
    content?: {
      parts?: Array<{
        text?: string;
      }>;
    };
  }>;
  // Token accounting reported by the API; totalTokenCount feeds cost tracking.
  usageMetadata?: {
    promptTokenCount?: number;
    candidatesTokenCount?: number;
    totalTokenCount?: number;
  };
}

/**
 * Gemini content message format.
 * role: "user" or "model" (Gemini uses "model", not "assistant" —
 * see conversationToGeminiContents for the mapping).
 */
interface GeminiContent {
  role: 'user' | 'model';
  parts: Array<{ text: string }>;
}
export class GeminiAgent {
|
|
private dbManager: DatabaseManager;
|
|
private sessionManager: SessionManager;
|
|
private fallbackAgent: FallbackAgent | null = null;
|
|
|
|
constructor(dbManager: DatabaseManager, sessionManager: SessionManager) {
|
|
this.dbManager = dbManager;
|
|
this.sessionManager = sessionManager;
|
|
}
|
|
|
|
/**
|
|
* Set the fallback agent (Claude SDK) for when Gemini API fails
|
|
* Must be set after construction to avoid circular dependency
|
|
*/
|
|
setFallbackAgent(agent: FallbackAgent): void {
|
|
this.fallbackAgent = agent;
|
|
}
|
|
|
|
/**
|
|
* Start Gemini agent for a session
|
|
* Uses multi-turn conversation to maintain context across messages
|
|
*/
|
|
async startSession(session: ActiveSession, worker?: WorkerRef): Promise<void> {
|
|
try {
|
|
// Get Gemini configuration
|
|
const { apiKey, model, rateLimitingEnabled } = this.getGeminiConfig();
|
|
|
|
if (!apiKey) {
|
|
throw new Error('Gemini API key not configured. Set CLAUDE_MEM_GEMINI_API_KEY in settings or GEMINI_API_KEY environment variable.');
|
|
}
|
|
|
|
// Generate synthetic memorySessionId (Gemini is stateless, doesn't return session IDs)
|
|
if (!session.memorySessionId) {
|
|
const syntheticMemorySessionId = `gemini-${session.contentSessionId}-${Date.now()}`;
|
|
session.memorySessionId = syntheticMemorySessionId;
|
|
this.dbManager.getSessionStore().updateMemorySessionId(session.sessionDbId, syntheticMemorySessionId);
|
|
logger.info('SESSION', `MEMORY_ID_GENERATED | sessionDbId=${session.sessionDbId} | provider=Gemini`);
|
|
}
|
|
|
|
// Load active mode
|
|
const mode = ModeManager.getInstance().getActiveMode();
|
|
|
|
// Build initial prompt
|
|
const initPrompt = session.lastPromptNumber === 1
|
|
? buildInitPrompt(session.project, session.contentSessionId, session.userPrompt, mode)
|
|
: buildContinuationPrompt(session.userPrompt, session.lastPromptNumber, session.contentSessionId, mode);
|
|
|
|
// Add to conversation history and query Gemini with full context
|
|
session.conversationHistory.push({ role: 'user', content: initPrompt });
|
|
const initResponse = await this.queryGeminiMultiTurn(session.conversationHistory, apiKey, model, rateLimitingEnabled);
|
|
|
|
if (initResponse.content) {
|
|
// Add response to conversation history
|
|
session.conversationHistory.push({ role: 'assistant', content: initResponse.content });
|
|
|
|
// Track token usage
|
|
const tokensUsed = initResponse.tokensUsed || 0;
|
|
session.cumulativeInputTokens += Math.floor(tokensUsed * 0.7); // Rough estimate
|
|
session.cumulativeOutputTokens += Math.floor(tokensUsed * 0.3);
|
|
|
|
// Process response using shared ResponseProcessor (no original timestamp for init - not from queue)
|
|
await processAgentResponse(
|
|
initResponse.content,
|
|
session,
|
|
this.dbManager,
|
|
this.sessionManager,
|
|
worker,
|
|
tokensUsed,
|
|
null,
|
|
'Gemini',
|
|
undefined,
|
|
model
|
|
);
|
|
} else {
|
|
logger.error('SDK', 'Empty Gemini init response - session may lack context', {
|
|
sessionId: session.sessionDbId,
|
|
model
|
|
});
|
|
}
|
|
|
|
// Process pending messages
|
|
// Track cwd from messages for CLAUDE.md generation
|
|
let lastCwd: string | undefined;
|
|
|
|
for await (const message of this.sessionManager.getMessageIterator(session.sessionDbId)) {
|
|
// CLAIM-CONFIRM: Track message ID for confirmProcessed() after successful storage
|
|
// The message is now in 'processing' status in DB until ResponseProcessor calls confirmProcessed()
|
|
session.processingMessageIds.push(message._persistentId);
|
|
|
|
// Capture cwd from each message for worktree support
|
|
if (message.cwd) {
|
|
lastCwd = message.cwd;
|
|
}
|
|
// Capture earliest timestamp BEFORE processing (will be cleared after)
|
|
// This ensures backlog messages get their original timestamps, not current time
|
|
const originalTimestamp = session.earliestPendingTimestamp;
|
|
|
|
if (message.type === 'observation') {
|
|
// Update last prompt number
|
|
if (message.prompt_number !== undefined) {
|
|
session.lastPromptNumber = message.prompt_number;
|
|
}
|
|
|
|
// CRITICAL: Check memorySessionId BEFORE making expensive LLM call
|
|
// This prevents wasting tokens when we won't be able to store the result anyway
|
|
if (!session.memorySessionId) {
|
|
throw new Error('Cannot process observations: memorySessionId not yet captured. This session may need to be reinitialized.');
|
|
}
|
|
|
|
// Build observation prompt
|
|
const obsPrompt = buildObservationPrompt({
|
|
id: 0,
|
|
tool_name: message.tool_name!,
|
|
tool_input: JSON.stringify(message.tool_input),
|
|
tool_output: JSON.stringify(message.tool_response),
|
|
created_at_epoch: originalTimestamp ?? Date.now(),
|
|
cwd: message.cwd
|
|
});
|
|
|
|
// Add to conversation history and query Gemini with full context
|
|
session.conversationHistory.push({ role: 'user', content: obsPrompt });
|
|
const obsResponse = await this.queryGeminiMultiTurn(session.conversationHistory, apiKey, model, rateLimitingEnabled);
|
|
|
|
let tokensUsed = 0;
|
|
if (obsResponse.content) {
|
|
// Add response to conversation history
|
|
session.conversationHistory.push({ role: 'assistant', content: obsResponse.content });
|
|
|
|
tokensUsed = obsResponse.tokensUsed || 0;
|
|
session.cumulativeInputTokens += Math.floor(tokensUsed * 0.7);
|
|
session.cumulativeOutputTokens += Math.floor(tokensUsed * 0.3);
|
|
}
|
|
|
|
// Process response using shared ResponseProcessor
|
|
if (obsResponse.content) {
|
|
await processAgentResponse(
|
|
obsResponse.content,
|
|
session,
|
|
this.dbManager,
|
|
this.sessionManager,
|
|
worker,
|
|
tokensUsed,
|
|
originalTimestamp,
|
|
'Gemini',
|
|
lastCwd,
|
|
model
|
|
);
|
|
} else {
|
|
logger.warn('SDK', 'Empty Gemini observation response, skipping processing to preserve message', {
|
|
sessionId: session.sessionDbId,
|
|
messageId: session.processingMessageIds[session.processingMessageIds.length - 1]
|
|
});
|
|
// Don't confirm - leave message for stale recovery
|
|
}
|
|
|
|
} else if (message.type === 'summarize') {
|
|
// CRITICAL: Check memorySessionId BEFORE making expensive LLM call
|
|
if (!session.memorySessionId) {
|
|
throw new Error('Cannot process summary: memorySessionId not yet captured. This session may need to be reinitialized.');
|
|
}
|
|
|
|
// Build summary prompt
|
|
const summaryPrompt = buildSummaryPrompt({
|
|
id: session.sessionDbId,
|
|
memory_session_id: session.memorySessionId,
|
|
project: session.project,
|
|
user_prompt: session.userPrompt,
|
|
last_assistant_message: message.last_assistant_message || ''
|
|
}, mode);
|
|
|
|
// Add to conversation history and query Gemini with full context
|
|
session.conversationHistory.push({ role: 'user', content: summaryPrompt });
|
|
const summaryResponse = await this.queryGeminiMultiTurn(session.conversationHistory, apiKey, model, rateLimitingEnabled);
|
|
|
|
let tokensUsed = 0;
|
|
if (summaryResponse.content) {
|
|
// Add response to conversation history
|
|
session.conversationHistory.push({ role: 'assistant', content: summaryResponse.content });
|
|
|
|
tokensUsed = summaryResponse.tokensUsed || 0;
|
|
session.cumulativeInputTokens += Math.floor(tokensUsed * 0.7);
|
|
session.cumulativeOutputTokens += Math.floor(tokensUsed * 0.3);
|
|
}
|
|
|
|
// Process response using shared ResponseProcessor
|
|
if (summaryResponse.content) {
|
|
await processAgentResponse(
|
|
summaryResponse.content,
|
|
session,
|
|
this.dbManager,
|
|
this.sessionManager,
|
|
worker,
|
|
tokensUsed,
|
|
originalTimestamp,
|
|
'Gemini',
|
|
lastCwd,
|
|
model
|
|
);
|
|
} else {
|
|
logger.warn('SDK', 'Empty Gemini summary response, skipping processing to preserve message', {
|
|
sessionId: session.sessionDbId,
|
|
messageId: session.processingMessageIds[session.processingMessageIds.length - 1]
|
|
});
|
|
// Don't confirm - leave message for stale recovery
|
|
}
|
|
}
|
|
}
|
|
|
|
// Mark session complete
|
|
const sessionDuration = Date.now() - session.startTime;
|
|
logger.success('SDK', 'Gemini agent completed', {
|
|
sessionId: session.sessionDbId,
|
|
duration: `${(sessionDuration / 1000).toFixed(1)}s`,
|
|
historyLength: session.conversationHistory.length
|
|
});
|
|
|
|
} catch (error: unknown) {
|
|
if (isAbortError(error)) {
|
|
logger.warn('SDK', 'Gemini agent aborted', { sessionId: session.sessionDbId });
|
|
throw error;
|
|
}
|
|
|
|
// Check if we should fall back to Claude
|
|
if (shouldFallbackToClaude(error) && this.fallbackAgent) {
|
|
logger.warn('SDK', 'Gemini API failed, falling back to Claude SDK', {
|
|
sessionDbId: session.sessionDbId,
|
|
error: error instanceof Error ? error.message : String(error),
|
|
historyLength: session.conversationHistory.length
|
|
});
|
|
|
|
// Fall back to Claude - it will use the same session with shared conversationHistory
|
|
// Note: With claim-and-delete queue pattern, messages are already deleted on claim
|
|
return this.fallbackAgent.startSession(session, worker);
|
|
}
|
|
|
|
logger.failure('SDK', 'Gemini agent error', { sessionDbId: session.sessionDbId }, error as Error);
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Truncate conversation history to prevent runaway context costs.
|
|
* Keeps most recent messages within both message count and token budget.
|
|
* Returns a new array — never mutates the original history.
|
|
*/
|
|
private truncateHistory(history: ConversationMessage[]): ConversationMessage[] {
|
|
const settings = SettingsDefaultsManager.loadFromFile(USER_SETTINGS_PATH);
|
|
|
|
const MAX_CONTEXT_MESSAGES = parseInt(settings.CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES) || DEFAULT_MAX_CONTEXT_MESSAGES;
|
|
const MAX_ESTIMATED_TOKENS = parseInt(settings.CLAUDE_MEM_GEMINI_MAX_TOKENS) || DEFAULT_MAX_ESTIMATED_TOKENS;
|
|
|
|
if (history.length <= MAX_CONTEXT_MESSAGES) {
|
|
// Check token count even if message count is ok
|
|
const totalTokens = history.reduce((sum, m) => sum + estimateTokens(m.content), 0);
|
|
if (totalTokens <= MAX_ESTIMATED_TOKENS) {
|
|
return history;
|
|
}
|
|
}
|
|
|
|
// Sliding window: keep most recent messages within limits
|
|
const truncated: ConversationMessage[] = [];
|
|
let tokenCount = 0;
|
|
|
|
// Process messages in reverse (most recent first)
|
|
for (let i = history.length - 1; i >= 0; i--) {
|
|
const msg = history[i];
|
|
const msgTokens = estimateTokens(msg.content);
|
|
|
|
// Always include at least the newest message — an empty contents array
|
|
// would cause a hard Gemini API error, which is worse than an oversized request.
|
|
if (truncated.length > 0 && (truncated.length >= MAX_CONTEXT_MESSAGES || tokenCount + msgTokens > MAX_ESTIMATED_TOKENS)) {
|
|
logger.warn('SDK', 'Context window truncated to prevent runaway costs', {
|
|
originalMessages: history.length,
|
|
keptMessages: truncated.length,
|
|
droppedMessages: i + 1,
|
|
estimatedTokens: tokenCount,
|
|
tokenLimit: MAX_ESTIMATED_TOKENS
|
|
});
|
|
break;
|
|
}
|
|
|
|
truncated.unshift(msg); // Add to beginning
|
|
tokenCount += msgTokens;
|
|
}
|
|
|
|
return truncated;
|
|
}
|
|
|
|
/**
|
|
* Convert shared ConversationMessage array to Gemini's contents format
|
|
* Maps 'assistant' role to 'model' for Gemini API compatibility
|
|
*/
|
|
private conversationToGeminiContents(history: ConversationMessage[]): GeminiContent[] {
|
|
return history.map(msg => ({
|
|
role: msg.role === 'assistant' ? 'model' : 'user',
|
|
parts: [{ text: msg.content }]
|
|
}));
|
|
}
|
|
|
|
/**
|
|
* Query Gemini via REST API with truncated conversation history (multi-turn)
|
|
* Truncates history to prevent O(N²) token cost growth, then sends for coherent responses
|
|
*/
|
|
private async queryGeminiMultiTurn(
|
|
history: ConversationMessage[],
|
|
apiKey: string,
|
|
model: GeminiModel,
|
|
rateLimitingEnabled: boolean
|
|
): Promise<{ content: string; tokensUsed?: number }> {
|
|
const truncatedHistory = this.truncateHistory(history);
|
|
const contents = this.conversationToGeminiContents(truncatedHistory);
|
|
const totalChars = truncatedHistory.reduce((sum, m) => sum + m.content.length, 0);
|
|
|
|
logger.debug('SDK', `Querying Gemini multi-turn (${model})`, {
|
|
turns: truncatedHistory.length,
|
|
totalTurns: history.length,
|
|
totalChars
|
|
});
|
|
|
|
const url = `${GEMINI_API_URL}/${model}:generateContent?key=${apiKey}`;
|
|
|
|
// Enforce RPM rate limit for free tier (skipped if rate limiting disabled)
|
|
await enforceRateLimitForModel(model, rateLimitingEnabled);
|
|
|
|
const response = await fetch(url, {
|
|
method: 'POST',
|
|
headers: {
|
|
'Content-Type': 'application/json',
|
|
},
|
|
body: JSON.stringify({
|
|
contents,
|
|
generationConfig: {
|
|
temperature: 0.3, // Lower temperature for structured extraction
|
|
maxOutputTokens: 4096,
|
|
},
|
|
}),
|
|
});
|
|
|
|
if (!response.ok) {
|
|
const error = await response.text();
|
|
throw new Error(`Gemini API error: ${response.status} - ${error}`);
|
|
}
|
|
|
|
const data = await response.json() as GeminiResponse;
|
|
|
|
if (!data.candidates?.[0]?.content?.parts?.[0]?.text) {
|
|
logger.error('SDK', 'Empty response from Gemini');
|
|
return { content: '' };
|
|
}
|
|
|
|
const content = data.candidates[0].content.parts[0].text;
|
|
const tokensUsed = data.usageMetadata?.totalTokenCount;
|
|
|
|
return { content, tokensUsed };
|
|
}
|
|
|
|
/**
|
|
* Get Gemini configuration from settings or environment
|
|
* Issue #733: Uses centralized ~/.claude-mem/.env for credentials, not random project .env files
|
|
*/
|
|
private getGeminiConfig(): { apiKey: string; model: GeminiModel; rateLimitingEnabled: boolean } {
|
|
const settingsPath = path.join(homedir(), '.claude-mem', 'settings.json');
|
|
const settings = SettingsDefaultsManager.loadFromFile(settingsPath);
|
|
|
|
// API key: check settings first, then centralized claude-mem .env (NOT process.env)
|
|
// This prevents Issue #733 where random project .env files could interfere
|
|
const apiKey = settings.CLAUDE_MEM_GEMINI_API_KEY || getCredential('GEMINI_API_KEY') || '';
|
|
|
|
// Model: from settings or default, with validation
|
|
const defaultModel: GeminiModel = 'gemini-2.5-flash';
|
|
const configuredModel = settings.CLAUDE_MEM_GEMINI_MODEL || defaultModel;
|
|
const validModels: GeminiModel[] = [
|
|
'gemini-2.5-flash-lite',
|
|
'gemini-2.5-flash',
|
|
'gemini-2.5-pro',
|
|
'gemini-2.0-flash',
|
|
'gemini-2.0-flash-lite',
|
|
'gemini-3-flash',
|
|
'gemini-3-flash-preview',
|
|
];
|
|
|
|
let model: GeminiModel;
|
|
if (validModels.includes(configuredModel as GeminiModel)) {
|
|
model = configuredModel as GeminiModel;
|
|
} else {
|
|
logger.warn('SDK', `Invalid Gemini model "${configuredModel}", falling back to ${defaultModel}`, {
|
|
configured: configuredModel,
|
|
validModels,
|
|
});
|
|
model = defaultModel;
|
|
}
|
|
|
|
// Rate limiting: enabled by default for free tier users
|
|
const rateLimitingEnabled = settings.CLAUDE_MEM_GEMINI_RATE_LIMITING_ENABLED !== 'false';
|
|
|
|
return { apiKey, model, rateLimitingEnabled };
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Check if Gemini is available (has API key configured)
|
|
* Issue #733: Uses centralized ~/.claude-mem/.env, not random project .env files
|
|
*/
|
|
export function isGeminiAvailable(): boolean {
|
|
const settingsPath = path.join(homedir(), '.claude-mem', 'settings.json');
|
|
const settings = SettingsDefaultsManager.loadFromFile(settingsPath);
|
|
return !!(settings.CLAUDE_MEM_GEMINI_API_KEY || getCredential('GEMINI_API_KEY'));
|
|
}
|
|
|
|
/**
|
|
* Check if Gemini is the selected provider
|
|
*/
|
|
export function isGeminiSelected(): boolean {
|
|
const settingsPath = path.join(homedir(), '.claude-mem', 'settings.json');
|
|
const settings = SettingsDefaultsManager.loadFromFile(settingsPath);
|
|
return settings.CLAUDE_MEM_PROVIDER === 'gemini';
|
|
}
|