Merge branch 'pr-1368' into integration/validation-batch

# Conflicts:
#	plugin/scripts/context-generator.cjs
#	plugin/scripts/mcp-server.cjs
#	plugin/scripts/worker-service.cjs
#	plugin/ui/viewer-bundle.js
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long (+259 −331)
File diff suppressed because one or more lines are too long (+12 −12)
File diff suppressed because one or more lines are too long
@@ -18,6 +18,8 @@ import { logger } from '../../utils/logger.js';
 import { buildInitPrompt, buildObservationPrompt, buildSummaryPrompt, buildContinuationPrompt } from '../../sdk/prompts.js';
 import { SettingsDefaultsManager } from '../../shared/SettingsDefaultsManager.js';
 import { getCredential } from '../../shared/EnvManager.js';
+import { USER_SETTINGS_PATH } from '../../shared/paths.js';
+import { estimateTokens } from '../../shared/timeline-formatting.js';
 import type { ActiveSession, ConversationMessage } from '../worker-types.js';
 import { ModeManager } from '../domain/ModeManager.js';
 import {
@@ -56,6 +58,10 @@ const GEMINI_RPM_LIMITS: Record<GeminiModel, number> = {
 // Track last request time for rate limiting
 let lastRequestTime = 0;
 
+// Context window limits (prevents O(N²) token cost growth)
+const DEFAULT_MAX_CONTEXT_MESSAGES = 20; // Maximum messages to keep in conversation history
+const DEFAULT_MAX_ESTIMATED_TOKENS = 100000; // ~100k tokens max context (safety limit)
+
 /**
  * Enforce RPM rate limit for Gemini free tier.
  * Waits the required time between requests based on model's RPM limit + 100ms safety buffer.
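
The token budget above relies on estimateTokens from timeline-formatting.js, whose implementation does not appear in this diff. A common heuristic is roughly four characters per token; a minimal sketch, assuming that heuristic (not the repository's actual implementation):

// Sketch only — the real estimateTokens in timeline-formatting.js may differ.
// Assumes the common ~4 characters per token heuristic.
export function estimateTokens(text: string): number {
  return Math.ceil(text.length / 4);
}

Under that assumption, the 100000-token default corresponds to roughly 400000 characters of history.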
@@ -346,6 +352,54 @@ export class GeminiAgent {
     }
   }
 
+  /**
+   * Truncate conversation history to prevent runaway context costs.
+   * Keeps most recent messages within both message count and token budget.
+   * Returns a new array — never mutates the original history.
+   */
+  private truncateHistory(history: ConversationMessage[]): ConversationMessage[] {
+    const settings = SettingsDefaultsManager.loadFromFile(USER_SETTINGS_PATH);
+
+    const MAX_CONTEXT_MESSAGES = parseInt(settings.CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES) || DEFAULT_MAX_CONTEXT_MESSAGES;
+    const MAX_ESTIMATED_TOKENS = parseInt(settings.CLAUDE_MEM_GEMINI_MAX_TOKENS) || DEFAULT_MAX_ESTIMATED_TOKENS;
+
+    if (history.length <= MAX_CONTEXT_MESSAGES) {
+      // Check token count even if message count is ok
+      const totalTokens = history.reduce((sum, m) => sum + estimateTokens(m.content), 0);
+      if (totalTokens <= MAX_ESTIMATED_TOKENS) {
+        return history;
+      }
+    }
+
+    // Sliding window: keep most recent messages within limits
+    const truncated: ConversationMessage[] = [];
+    let tokenCount = 0;
+
+    // Process messages in reverse (most recent first)
+    for (let i = history.length - 1; i >= 0; i--) {
+      const msg = history[i];
+      const msgTokens = estimateTokens(msg.content);
+
+      // Always include at least the newest message — an empty contents array
+      // would cause a hard Gemini API error, which is worse than an oversized request.
+      if (truncated.length > 0 && (truncated.length >= MAX_CONTEXT_MESSAGES || tokenCount + msgTokens > MAX_ESTIMATED_TOKENS)) {
+        logger.warn('SDK', 'Context window truncated to prevent runaway costs', {
+          originalMessages: history.length,
+          keptMessages: truncated.length,
+          droppedMessages: i + 1,
+          estimatedTokens: tokenCount,
+          tokenLimit: MAX_ESTIMATED_TOKENS
+        });
+        break;
+      }
+
+      truncated.unshift(msg); // Add to beginning
+      tokenCount += msgTokens;
+    }
+
+    return truncated;
+  }
+
   /**
    * Convert shared ConversationMessage array to Gemini's contents format
    * Maps 'assistant' role to 'model' for Gemini API compatibility
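
To see the sliding window's guarantees concretely, here is a self-contained sketch of the same algorithm with a worked call. Msg and the chars/4 estimator are stand-ins for the real ConversationMessage and estimateTokens, not code from this repository:

type Msg = { role: 'user' | 'assistant'; content: string };
const estimate = (s: string): number => Math.ceil(s.length / 4); // assumed heuristic

function slidingWindow(history: Msg[], maxMsgs: number, maxTokens: number): Msg[] {
  const kept: Msg[] = [];
  let tokens = 0;
  // Walk newest-to-oldest; stop once either budget would be exceeded.
  for (let i = history.length - 1; i >= 0; i--) {
    const t = estimate(history[i].content);
    // The kept.length > 0 guard means the newest message is always kept, even over budget.
    if (kept.length > 0 && (kept.length >= maxMsgs || tokens + t > maxTokens)) break;
    kept.unshift(history[i]);
    tokens += t;
  }
  return kept;
}

const history: Msg[] = [];
for (let i = 0; i < 25; i++) {
  history.push({ role: i % 2 === 0 ? 'user' : 'assistant', content: `message ${i}` });
}
console.log(slidingWindow(history, 20, 100000).length); // 20 (messages 0-4 are dropped)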
@@ -358,8 +412,8 @@ export class GeminiAgent {
   }
 
   /**
-   * Query Gemini via REST API with full conversation history (multi-turn)
-   * Sends the entire conversation context for coherent responses
+   * Query Gemini via REST API with truncated conversation history (multi-turn)
+   * Truncates history to prevent O(N²) token cost growth, then sends for coherent responses
    */
   private async queryGeminiMultiTurn(
     history: ConversationMessage[],
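
The docblock earlier in this file refers to conversationToGeminiContents, whose body sits outside the diff's context lines. Based on the public Gemini REST API contents shape, the role mapping it describes plausibly looks like this sketch (an assumption, not the repository's code; Msg reuses the type from the sketch above):

function conversationToGeminiContents(history: Msg[]) {
  return history.map((m) => ({
    // Gemini's REST API expects 'model' where OpenAI-style histories say 'assistant'.
    role: m.role === 'assistant' ? 'model' : 'user',
    parts: [{ text: m.content }],
  }));
}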
@@ -367,11 +421,13 @@ export class GeminiAgent {
     model: GeminiModel,
     rateLimitingEnabled: boolean
   ): Promise<{ content: string; tokensUsed?: number }> {
-    const contents = this.conversationToGeminiContents(history);
-    const totalChars = history.reduce((sum, m) => sum + m.content.length, 0);
+    const truncatedHistory = this.truncateHistory(history);
+    const contents = this.conversationToGeminiContents(truncatedHistory);
+    const totalChars = truncatedHistory.reduce((sum, m) => sum + m.content.length, 0);
 
     logger.debug('SDK', `Querying Gemini multi-turn (${model})`, {
-      turns: history.length,
+      turns: truncatedHistory.length,
+      totalTurns: history.length,
       totalChars
     });
 
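
The O(N²) growth this commit targets comes from multi-turn resending: turn k re-sends all k previous messages, so input tokens across N turns scale with the sum 1 + 2 + ... + N = N(N+1)/2. A small sketch comparing uncapped and capped costs (illustrative numbers, not measurements from this codebase):

// Each turn re-sends its history; msgTokens is an illustrative per-message size.
function totalInputTokens(turns: number, msgTokens: number, cap = Infinity): number {
  let total = 0;
  for (let k = 1; k <= turns; k++) {
    total += Math.min(k, cap) * msgTokens; // history length per request, capped
  }
  return total;
}

console.log(totalInputTokens(100, 500));     // 2525000 (quadratic in turns)
console.log(totalInputTokens(100, 500, 20)); // 905000 (linear once the cap binds)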
@@ -94,6 +94,8 @@ export class SettingsRoutes extends BaseRouteHandler {
       'CLAUDE_MEM_GEMINI_API_KEY',
       'CLAUDE_MEM_GEMINI_MODEL',
       'CLAUDE_MEM_GEMINI_RATE_LIMITING_ENABLED',
+      'CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES',
+      'CLAUDE_MEM_GEMINI_MAX_TOKENS',
       // OpenRouter Configuration
       'CLAUDE_MEM_OPENROUTER_API_KEY',
       'CLAUDE_MEM_OPENROUTER_MODEL',
@@ -248,6 +250,22 @@ export class SettingsRoutes extends BaseRouteHandler {
       }
     }
 
+    // Validate CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES
+    if (settings.CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES) {
+      const count = parseInt(settings.CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES, 10);
+      if (isNaN(count) || count < 1 || count > 100) {
+        return { valid: false, error: 'CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES must be between 1 and 100' };
+      }
+    }
+
+    // Validate CLAUDE_MEM_GEMINI_MAX_TOKENS
+    if (settings.CLAUDE_MEM_GEMINI_MAX_TOKENS) {
+      const tokens = parseInt(settings.CLAUDE_MEM_GEMINI_MAX_TOKENS, 10);
+      if (isNaN(tokens) || tokens < 1000 || tokens > 1000000) {
+        return { valid: false, error: 'CLAUDE_MEM_GEMINI_MAX_TOKENS must be between 1000 and 1000000' };
+      }
+    }
+
     // Validate CLAUDE_MEM_CONTEXT_OBSERVATIONS
     if (settings.CLAUDE_MEM_CONTEXT_OBSERVATIONS) {
       const obsCount = parseInt(settings.CLAUDE_MEM_CONTEXT_OBSERVATIONS, 10);
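
One subtlety worth noting: this route-level validation passes an explicit radix of 10, while truncateHistory reads the same settings with parseInt(...) || DEFAULT and no radix, so values that parse to NaN or 0 silently fall back to the defaults rather than erroring. A two-line illustration of that fallback pattern:

const a = parseInt('abc', 10) || 20; // NaN is falsy, so a === 20
const b = parseInt('0', 10) || 20;   // 0 is falsy too, so b === 20, not 0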
@@ -23,6 +23,8 @@ export interface SettingsDefaults {
   CLAUDE_MEM_GEMINI_API_KEY: string;
   CLAUDE_MEM_GEMINI_MODEL: string; // 'gemini-2.5-flash-lite' | 'gemini-2.5-flash' | 'gemini-3-flash-preview'
   CLAUDE_MEM_GEMINI_RATE_LIMITING_ENABLED: string; // 'true' | 'false' - enable rate limiting for free tier
+  CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES: string; // Max messages in Gemini context window (prevents O(N²) cost growth)
+  CLAUDE_MEM_GEMINI_MAX_TOKENS: string; // Max estimated tokens for Gemini context (~100k safety limit)
   CLAUDE_MEM_OPENROUTER_API_KEY: string;
   CLAUDE_MEM_OPENROUTER_MODEL: string;
   CLAUDE_MEM_OPENROUTER_SITE_URL: string;
@@ -90,6 +92,8 @@ export class SettingsDefaultsManager {
       CLAUDE_MEM_GEMINI_API_KEY: '', // Empty by default, can be set via UI or env
       CLAUDE_MEM_GEMINI_MODEL: 'gemini-2.5-flash-lite', // Default Gemini model (highest free tier RPM)
       CLAUDE_MEM_GEMINI_RATE_LIMITING_ENABLED: 'true', // Rate limiting ON by default for free tier users
+      CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES: '20', // Max messages in Gemini context window
+      CLAUDE_MEM_GEMINI_MAX_TOKENS: '100000', // Max estimated tokens (~100k safety limit)
       CLAUDE_MEM_OPENROUTER_API_KEY: '', // Empty by default, can be set via UI or env
       CLAUDE_MEM_OPENROUTER_MODEL: 'xiaomi/mimo-v2-flash:free', // Default OpenRouter model (free tier)
       CLAUDE_MEM_OPENROUTER_SITE_URL: '', // Optional: for OpenRouter analytics
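
Both limits are stored as strings, like every other field in SettingsDefaults, and parsed at the point of use. A hypothetical override with illustrative values (sketch only):

const overrides: Partial<SettingsDefaults> = {
  CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES: '30', // widen the message window
  CLAUDE_MEM_GEMINI_MAX_TOKENS: '150000',       // raise the token ceiling
};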
@@ -358,6 +358,90 @@ describe('GeminiAgent', () => {
     }
   });
 
+  describe('conversation history truncation', () => {
+    it('should truncate history when message count exceeds limit', async () => {
+      // Build a history with 25 small messages (limit is 20)
+      const history: any[] = [];
+      for (let i = 0; i < 25; i++) {
+        history.push({ role: i % 2 === 0 ? 'user' : 'assistant', content: `message ${i}` });
+      }
+
+      const session = {
+        sessionDbId: 1,
+        contentSessionId: 'test-session',
+        memorySessionId: 'mem-session-123',
+        project: 'test-project',
+        userPrompt: 'test prompt',
+        conversationHistory: history,
+        lastPromptNumber: 2,
+        cumulativeInputTokens: 0,
+        cumulativeOutputTokens: 0,
+        pendingMessages: [],
+        abortController: new AbortController(),
+        generatorPromise: null,
+        earliestPendingTimestamp: null,
+        currentProvider: null,
+        startTime: Date.now(),
+        processingMessageIds: []
+      } as any;
+
+      global.fetch = mock(() => Promise.resolve(new Response(JSON.stringify({
+        candidates: [{ content: { parts: [{ text: 'response' }] } }]
+      }))));
+
+      await agent.startSession(session);
+
+      // The request body should have truncated contents (init adds 1 more, so 26 total → truncated to 20)
+      const body = JSON.parse((global.fetch as any).mock.calls[0][1].body);
+      expect(body.contents.length).toBeLessThanOrEqual(20);
+    });
+
+    it('should always keep at least the newest message even if it exceeds token limit', async () => {
+      // Override settings to have a very low token limit
+      loadFromFileSpy.mockImplementation(() => ({
+        ...SettingsDefaultsManager.getAllDefaults(),
+        CLAUDE_MEM_GEMINI_API_KEY: 'test-api-key',
+        CLAUDE_MEM_GEMINI_MODEL: 'gemini-2.5-flash-lite',
+        CLAUDE_MEM_GEMINI_RATE_LIMITING_ENABLED: 'false',
+        CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES: '20',
+        CLAUDE_MEM_GEMINI_MAX_TOKENS: '1000', // Very low limit (~4000 chars at ~4 chars/token)
+        CLAUDE_MEM_DATA_DIR: '/tmp/claude-mem-test',
+      }));
+
+      // Create a single large message that exceeds the token limit
+      const largeContent = 'x'.repeat(8000); // ~2000 tokens, well above the 1000 limit
+
+      const session = {
+        sessionDbId: 1,
+        contentSessionId: 'test-session',
+        memorySessionId: 'mem-session-123',
+        project: 'test-project',
+        userPrompt: largeContent,
+        conversationHistory: [],
+        lastPromptNumber: 1,
+        cumulativeInputTokens: 0,
+        cumulativeOutputTokens: 0,
+        pendingMessages: [],
+        abortController: new AbortController(),
+        generatorPromise: null,
+        earliestPendingTimestamp: null,
+        currentProvider: null,
+        startTime: Date.now(),
+        processingMessageIds: []
+      } as any;
+
+      global.fetch = mock(() => Promise.resolve(new Response(JSON.stringify({
+        candidates: [{ content: { parts: [{ text: 'response' }] } }]
+      }))));
+
+      await agent.startSession(session);
+
+      // Should still send at least 1 message (the newest), not empty contents
+      const body = JSON.parse((global.fetch as any).mock.calls[0][1].body);
+      expect(body.contents.length).toBeGreaterThanOrEqual(1);
+    });
+  });
+
   describe('gemini-3-flash-preview model support', () => {
     it('should accept gemini-3-flash-preview as a valid model', async () => {
       // The GeminiModel type includes gemini-3-flash-preview - compile-time check