feat(gemini): update Gemini model types and implement rate limiting for free tier
- Changed the Gemini model types to 'gemini-2.5-flash-lite', 'gemini-2.5-flash', and 'gemini-3-flash'.
- Introduced free-tier RPM (requests per minute) limits: a maximum of 10 RPM for 'gemini-2.5-flash-lite' and 5 RPM for the other models.
- Added rate-limit enforcement in the GeminiAgent class, which waits between requests according to the selected model's RPM limit.
- Updated getGeminiConfig to include the billingEnabled setting, allowing users to skip rate limiting if billing is enabled.
- Modified ContextSettingsModal to reflect the new model options and added a toggle for enabling billing.
- Updated the default settings to use the new default model and the billing configuration.
This commit is contained in:
@@ -25,8 +25,44 @@ import { ModeManager } from '../domain/ModeManager.js';
|
||||
// Gemini API endpoint
|
||||
const GEMINI_API_URL = 'https://generativelanguage.googleapis.com/v1beta/models';
|
||||
|
||||
// Gemini model types
|
||||
export type GeminiModel = 'gemini-2.0-flash-exp' | 'gemini-1.5-flash' | 'gemini-1.5-pro';
|
||||
// Gemini model types (free tier models)
|
||||
export type GeminiModel = 'gemini-2.5-flash-lite' | 'gemini-2.5-flash' | 'gemini-3-flash';
|
||||
|
||||
// Free tier RPM limits by model (requests per minute)
|
||||
const GEMINI_RPM_LIMITS: Record<GeminiModel, number> = {
|
||||
'gemini-2.5-flash-lite': 10,
|
||||
'gemini-2.5-flash': 5,
|
||||
'gemini-3-flash': 5,
|
||||
};
|
||||
|
||||
// Track last request time for rate limiting
|
||||
let lastRequestTime = 0;
|
||||
|
||||
/**
 * Enforce the RPM (requests per minute) limit for the Gemini free tier (no billing).
 *
 * Requests are spaced at least `(60s / RPM) + 100ms` apart, using the
 * module-level `lastRequestTime` as the shared timestamp. The next slot is
 * reserved synchronously, before any awaiting happens, so overlapping callers
 * queue one interval after another instead of all reading the same stale
 * timestamp, sleeping for the same duration, and firing together.
 *
 * @param model - Gemini model whose RPM limit applies. A model that has no
 *   entry in `GEMINI_RPM_LIMITS` falls back to 5 RPM.
 * @param billingEnabled - When true, throttling is skipped entirely
 *   (1000+ RPM available) and the shared timestamp is left untouched.
 * @returns A promise that resolves once the caller may send its request.
 */
async function enforceRateLimitForModel(model: GeminiModel, billingEnabled: boolean): Promise<void> {
  // Skip rate limiting if billing is enabled (1000+ RPM available)
  if (billingEnabled) {
    return;
  }

  // `||` (not `??`) on purpose: a missing or zero entry must never reach the division below.
  const rpm = GEMINI_RPM_LIMITS[model] || 5;
  const minimumDelayMs = Math.ceil(60000 / rpm) + 100; // (60s / RPM) + 100ms safety buffer

  const now = Date.now();
  // Earliest moment this request may start: right now, or one full interval
  // after the previously reserved slot, whichever is later.
  const scheduledAt = Math.max(now, lastRequestTime + minimumDelayMs);

  // Reserve the slot BEFORE awaiting so a concurrent caller sees it and
  // schedules itself one interval later.
  lastRequestTime = scheduledAt;

  const waitTime = scheduledAt - now;
  if (waitTime > 0) {
    logger.debug('SDK', `Rate limiting: waiting ${waitTime}ms before Gemini request`, { model, rpm });
    await new Promise<void>(resolve => setTimeout(resolve, waitTime));
  }
}
|
||||
|
||||
interface GeminiResponse {
|
||||
candidates?: Array<{
|
||||
@@ -99,7 +135,7 @@ export class GeminiAgent {
|
||||
async startSession(session: ActiveSession, worker?: any): Promise<void> {
|
||||
try {
|
||||
// Get Gemini configuration
|
||||
const { apiKey, model } = this.getGeminiConfig();
|
||||
const { apiKey, model, billingEnabled } = this.getGeminiConfig();
|
||||
|
||||
if (!apiKey) {
|
||||
throw new Error('Gemini API key not configured. Set CLAUDE_MEM_GEMINI_API_KEY in settings or GEMINI_API_KEY environment variable.');
|
||||
@@ -115,7 +151,7 @@ export class GeminiAgent {
|
||||
|
||||
// Add to conversation history and query Gemini with full context
|
||||
session.conversationHistory.push({ role: 'user', content: initPrompt });
|
||||
const initResponse = await this.queryGeminiMultiTurn(session.conversationHistory, apiKey, model);
|
||||
const initResponse = await this.queryGeminiMultiTurn(session.conversationHistory, apiKey, model, billingEnabled);
|
||||
|
||||
if (initResponse.content) {
|
||||
// Add response to conversation history
|
||||
@@ -150,7 +186,7 @@ export class GeminiAgent {
|
||||
|
||||
// Add to conversation history and query Gemini with full context
|
||||
session.conversationHistory.push({ role: 'user', content: obsPrompt });
|
||||
const obsResponse = await this.queryGeminiMultiTurn(session.conversationHistory, apiKey, model);
|
||||
const obsResponse = await this.queryGeminiMultiTurn(session.conversationHistory, apiKey, model, billingEnabled);
|
||||
|
||||
if (obsResponse.content) {
|
||||
// Add response to conversation history
|
||||
@@ -175,7 +211,7 @@ export class GeminiAgent {
|
||||
|
||||
// Add to conversation history and query Gemini with full context
|
||||
session.conversationHistory.push({ role: 'user', content: summaryPrompt });
|
||||
const summaryResponse = await this.queryGeminiMultiTurn(session.conversationHistory, apiKey, model);
|
||||
const summaryResponse = await this.queryGeminiMultiTurn(session.conversationHistory, apiKey, model, billingEnabled);
|
||||
|
||||
if (summaryResponse.content) {
|
||||
// Add response to conversation history
|
||||
@@ -252,7 +288,8 @@ export class GeminiAgent {
|
||||
private async queryGeminiMultiTurn(
|
||||
history: ConversationMessage[],
|
||||
apiKey: string,
|
||||
model: GeminiModel
|
||||
model: GeminiModel,
|
||||
billingEnabled: boolean
|
||||
): Promise<{ content: string; tokensUsed?: number }> {
|
||||
const contents = this.conversationToGeminiContents(history);
|
||||
const totalChars = history.reduce((sum, m) => sum + m.content.length, 0);
|
||||
@@ -264,8 +301,8 @@ export class GeminiAgent {
|
||||
|
||||
const url = `${GEMINI_API_URL}/${model}:generateContent?key=${apiKey}`;
|
||||
|
||||
// Rate limit delay - Gemini API requires spacing between requests
|
||||
await new Promise(resolve => setTimeout(resolve, 100));
|
||||
// Enforce RPM rate limit for free tier (skipped if billing enabled)
|
||||
await enforceRateLimitForModel(model, billingEnabled);
|
||||
|
||||
const response = await fetch(url, {
|
||||
method: 'POST',
|
||||
@@ -461,17 +498,20 @@ export class GeminiAgent {
|
||||
/**
|
||||
* Get Gemini configuration from settings or environment
|
||||
*/
|
||||
private getGeminiConfig(): { apiKey: string; model: GeminiModel } {
|
||||
private getGeminiConfig(): { apiKey: string; model: GeminiModel; billingEnabled: boolean } {
|
||||
const settingsPath = path.join(homedir(), '.claude-mem', 'settings.json');
|
||||
const settings = SettingsDefaultsManager.loadFromFile(settingsPath);
|
||||
|
||||
// API key: check settings first, then environment variable
|
||||
const apiKey = settings.CLAUDE_MEM_GEMINI_API_KEY || process.env.GEMINI_API_KEY || '';
|
||||
|
||||
// Model: from settings or default
|
||||
const model = (settings.CLAUDE_MEM_GEMINI_MODEL || 'gemini-2.0-flash-exp') as GeminiModel;
|
||||
// Model: from settings or default (gemini-2.5-flash-lite has highest free tier RPM)
|
||||
const model = (settings.CLAUDE_MEM_GEMINI_MODEL || 'gemini-2.5-flash-lite') as GeminiModel;
|
||||
|
||||
return { apiKey, model };
|
||||
// Billing: if enabled, skip rate limiting (1000+ RPM available)
|
||||
const billingEnabled = settings.CLAUDE_MEM_GEMINI_BILLING_ENABLED === 'true';
|
||||
|
||||
return { apiKey, model, billingEnabled };
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -20,7 +20,8 @@ export interface SettingsDefaults {
|
||||
// AI Provider Configuration
|
||||
CLAUDE_MEM_PROVIDER: string; // 'claude' | 'gemini'
|
||||
CLAUDE_MEM_GEMINI_API_KEY: string;
|
||||
CLAUDE_MEM_GEMINI_MODEL: string; // 'gemini-2.0-flash-exp' | 'gemini-1.5-flash' | 'gemini-1.5-pro'
|
||||
CLAUDE_MEM_GEMINI_MODEL: string; // 'gemini-2.5-flash-lite' | 'gemini-2.5-flash' | 'gemini-3-flash'
|
||||
CLAUDE_MEM_GEMINI_BILLING_ENABLED: string; // 'true' | 'false' - skip rate limiting if billing enabled
|
||||
// System Configuration
|
||||
CLAUDE_MEM_DATA_DIR: string;
|
||||
CLAUDE_MEM_LOG_LEVEL: string;
|
||||
@@ -57,7 +58,8 @@ export class SettingsDefaultsManager {
|
||||
// AI Provider Configuration
|
||||
CLAUDE_MEM_PROVIDER: 'claude', // Default to Claude
|
||||
CLAUDE_MEM_GEMINI_API_KEY: '', // Empty by default, can be set via UI or env
|
||||
CLAUDE_MEM_GEMINI_MODEL: 'gemini-2.0-flash-exp', // Default Gemini model
|
||||
CLAUDE_MEM_GEMINI_MODEL: 'gemini-2.5-flash-lite', // Default Gemini model (highest free tier RPM)
|
||||
CLAUDE_MEM_GEMINI_BILLING_ENABLED: 'false', // Rate limiting enabled by default for no-billing users
|
||||
// System Configuration
|
||||
CLAUDE_MEM_DATA_DIR: join(homedir(), '.claude-mem'),
|
||||
CLAUDE_MEM_LOG_LEVEL: 'INFO',
|
||||
|
||||
@@ -471,14 +471,22 @@ export function ContextSettingsModal({
|
||||
tooltip="Gemini model used for generating observations"
|
||||
>
|
||||
<select
|
||||
value={formState.CLAUDE_MEM_GEMINI_MODEL || 'gemini-2.0-flash-exp'}
|
||||
value={formState.CLAUDE_MEM_GEMINI_MODEL || 'gemini-2.5-flash-lite'}
|
||||
onChange={(e) => updateSetting('CLAUDE_MEM_GEMINI_MODEL', e.target.value)}
|
||||
>
|
||||
<option value="gemini-2.0-flash-exp">gemini-2.0-flash-exp (fastest)</option>
|
||||
<option value="gemini-1.5-flash">gemini-1.5-flash (balanced)</option>
|
||||
<option value="gemini-1.5-pro">gemini-1.5-pro (highest quality)</option>
|
||||
<option value="gemini-2.5-flash-lite">gemini-2.5-flash-lite (10 RPM free)</option>
|
||||
<option value="gemini-2.5-flash">gemini-2.5-flash (5 RPM free)</option>
|
||||
<option value="gemini-3-flash">gemini-3-flash (5 RPM free)</option>
|
||||
</select>
|
||||
</FormField>
|
||||
<div className="toggle-group" style={{ marginTop: '8px' }}>
|
||||
<ToggleSwitch
|
||||
label="Billing Enabled"
|
||||
tooltip="Enable if you have billing set up on Google Cloud. Skips rate limiting (1000+ RPM available)."
|
||||
checked={formState.CLAUDE_MEM_GEMINI_BILLING_ENABLED === 'true'}
|
||||
onChange={(checked) => updateSetting('CLAUDE_MEM_GEMINI_BILLING_ENABLED', checked ? 'true' : 'false')}
|
||||
/>
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
|
||||
|
||||
@@ -11,7 +11,8 @@ export const DEFAULT_SETTINGS = {
|
||||
// AI Provider Configuration
|
||||
CLAUDE_MEM_PROVIDER: 'claude',
|
||||
CLAUDE_MEM_GEMINI_API_KEY: '',
|
||||
CLAUDE_MEM_GEMINI_MODEL: 'gemini-2.0-flash-exp',
|
||||
CLAUDE_MEM_GEMINI_MODEL: 'gemini-2.5-flash-lite',
|
||||
CLAUDE_MEM_GEMINI_BILLING_ENABLED: 'false',
|
||||
|
||||
// Token Economics (all true for backwards compatibility)
|
||||
CLAUDE_MEM_CONTEXT_SHOW_READ_TOKENS: 'true',
|
||||
|
||||
Reference in New Issue
Block a user