Refactor Gemini rate limiting configuration

- Removed billingEnabled setting and replaced it with rateLimitingEnabled in GeminiAgent.
- Updated enforceRateLimitForModel function to skip rate limiting based on rateLimitingEnabled.
- Adjusted getGeminiConfig to retrieve rateLimitingEnabled from settings.
- Changed settings management to reflect the new rate limiting logic in SettingsDefaultsManager and UI components.
- Updated ContextSettingsModal to toggle rate limiting instead of billing.
- Ensured default settings reflect the new rate limiting behavior for free tier users.
This commit is contained in:
Alex Newman
2025-12-25 20:47:39 -05:00
parent a47e57bb35
commit 07036fe427
15 changed files with 69 additions and 69 deletions
+17 -18
View File
@@ -18,8 +18,7 @@ import { logger } from '../../utils/logger.js';
import { parseObservations, parseSummary } from '../../sdk/parser.js';
import { buildInitPrompt, buildObservationPrompt, buildSummaryPrompt, buildContinuationPrompt } from '../../sdk/prompts.js';
import { SettingsDefaultsManager } from '../../shared/SettingsDefaultsManager.js';
import { USER_SETTINGS_PATH } from '../../shared/paths.js';
import type { ActiveSession, PendingMessage, ConversationMessage } from '../worker-types.js';
import type { ActiveSession, ConversationMessage } from '../worker-types.js';
import { ModeManager } from '../domain/ModeManager.js';
// Gemini API endpoint
@@ -46,13 +45,13 @@ const GEMINI_RPM_LIMITS: Record<GeminiModel, number> = {
let lastRequestTime = 0;
/**
* Enforce RPM rate limit for Gemini free tier (no billing).
* Enforce RPM rate limit for Gemini free tier.
* Waits the required time between requests based on model's RPM limit + 100ms safety buffer.
* Skipped entirely if billing is enabled (1000+ RPM available).
* Skipped entirely if rate limiting is disabled (billing users with 1000+ RPM available).
*/
async function enforceRateLimitForModel(model: GeminiModel, billingEnabled: boolean): Promise<void> {
// Skip rate limiting if billing is enabled (1000+ RPM available)
if (billingEnabled) {
async function enforceRateLimitForModel(model: GeminiModel, rateLimitingEnabled: boolean): Promise<void> {
// Skip rate limiting if disabled (billing users with 1000+ RPM)
if (!rateLimitingEnabled) {
return;
}
@@ -142,7 +141,7 @@ export class GeminiAgent {
async startSession(session: ActiveSession, worker?: any): Promise<void> {
try {
// Get Gemini configuration
const { apiKey, model, billingEnabled } = this.getGeminiConfig();
const { apiKey, model, rateLimitingEnabled } = this.getGeminiConfig();
if (!apiKey) {
throw new Error('Gemini API key not configured. Set CLAUDE_MEM_GEMINI_API_KEY in settings or GEMINI_API_KEY environment variable.');
@@ -158,7 +157,7 @@ export class GeminiAgent {
// Add to conversation history and query Gemini with full context
session.conversationHistory.push({ role: 'user', content: initPrompt });
const initResponse = await this.queryGeminiMultiTurn(session.conversationHistory, apiKey, model, billingEnabled);
const initResponse = await this.queryGeminiMultiTurn(session.conversationHistory, apiKey, model, rateLimitingEnabled);
if (initResponse.content) {
// Add response to conversation history
@@ -193,7 +192,7 @@ export class GeminiAgent {
// Add to conversation history and query Gemini with full context
session.conversationHistory.push({ role: 'user', content: obsPrompt });
const obsResponse = await this.queryGeminiMultiTurn(session.conversationHistory, apiKey, model, billingEnabled);
const obsResponse = await this.queryGeminiMultiTurn(session.conversationHistory, apiKey, model, rateLimitingEnabled);
if (obsResponse.content) {
// Add response to conversation history
@@ -225,7 +224,7 @@ export class GeminiAgent {
// Add to conversation history and query Gemini with full context
session.conversationHistory.push({ role: 'user', content: summaryPrompt });
const summaryResponse = await this.queryGeminiMultiTurn(session.conversationHistory, apiKey, model, billingEnabled);
const summaryResponse = await this.queryGeminiMultiTurn(session.conversationHistory, apiKey, model, rateLimitingEnabled);
if (summaryResponse.content) {
// Add response to conversation history
@@ -307,7 +306,7 @@ export class GeminiAgent {
history: ConversationMessage[],
apiKey: string,
model: GeminiModel,
billingEnabled: boolean
rateLimitingEnabled: boolean
): Promise<{ content: string; tokensUsed?: number }> {
const contents = this.conversationToGeminiContents(history);
const totalChars = history.reduce((sum, m) => sum + m.content.length, 0);
@@ -319,8 +318,8 @@ export class GeminiAgent {
const url = `${GEMINI_API_URL}/${model}:generateContent?key=${apiKey}`;
// Enforce RPM rate limit for free tier (skipped if billing enabled)
await enforceRateLimitForModel(model, billingEnabled);
// Enforce RPM rate limit for free tier (skipped if rate limiting disabled)
await enforceRateLimitForModel(model, rateLimitingEnabled);
const response = await fetch(url, {
method: 'POST',
@@ -516,7 +515,7 @@ export class GeminiAgent {
/**
* Get Gemini configuration from settings or environment
*/
private getGeminiConfig(): { apiKey: string; model: GeminiModel; billingEnabled: boolean } {
private getGeminiConfig(): { apiKey: string; model: GeminiModel; rateLimitingEnabled: boolean } {
const settingsPath = path.join(homedir(), '.claude-mem', 'settings.json');
const settings = SettingsDefaultsManager.loadFromFile(settingsPath);
@@ -545,10 +544,10 @@ export class GeminiAgent {
model = defaultModel;
}
// Billing: if enabled, skip rate limiting (1000+ RPM available)
const billingEnabled = settings.CLAUDE_MEM_GEMINI_BILLING_ENABLED === 'true';
// Rate limiting: enabled by default for free tier users
const rateLimitingEnabled = settings.CLAUDE_MEM_GEMINI_RATE_LIMITING_ENABLED !== 'false';
return { apiKey, model, billingEnabled };
return { apiKey, model, rateLimitingEnabled };
}
}