MAESTRO: Merge PR #693 - prevent infinite restart loop that causes runaway API costs
Add restart limit (max 3 consecutive restarts) with exponential backoff to prevent infinite generator restart loops. Also add defensive memorySessionId checks in GeminiAgent and OpenRouterAgent before expensive LLM calls to fail fast when session ID hasn't been captured. Based on PR #693 by @ajbmachon (applied to current main). Co-Authored-By: Andre Machon <ajbmachon2@gmail.com> Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -33,6 +33,7 @@ export interface ActiveSession {
|
||||
earliestPendingTimestamp: number | null; // Original timestamp of earliest pending message (for accurate observation timestamps)
|
||||
conversationHistory: ConversationMessage[]; // Shared conversation history for provider switching
|
||||
currentProvider: 'claude' | 'gemini' | 'openrouter' | null; // Track which provider is currently running
|
||||
consecutiveRestarts: number; // Track consecutive restart attempts to prevent infinite loops
|
||||
}
|
||||
|
||||
export interface PendingMessage {
|
||||
|
||||
@@ -192,6 +192,12 @@ export class GeminiAgent {
|
||||
session.lastPromptNumber = message.prompt_number;
|
||||
}
|
||||
|
||||
// CRITICAL: Check memorySessionId BEFORE making expensive LLM call
|
||||
// This prevents wasting tokens when we won't be able to store the result anyway
|
||||
if (!session.memorySessionId) {
|
||||
throw new Error('Cannot process observations: memorySessionId not yet captured. This session may need to be reinitialized.');
|
||||
}
|
||||
|
||||
// Build observation prompt
|
||||
const obsPrompt = buildObservationPrompt({
|
||||
id: 0,
|
||||
@@ -230,6 +236,11 @@ export class GeminiAgent {
|
||||
);
|
||||
|
||||
} else if (message.type === 'summarize') {
|
||||
// CRITICAL: Check memorySessionId BEFORE making expensive LLM call
|
||||
if (!session.memorySessionId) {
|
||||
throw new Error('Cannot process summary: memorySessionId not yet captured. This session may need to be reinitialized.');
|
||||
}
|
||||
|
||||
// Build summary prompt
|
||||
const summaryPrompt = buildSummaryPrompt({
|
||||
id: session.sessionDbId,
|
||||
|
||||
@@ -150,6 +150,12 @@ export class OpenRouterAgent {
|
||||
session.lastPromptNumber = message.prompt_number;
|
||||
}
|
||||
|
||||
// CRITICAL: Check memorySessionId BEFORE making expensive LLM call
|
||||
// This prevents wasting tokens when we won't be able to store the result anyway
|
||||
if (!session.memorySessionId) {
|
||||
throw new Error('Cannot process observations: memorySessionId not yet captured. This session may need to be reinitialized.');
|
||||
}
|
||||
|
||||
// Build observation prompt
|
||||
const obsPrompt = buildObservationPrompt({
|
||||
id: 0,
|
||||
@@ -188,6 +194,11 @@ export class OpenRouterAgent {
|
||||
);
|
||||
|
||||
} else if (message.type === 'summarize') {
|
||||
// CRITICAL: Check memorySessionId BEFORE making expensive LLM call
|
||||
if (!session.memorySessionId) {
|
||||
throw new Error('Cannot process summary: memorySessionId not yet captured. This session may need to be reinitialized.');
|
||||
}
|
||||
|
||||
// Build summary prompt
|
||||
const summaryPrompt = buildSummaryPrompt({
|
||||
id: session.sessionDbId,
|
||||
|
||||
@@ -153,7 +153,8 @@ export class SessionManager {
|
||||
cumulativeOutputTokens: 0,
|
||||
earliestPendingTimestamp: null,
|
||||
conversationHistory: [], // Initialize empty - will be populated by agents
|
||||
currentProvider: null // Will be set when generator starts
|
||||
currentProvider: null, // Will be set when generator starts
|
||||
consecutiveRestarts: 0 // Track consecutive restart attempts to prevent infinite loops
|
||||
};
|
||||
|
||||
logger.debug('SESSION', 'Creating new session object (memorySessionId cleared to prevent stale resume)', {
|
||||
|
||||
@@ -175,16 +175,37 @@ export class SessionRoutes extends BaseRouteHandler {
|
||||
session.currentProvider = null;
|
||||
this.workerService.broadcastProcessingStatus();
|
||||
|
||||
// Crash recovery: If not aborted and still has work, restart
|
||||
// Crash recovery: If not aborted and still has work, restart (with limit)
|
||||
if (!wasAborted) {
|
||||
try {
|
||||
const pendingStore = this.sessionManager.getPendingMessageStore();
|
||||
const pendingCount = pendingStore.getPendingCount(sessionDbId);
|
||||
|
||||
// CRITICAL: Limit consecutive restarts to prevent infinite loops
|
||||
// This prevents runaway API costs when there's a persistent error (e.g., memorySessionId not captured)
|
||||
const MAX_CONSECUTIVE_RESTARTS = 3;
|
||||
|
||||
if (pendingCount > 0) {
|
||||
session.consecutiveRestarts = (session.consecutiveRestarts || 0) + 1;
|
||||
|
||||
if (session.consecutiveRestarts > MAX_CONSECUTIVE_RESTARTS) {
|
||||
logger.error('SESSION', `CRITICAL: Generator restart limit exceeded - stopping to prevent runaway costs`, {
|
||||
sessionId: sessionDbId,
|
||||
pendingCount,
|
||||
consecutiveRestarts: session.consecutiveRestarts,
|
||||
maxRestarts: MAX_CONSECUTIVE_RESTARTS,
|
||||
action: 'Generator will NOT restart. Check logs for root cause. Messages remain in pending state.'
|
||||
});
|
||||
// Don't restart - abort to prevent further API calls
|
||||
session.abortController.abort();
|
||||
return;
|
||||
}
|
||||
|
||||
logger.info('SESSION', `Restarting generator after crash/exit with pending work`, {
|
||||
sessionId: sessionDbId,
|
||||
pendingCount
|
||||
pendingCount,
|
||||
consecutiveRestarts: session.consecutiveRestarts,
|
||||
maxRestarts: MAX_CONSECUTIVE_RESTARTS
|
||||
});
|
||||
|
||||
// Abort OLD controller before replacing to prevent child process leaks
|
||||
@@ -192,16 +213,21 @@ export class SessionRoutes extends BaseRouteHandler {
|
||||
session.abortController = new AbortController();
|
||||
oldController.abort();
|
||||
|
||||
// Small delay before restart
|
||||
// Exponential backoff: 1s, 2s, 4s for subsequent restarts
|
||||
const backoffMs = Math.min(1000 * Math.pow(2, session.consecutiveRestarts - 1), 8000);
|
||||
|
||||
// Delay before restart with exponential backoff
|
||||
setTimeout(() => {
|
||||
const stillExists = this.sessionManager.getSession(sessionDbId);
|
||||
if (stillExists && !stillExists.generatorPromise) {
|
||||
this.startGeneratorWithProvider(stillExists, this.getSelectedProvider(), 'crash-recovery');
|
||||
}
|
||||
}, 1000);
|
||||
}, backoffMs);
|
||||
} else {
|
||||
// No pending work - abort to kill the child process
|
||||
session.abortController.abort();
|
||||
// Reset restart counter on successful completion
|
||||
session.consecutiveRestarts = 0;
|
||||
logger.debug('SESSION', 'Aborted controller after natural completion', {
|
||||
sessionId: sessionDbId
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user