MAESTRO: Merge PR #693 - prevent infinite restart loop that causes runaway API costs

Add restart limit (max 3 consecutive restarts) with exponential backoff
to prevent infinite generator restart loops. Also add defensive
memorySessionId checks in GeminiAgent and OpenRouterAgent before
expensive LLM calls to fail fast when session ID hasn't been captured.

Based on PR #693 by @ajbmachon (applied to current main).

Co-Authored-By: Andre Machon <ajbmachon2@gmail.com>
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Alex Newman
2026-02-06 01:43:12 -05:00
parent 1ae6c5708c
commit 6382d6f9c7
6 changed files with 112 additions and 62 deletions
@@ -175,16 +175,37 @@ export class SessionRoutes extends BaseRouteHandler {
session.currentProvider = null;
this.workerService.broadcastProcessingStatus();
// Crash recovery: If not aborted and still has work, restart
// Crash recovery: If not aborted and still has work, restart (with limit)
if (!wasAborted) {
try {
const pendingStore = this.sessionManager.getPendingMessageStore();
const pendingCount = pendingStore.getPendingCount(sessionDbId);
// CRITICAL: Limit consecutive restarts to prevent infinite loops
// This prevents runaway API costs when there's a persistent error (e.g., memorySessionId not captured)
const MAX_CONSECUTIVE_RESTARTS = 3;
if (pendingCount > 0) {
session.consecutiveRestarts = (session.consecutiveRestarts || 0) + 1;
if (session.consecutiveRestarts > MAX_CONSECUTIVE_RESTARTS) {
logger.error('SESSION', `CRITICAL: Generator restart limit exceeded - stopping to prevent runaway costs`, {
sessionId: sessionDbId,
pendingCount,
consecutiveRestarts: session.consecutiveRestarts,
maxRestarts: MAX_CONSECUTIVE_RESTARTS,
action: 'Generator will NOT restart. Check logs for root cause. Messages remain in pending state.'
});
// Don't restart - abort to prevent further API calls
session.abortController.abort();
return;
}
logger.info('SESSION', `Restarting generator after crash/exit with pending work`, {
sessionId: sessionDbId,
pendingCount
pendingCount,
consecutiveRestarts: session.consecutiveRestarts,
maxRestarts: MAX_CONSECUTIVE_RESTARTS
});
// Abort OLD controller before replacing to prevent child process leaks
@@ -192,16 +213,21 @@ export class SessionRoutes extends BaseRouteHandler {
session.abortController = new AbortController();
oldController.abort();
// Small delay before restart
// Exponential backoff: 1s, 2s, 4s for subsequent restarts
const backoffMs = Math.min(1000 * Math.pow(2, session.consecutiveRestarts - 1), 8000);
// Delay before restart with exponential backoff
setTimeout(() => {
const stillExists = this.sessionManager.getSession(sessionDbId);
if (stillExists && !stillExists.generatorPromise) {
this.startGeneratorWithProvider(stillExists, this.getSelectedProvider(), 'crash-recovery');
}
}, 1000);
}, backoffMs);
} else {
// No pending work - abort to kill the child process
session.abortController.abort();
// Reset restart counter on successful completion
session.consecutiveRestarts = 0;
logger.debug('SESSION', 'Aborted controller after natural completion', {
sessionId: sessionDbId
});