fix: prevent infinite restart loop on FOREIGN KEY constraint errors (#1334)
The pending-work-restart logic had no retry limit, causing infinite loops when sessions encountered FOREIGN KEY constraint failures. This led to 2000+ error log entries per minute and an eventual worker crash via SIGTERM.

Two fixes:
1. Add 'FOREIGN KEY constraint failed' to the unrecoverable error patterns so it short-circuits immediately instead of falling through to restart.
2. Add a MAX_PENDING_RESTARTS (3) limit to the pending-work-restart path as a safety net for any future unhandled persistent errors.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -561,6 +561,7 @@ export class WorkerService {
|
|||||||
'ENOENT',
|
'ENOENT',
|
||||||
'spawn',
|
'spawn',
|
||||||
'Invalid API key',
|
'Invalid API key',
|
||||||
|
'FOREIGN KEY constraint failed',
|
||||||
];
|
];
|
||||||
if (unrecoverablePatterns.some(pattern => errorMessage.includes(pattern))) {
|
if (unrecoverablePatterns.some(pattern => errorMessage.includes(pattern))) {
|
||||||
hadUnrecoverableError = true;
|
hadUnrecoverableError = true;
|
||||||
@@ -659,16 +660,35 @@ export class WorkerService {
|
|||||||
|
|
||||||
// Check if there's pending work that needs processing with a fresh AbortController
|
// Check if there's pending work that needs processing with a fresh AbortController
|
||||||
const pendingCount = pendingStore.getPendingCount(session.sessionDbId);
|
const pendingCount = pendingStore.getPendingCount(session.sessionDbId);
|
||||||
|
const MAX_PENDING_RESTARTS = 3;
|
||||||
|
|
||||||
if (pendingCount > 0) {
|
if (pendingCount > 0) {
|
||||||
|
// Track consecutive pending-work restarts to prevent infinite loops (e.g. FK errors)
|
||||||
|
session.consecutiveRestarts = (session.consecutiveRestarts || 0) + 1;
|
||||||
|
|
||||||
|
if (session.consecutiveRestarts > MAX_PENDING_RESTARTS) {
|
||||||
|
logger.error('SYSTEM', 'Exceeded max pending-work restarts, stopping to prevent infinite loop', {
|
||||||
|
sessionId: session.sessionDbId,
|
||||||
|
pendingCount,
|
||||||
|
consecutiveRestarts: session.consecutiveRestarts
|
||||||
|
});
|
||||||
|
session.consecutiveRestarts = 0;
|
||||||
|
this.broadcastProcessingStatus();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
logger.info('SYSTEM', 'Pending work remains after generator exit, restarting with fresh AbortController', {
|
logger.info('SYSTEM', 'Pending work remains after generator exit, restarting with fresh AbortController', {
|
||||||
sessionId: session.sessionDbId,
|
sessionId: session.sessionDbId,
|
||||||
pendingCount
|
pendingCount,
|
||||||
|
attempt: session.consecutiveRestarts
|
||||||
});
|
});
|
||||||
// Reset AbortController for restart
|
// Reset AbortController for restart
|
||||||
session.abortController = new AbortController();
|
session.abortController = new AbortController();
|
||||||
// Restart processor
|
// Restart processor
|
||||||
this.startSessionProcessor(session, 'pending-work-restart');
|
this.startSessionProcessor(session, 'pending-work-restart');
|
||||||
|
} else {
|
||||||
|
// Successful completion with no pending work — reset counter
|
||||||
|
session.consecutiveRestarts = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.broadcastProcessingStatus();
|
this.broadcastProcessingStatus();
|
||||||
|
|||||||
Reference in New Issue
Block a user