Two changes fix the observer process resource leak:

1. Add ensureProcessExit to the generator finally blocks in SessionRoutes and worker-service, matching the pattern that already works in SDKAgent.
2. Add a stale session reaper (runs every 2 minutes) that removes sessions with no active generator and no pending work once they are more than 15 minutes old (measured from session start). This unblocks the orphan reaper, which previously skipped processes belonging to such "active" sessions.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -116,7 +116,7 @@ import { LogsRoutes } from './worker/http/routes/LogsRoutes.js';
|
||||
import { MemoryRoutes } from './worker/http/routes/MemoryRoutes.js';
|
||||
|
||||
// Process management for zombie cleanup (Issue #737)
|
||||
import { startOrphanReaper, reapOrphanedProcesses } from './worker/ProcessRegistry.js';
|
||||
import { startOrphanReaper, reapOrphanedProcesses, getProcessBySession, ensureProcessExit } from './worker/ProcessRegistry.js';
|
||||
|
||||
/**
|
||||
* Build JSON status output for hook framework communication.
|
||||
@@ -176,6 +176,9 @@ export class WorkerService {
|
||||
// Orphan reaper cleanup function (Issue #737)
|
||||
private stopOrphanReaper: (() => void) | null = null;
|
||||
|
||||
// Stale session reaper interval (Issue #1168)
|
||||
private staleSessionReaperInterval: ReturnType<typeof setInterval> | null = null;
|
||||
|
||||
// AI interaction tracking for health endpoint
|
||||
private lastAiInteraction: {
|
||||
timestamp: number;
|
||||
@@ -465,6 +468,18 @@ export class WorkerService {
|
||||
});
|
||||
logger.info('SYSTEM', 'Started orphan reaper (runs every 5 minutes)');
|
||||
|
||||
// Reap stale sessions to unblock orphan process cleanup (Issue #1168)
|
||||
this.staleSessionReaperInterval = setInterval(async () => {
|
||||
try {
|
||||
const reaped = await this.sessionManager.reapStaleSessions();
|
||||
if (reaped > 0) {
|
||||
logger.info('SYSTEM', `Reaped ${reaped} stale sessions`);
|
||||
}
|
||||
} catch (e) {
|
||||
logger.error('SYSTEM', 'Stale session reaper error', { error: e instanceof Error ? e.message : String(e) });
|
||||
}
|
||||
}, 2 * 60 * 1000);
|
||||
|
||||
// Auto-recover orphaned queues (fire-and-forget with error logging)
|
||||
this.processPendingQueues(50).then(result => {
|
||||
if (result.sessionsStarted > 0) {
|
||||
@@ -593,7 +608,13 @@ export class WorkerService {
|
||||
};
|
||||
throw error;
|
||||
})
|
||||
.finally(() => {
|
||||
.finally(async () => {
|
||||
// CRITICAL: Verify subprocess exit to prevent zombie accumulation (Issue #1168)
|
||||
const trackedProcess = getProcessBySession(session.sessionDbId);
|
||||
if (trackedProcess && !trackedProcess.process.killed && trackedProcess.process.exitCode === null) {
|
||||
await ensureProcessExit(trackedProcess, 5000);
|
||||
}
|
||||
|
||||
session.generatorPromise = null;
|
||||
|
||||
// Record successful AI interaction if no error occurred
|
||||
@@ -823,6 +844,12 @@ export class WorkerService {
|
||||
this.stopOrphanReaper = null;
|
||||
}
|
||||
|
||||
// Stop stale session reaper (Issue #1168)
|
||||
if (this.staleSessionReaperInterval) {
|
||||
clearInterval(this.staleSessionReaperInterval);
|
||||
this.staleSessionReaperInterval = null;
|
||||
}
|
||||
|
||||
await performGracefulShutdown({
|
||||
server: this.server.getHttpServer(),
|
||||
sessionManager: this.sessionManager,
|
||||
|
||||
@@ -341,6 +341,39 @@ export class SessionManager {
|
||||
}
|
||||
}
|
||||
|
||||
private static readonly MAX_SESSION_IDLE_MS = 15 * 60 * 1000; // 15 minutes
|
||||
|
||||
/**
 * Reap sessions that have no active generator, no pending work, and were
 * started more than MAX_SESSION_IDLE_MS ago.
 * This unblocks the orphan reaper which skips processes for "active" sessions. (Issue #1168)
 *
 * Candidates are collected in a first pass and then deleted one at a time.
 * Because each deletion is awaited, other work can run in between, so every
 * candidate is re-validated right before it is deleted. A failure to delete
 * one session is logged and does not stop the remaining candidates from
 * being processed.
 *
 * NOTE(review): staleness is measured from `session.startTime`, i.e. session
 * age rather than time since last activity. A long-lived session whose
 * generator has just finished is therefore eligible immediately. Confirm this
 * is intended, or switch to a last-activity timestamp if the session has one.
 *
 * @returns The number of sessions that were actually deleted.
 */
async reapStaleSessions(): Promise<number> {
  const now = Date.now();

  // A session is "busy" while a generator is attached or work is still queued for it.
  const isBusy = (sessionDbId: number, session: { generatorPromise: unknown }): boolean =>
    Boolean(session.generatorPromise) ||
    this.getPendingStore().getPendingCount(sessionDbId) > 0;

  // Pass 1: snapshot the candidates so the collection is not mutated while iterating.
  const candidates: Array<[number, { generatorPromise: unknown }]> = [];
  for (const [sessionDbId, session] of this.sessions) {
    if (isBusy(sessionDbId, session)) continue;

    // No generator + no pending work + old enough = stale
    if (now - session.startTime > SessionManager.MAX_SESSION_IDLE_MS) {
      candidates.push([sessionDbId, session]);
    }
  }

  // Pass 2: delete, re-checking each candidate because earlier awaits may have
  // allowed a generator to start or new work to be queued for it.
  let reapedCount = 0;
  for (const [sessionDbId, session] of candidates) {
    if (isBusy(sessionDbId, session)) continue;

    logger.warn('SESSION', `Reaping stale session ${sessionDbId} (no activity for >${Math.round(SessionManager.MAX_SESSION_IDLE_MS / 60000)}m)`, { sessionDbId });

    try {
      await this.deleteSession(sessionDbId);
      reapedCount += 1;
    } catch (error) {
      // Keep going: one failing session must not block cleanup of the others.
      logger.error('SESSION', `Failed to reap stale session ${sessionDbId}`, {
        sessionDbId,
        error: error instanceof Error ? error.message : String(error)
      });
    }
  }

  return reapedCount;
}
|
||||
|
||||
/**
|
||||
* Shutdown all active sessions
|
||||
*/
|
||||
|
||||
@@ -21,6 +21,7 @@ import { SessionCompletionHandler } from '../../session/SessionCompletionHandler
|
||||
import { PrivacyCheckValidator } from '../../validation/PrivacyCheckValidator.js';
|
||||
import { SettingsDefaultsManager } from '../../../../shared/SettingsDefaultsManager.js';
|
||||
import { USER_SETTINGS_PATH } from '../../../../shared/paths.js';
|
||||
import { getProcessBySession, ensureProcessExit } from '../../ProcessRegistry.js';
|
||||
|
||||
export class SessionRoutes extends BaseRouteHandler {
|
||||
private completionHandler: SessionCompletionHandler;
|
||||
@@ -184,7 +185,13 @@ export class SessionRoutes extends BaseRouteHandler {
|
||||
}, dbError as Error);
|
||||
}
|
||||
})
|
||||
.finally(() => {
|
||||
.finally(async () => {
|
||||
// CRITICAL: Verify subprocess exit to prevent zombie accumulation (Issue #1168)
|
||||
const tracked = getProcessBySession(session.sessionDbId);
|
||||
if (tracked && !tracked.process.killed && tracked.process.exitCode === null) {
|
||||
await ensureProcessExit(tracked, 5000);
|
||||
}
|
||||
|
||||
const sessionDbId = session.sessionDbId;
|
||||
this.spawnInProgress.delete(sessionDbId);
|
||||
const wasAborted = session.abortController.signal.aborted;
|
||||
|
||||
Reference in New Issue
Block a user