fix: prevent chroma-mcp spawn storm with 5-layer defense (641 processes → max 2)
SIGHUP testing with 6+ active sessions revealed that ChromaSync.ensureConnection()
had no mutex: each concurrent fire-and-forget syncObservation() call spawned its own
chroma-mcp subprocess via StdioClientTransport, creating 641 orphans in ~5min.
Error-driven reconnection formed a positive feedback loop amplifying the storm.
Defense layers:
- Layer 0: Connection mutex via promise memoization (prevents concurrent spawns)
- Layer 1: Pre-spawn process count guard using execFileSync('ps') (kills excess)
- Layer 2: Hardened close() with try-finally + Unix pkill in GracefulShutdown
- Layer 3: Count-based orphan reaper in ProcessManager (not age-based)
- Layer 4: Circuit breaker pauses retries for 60s after 3 consecutive failures
Closes #1063, closes #695
Relates to #1010, #707
This commit is contained in:
@@ -19,6 +19,7 @@
|
||||
import { spawn, exec, ChildProcess } from 'child_process';
|
||||
import { promisify } from 'util';
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { cleanupExcessChromaProcesses } from '../infrastructure/ProcessManager.js';
|
||||
|
||||
const execAsync = promisify(exec);
|
||||
|
||||
@@ -212,7 +213,7 @@ async function killSystemOrphans(): Promise<number> {
|
||||
|
||||
try {
|
||||
const { stdout } = await execAsync(
|
||||
'ps -eo pid,ppid,args 2>/dev/null | grep -E "claude.*haiku|claude.*output-format" | grep -v grep'
|
||||
'ps -eo pid,ppid,args 2>/dev/null | grep -E "claude.*haiku|claude.*output-format|chroma-mcp" | grep -v grep'
|
||||
);
|
||||
|
||||
let killed = 0;
|
||||
@@ -262,6 +263,9 @@ export async function reapOrphanedProcesses(activeSessionIds: Set<number>): Prom
|
||||
// Daemon children: find idle SDK processes that didn't terminate
|
||||
killed += await killIdleDaemonChildren();
|
||||
|
||||
// Count-based: kill excess chroma-mcp processes regardless of age
|
||||
killed += await cleanupExcessChromaProcesses();
|
||||
|
||||
return killed;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user