fix: prevent chroma-mcp spawn storm with 5-layer defense (641 processes → max 2)
SIGHUP testing with 6+ active sessions showed that ChromaSync.ensureConnection()
had no mutex: concurrent fire-and-forget syncObservation() calls each spawned
a chroma-mcp subprocess via StdioClientTransport, creating 641 orphans in ~5min.
Error-driven reconnection formed a positive feedback loop amplifying the storm.
Defense layers:
- Layer 0: Connection mutex via promise memoization (prevents concurrent spawns)
- Layer 1: Pre-spawn process count guard using execFileSync('ps') (kills excess)
- Layer 2: Hardened close() with try-finally + Unix pkill in GracefulShutdown
- Layer 3: Count-based orphan reaper in ProcessManager (not age-based)
- Layer 4: Circuit breaker pauses retries for 60s after 3 consecutive failures
Closes #1063, closes #695
Relates to #1010, #707
This commit is contained in:
@@ -8,6 +8,7 @@ import {
|
||||
removePidFile,
|
||||
getPlatformTimeout,
|
||||
parseElapsedTime,
|
||||
cleanupExcessChromaProcesses,
|
||||
type PidInfo
|
||||
} from '../../src/services/infrastructure/index.js';
|
||||
|
||||
@@ -221,4 +222,63 @@ describe('ProcessManager', () => {
|
||||
expect(result).toBe(666);
|
||||
});
|
||||
});
|
||||
|
||||
describe('cleanupExcessChromaProcesses (Issue #1063)', () => {
  /**
   * Tests for count-based chroma-mcp process cleanup.
   * Unlike the age-based cleanupOrphanedProcesses() which has a 30-minute
   * threshold, this function kills by count — essential for catching spawn
   * storms where all processes are young.
   *
   * NOTE(review): the "custom maxAllowed" and "return a number" tests call the
   * real implementation without mocking. On a machine that is running more
   * chroma-mcp processes than the limit this presumably terminates the
   * excess — confirm that is acceptable for local and CI runs.
   */

  it('should be exported and callable', () => {
    expect(typeof cleanupExcessChromaProcesses).toBe('function');
  });

  it('should return 0 on Windows (Chroma disabled)', async () => {
    // Capture the full original descriptor (not just the value) so the
    // property's writable/enumerable/configurable flags are restored exactly
    // and the override cannot leak into later tests.
    const originalDescriptor = Object.getOwnPropertyDescriptor(process, 'platform');

    Object.defineProperty(process, 'platform', {
      value: 'win32',
      writable: true,
      configurable: true
    });

    try {
      const killed = await cleanupExcessChromaProcesses();
      expect(killed).toBe(0);
    } finally {
      if (originalDescriptor) {
        Object.defineProperty(process, 'platform', originalDescriptor);
      } else {
        // No own property existed before the override; remove ours so any
        // inherited value becomes visible again.
        Reflect.deleteProperty(process, 'platform');
      }
    }
  });

  it('should accept custom maxAllowed parameter', async () => {
    // Should not throw with any valid maxAllowed value
    const killed = await cleanupExcessChromaProcesses(5);
    expect(killed).toBeGreaterThanOrEqual(0);
  });

  it('should return a number (killed count)', async () => {
    const killed = await cleanupExcessChromaProcesses();
    expect(typeof killed).toBe('number');
    expect(killed).toBeGreaterThanOrEqual(0);
  });

  it('should exist in ProcessManager source with count-based logic', async () => {
    // Reads the implementation file as text; the assertions below are
    // substring checks on the source, not behavioral checks.
    const sourceFile = await Bun.file(
      new URL('../../src/services/infrastructure/ProcessManager.ts', import.meta.url)
    ).text();

    // Verify count-based logic exists (not age-based)
    expect(sourceFile).toContain('cleanupExcessChromaProcesses');
    expect(sourceFile).toContain('chroma-mcp');

    // Should sort by age and keep newest
    expect(sourceFile).toContain('.sort(');
    expect(sourceFile).toContain('.slice(maxAllowed)');
  });
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user