Merge remote-tracking branch 'origin/main' into fix/chroma-mcp-spawn-storm

# Conflicts:
#	src/services/worker-service.ts
#	tests/infrastructure/process-manager.test.ts
This commit is contained in:
Alex Newman
2026-02-11 15:43:08 -05:00
31 changed files with 2145 additions and 321 deletions
+164
View File
@@ -0,0 +1,164 @@
/**
* Tests for hook-command error classifier
*
* Validates that isWorkerUnavailableError correctly distinguishes between:
* - Transport failures (ECONNREFUSED, etc.) → true (graceful degradation)
* - Server errors (5xx) → true (graceful degradation)
* - Client errors (4xx) → false (handler bug, blocking)
* - Programming errors (TypeError, etc.) → false (code bug, blocking)
*/
import { describe, it, expect } from 'bun:test';
import { isWorkerUnavailableError } from '../src/cli/hook-command.js';
describe('isWorkerUnavailableError', () => {
describe('transport failures → true (graceful)', () => {
it('should classify ECONNREFUSED as worker unavailable', () => {
const error = new Error('connect ECONNREFUSED 127.0.0.1:37777');
expect(isWorkerUnavailableError(error)).toBe(true);
});
it('should classify ECONNRESET as worker unavailable', () => {
const error = new Error('socket hang up ECONNRESET');
expect(isWorkerUnavailableError(error)).toBe(true);
});
it('should classify EPIPE as worker unavailable', () => {
const error = new Error('write EPIPE');
expect(isWorkerUnavailableError(error)).toBe(true);
});
it('should classify ETIMEDOUT as worker unavailable', () => {
const error = new Error('connect ETIMEDOUT 127.0.0.1:37777');
expect(isWorkerUnavailableError(error)).toBe(true);
});
it('should classify "fetch failed" as worker unavailable', () => {
const error = new TypeError('fetch failed');
expect(isWorkerUnavailableError(error)).toBe(true);
});
it('should classify "Unable to connect" as worker unavailable', () => {
const error = new Error('Unable to connect to server');
expect(isWorkerUnavailableError(error)).toBe(true);
});
it('should classify ENOTFOUND as worker unavailable', () => {
const error = new Error('getaddrinfo ENOTFOUND localhost');
expect(isWorkerUnavailableError(error)).toBe(true);
});
it('should classify "socket hang up" as worker unavailable', () => {
const error = new Error('socket hang up');
expect(isWorkerUnavailableError(error)).toBe(true);
});
it('should classify ECONNABORTED as worker unavailable', () => {
const error = new Error('ECONNABORTED');
expect(isWorkerUnavailableError(error)).toBe(true);
});
});
describe('timeout errors → true (graceful)', () => {
it('should classify "timed out" as worker unavailable', () => {
const error = new Error('Request timed out after 3000ms');
expect(isWorkerUnavailableError(error)).toBe(true);
});
it('should classify "timeout" as worker unavailable', () => {
const error = new Error('Connection timeout');
expect(isWorkerUnavailableError(error)).toBe(true);
});
});
describe('HTTP 5xx server errors → true (graceful)', () => {
it('should classify 500 status as worker unavailable', () => {
const error = new Error('Context generation failed: 500');
expect(isWorkerUnavailableError(error)).toBe(true);
});
it('should classify 502 status as worker unavailable', () => {
const error = new Error('Observation storage failed: 502');
expect(isWorkerUnavailableError(error)).toBe(true);
});
it('should classify 503 status as worker unavailable', () => {
const error = new Error('Request failed: 503');
expect(isWorkerUnavailableError(error)).toBe(true);
});
it('should classify "status: 500" format as worker unavailable', () => {
const error = new Error('HTTP error status: 500');
expect(isWorkerUnavailableError(error)).toBe(true);
});
});
describe('HTTP 429 rate limit → true (graceful)', () => {
it('should classify 429 as worker unavailable (rate limit is transient)', () => {
const error = new Error('Request failed: 429');
expect(isWorkerUnavailableError(error)).toBe(true);
});
it('should classify "status: 429" format as worker unavailable', () => {
const error = new Error('HTTP error status: 429');
expect(isWorkerUnavailableError(error)).toBe(true);
});
});
describe('HTTP 4xx client errors → false (blocking)', () => {
it('should NOT classify 400 Bad Request as worker unavailable', () => {
const error = new Error('Request failed: 400');
expect(isWorkerUnavailableError(error)).toBe(false);
});
it('should NOT classify 404 Not Found as worker unavailable', () => {
const error = new Error('Observation storage failed: 404');
expect(isWorkerUnavailableError(error)).toBe(false);
});
it('should NOT classify 422 Validation Error as worker unavailable', () => {
const error = new Error('Request failed: 422');
expect(isWorkerUnavailableError(error)).toBe(false);
});
it('should NOT classify "status: 400" format as worker unavailable', () => {
const error = new Error('HTTP error status: 400');
expect(isWorkerUnavailableError(error)).toBe(false);
});
});
describe('programming errors → false (blocking)', () => {
it('should NOT classify TypeError as worker unavailable', () => {
const error = new TypeError('Cannot read properties of undefined');
// Note: TypeError with "fetch failed" IS classified as unavailable (transport layer)
// But generic TypeErrors are NOT
expect(isWorkerUnavailableError(new TypeError('Cannot read properties of undefined'))).toBe(false);
});
it('should NOT classify ReferenceError as worker unavailable', () => {
const error = new ReferenceError('foo is not defined');
expect(isWorkerUnavailableError(error)).toBe(false);
});
it('should NOT classify SyntaxError as worker unavailable', () => {
const error = new SyntaxError('Unexpected token');
expect(isWorkerUnavailableError(error)).toBe(false);
});
});
describe('unknown errors → false (blocking, conservative)', () => {
it('should NOT classify generic Error as worker unavailable', () => {
const error = new Error('Something unexpected happened');
expect(isWorkerUnavailableError(error)).toBe(false);
});
it('should handle string errors', () => {
expect(isWorkerUnavailableError('ECONNREFUSED')).toBe(true);
expect(isWorkerUnavailableError('random error')).toBe(false);
});
it('should handle null/undefined errors', () => {
expect(isWorkerUnavailableError(null)).toBe(false);
expect(isWorkerUnavailableError(undefined)).toBe(false);
});
});
});
+10 -6
View File
@@ -28,18 +28,22 @@ describe('hook-constants', () => {
expect(HOOK_TIMEOUTS.DEFAULT).toBe(300000);
});
it('should define HEALTH_CHECK timeout', () => {
expect(HOOK_TIMEOUTS.HEALTH_CHECK).toBe(30000);
it('should define HEALTH_CHECK timeout as 3s (reduced from 30s)', () => {
expect(HOOK_TIMEOUTS.HEALTH_CHECK).toBe(3000);
});
it('should define POST_SPAWN_WAIT as 5s', () => {
expect(HOOK_TIMEOUTS.POST_SPAWN_WAIT).toBe(5000);
});
it('should define PORT_IN_USE_WAIT as 3s', () => {
expect(HOOK_TIMEOUTS.PORT_IN_USE_WAIT).toBe(3000);
});
it('should define WORKER_STARTUP_WAIT', () => {
expect(HOOK_TIMEOUTS.WORKER_STARTUP_WAIT).toBe(1000);
});
it('should define WORKER_STARTUP_RETRIES', () => {
expect(HOOK_TIMEOUTS.WORKER_STARTUP_RETRIES).toBe(300);
});
it('should define PRE_RESTART_SETTLE_DELAY', () => {
expect(HOOK_TIMEOUTS.PRE_RESTART_SETTLE_DELAY).toBe(2000);
});
@@ -9,6 +9,9 @@ import {
getPlatformTimeout,
parseElapsedTime,
cleanupExcessChromaProcesses,
isProcessAlive,
cleanStalePidFile,
spawnDaemon,
type PidInfo
} from '../../src/services/infrastructure/index.js';
@@ -281,4 +284,138 @@ describe('ProcessManager', () => {
expect(sourceFile).toContain('.slice(maxAllowed)');
});
});
describe('isProcessAlive', () => {
it('should return true for the current process', () => {
expect(isProcessAlive(process.pid)).toBe(true);
});
it('should return false for a non-existent PID', () => {
// Use a very high PID that's extremely unlikely to exist
expect(isProcessAlive(2147483647)).toBe(false);
});
it('should return true for PID 0 (Windows WMIC sentinel)', () => {
expect(isProcessAlive(0)).toBe(true);
});
it('should return false for negative PIDs', () => {
expect(isProcessAlive(-1)).toBe(false);
expect(isProcessAlive(-999)).toBe(false);
});
it('should return false for non-integer PIDs', () => {
expect(isProcessAlive(1.5)).toBe(false);
expect(isProcessAlive(NaN)).toBe(false);
});
});
describe('cleanStalePidFile', () => {
it('should remove PID file when process is dead', () => {
// Write a PID file with a non-existent PID
const staleInfo: PidInfo = {
pid: 2147483647,
port: 37777,
startedAt: '2024-01-01T00:00:00.000Z'
};
writePidFile(staleInfo);
expect(existsSync(PID_FILE)).toBe(true);
cleanStalePidFile();
expect(existsSync(PID_FILE)).toBe(false);
});
it('should keep PID file when process is alive', () => {
// Write a PID file with the current process PID (definitely alive)
const liveInfo: PidInfo = {
pid: process.pid,
port: 37777,
startedAt: new Date().toISOString()
};
writePidFile(liveInfo);
cleanStalePidFile();
// PID file should still exist since process.pid is alive
expect(existsSync(PID_FILE)).toBe(true);
});
it('should do nothing when PID file does not exist', () => {
removePidFile();
expect(existsSync(PID_FILE)).toBe(false);
// Should not throw
expect(() => cleanStalePidFile()).not.toThrow();
});
});
describe('spawnDaemon', () => {
it('should use setsid on Linux when available', () => {
// setsid should exist at /usr/bin/setsid on Linux
if (process.platform === 'win32') return; // Skip on Windows
const setsidAvailable = existsSync('/usr/bin/setsid');
if (!setsidAvailable) return; // Skip if setsid not installed
// Spawn a daemon with a non-existent script (it will fail to start, but we can verify the spawn attempt)
// Use a harmless script path — the child will exit immediately
const pid = spawnDaemon('/dev/null', 39999);
// setsid spawn should return a PID (the setsid process itself)
expect(pid).toBeDefined();
expect(typeof pid).toBe('number');
// Clean up: kill the spawned process if it's still alive
if (pid !== undefined && pid > 0) {
try { process.kill(pid, 'SIGKILL'); } catch { /* already exited */ }
}
});
it('should return undefined when spawn fails on Windows path', () => {
// On non-Windows, this tests the Unix path which should succeed
// The function should not throw, only return undefined on failure
if (process.platform === 'win32') return;
// Spawning with a totally invalid script should still return a PID
// (setsid/spawn succeeds even if the child will exit immediately)
const result = spawnDaemon('/nonexistent/script.cjs', 39998);
// spawn itself should succeed (returns PID), even if child exits
expect(result).toBeDefined();
// Clean up
if (result !== undefined && result > 0) {
try { process.kill(result, 'SIGKILL'); } catch { /* already exited */ }
}
});
});
describe('SIGHUP handling', () => {
it('should have SIGHUP listeners registered (integration check)', () => {
// Verify that SIGHUP listener registration is possible on Unix
if (process.platform === 'win32') return;
// Register a test handler, verify it works, then remove it
let received = false;
const testHandler = () => { received = true; };
process.on('SIGHUP', testHandler);
expect(process.listenerCount('SIGHUP')).toBeGreaterThanOrEqual(1);
// Clean up the test handler
process.removeListener('SIGHUP', testHandler);
});
it('should ignore SIGHUP when --daemon is in process.argv', () => {
if (process.platform === 'win32') return;
// Simulate the daemon SIGHUP handler logic
const isDaemon = process.argv.includes('--daemon');
// In test context, --daemon is not in argv, so this tests the branch logic
expect(isDaemon).toBe(false);
// Verify the non-daemon path: SIGHUP should trigger shutdown (covered by registerSignalHandlers)
// This is a logic verification test — actual signal delivery is tested manually
});
});
});