diff --git a/src/services/infrastructure/HealthMonitor.ts b/src/services/infrastructure/HealthMonitor.ts index 815eeea5..d0e86864 100644 --- a/src/services/infrastructure/HealthMonitor.ts +++ b/src/services/infrastructure/HealthMonitor.ts @@ -29,17 +29,21 @@ export async function isPortInUse(port: number): Promise { } /** - * Wait for the worker to become fully ready (passes readiness check) + * Wait for the worker HTTP server to become responsive (liveness check) + * Uses /api/health instead of /api/readiness because: + * - /api/health returns 200 as soon as HTTP server is listening + * - /api/readiness waits for full initialization (MCP connection can take 5+ minutes) + * See: https://github.com/thedotmack/claude-mem/issues/811 * @param port Worker port to check * @param timeoutMs Maximum time to wait in milliseconds - * @returns true if worker became ready, false if timeout + * @returns true if worker became responsive, false if timeout */ export async function waitForHealth(port: number, timeoutMs: number = 30000): Promise { const start = Date.now(); while (Date.now() - start < timeoutMs) { try { // Note: Removed AbortSignal.timeout to avoid Windows Bun cleanup issue (libuv assertion) - const response = await fetch(`http://127.0.0.1:${port}/api/readiness`); + const response = await fetch(`http://127.0.0.1:${port}/api/health`); if (response.ok) return true; } catch (error) { // [ANTI-PATTERN IGNORED]: Retry loop - expected failures during startup, will retry diff --git a/src/shared/worker-utils.ts b/src/shared/worker-utils.ts index e44af2a2..406fae0f 100644 --- a/src/shared/worker-utils.ts +++ b/src/shared/worker-utils.ts @@ -56,13 +56,17 @@ export function clearPortCache(): void { } /** - * Check if worker is responsive and fully initialized by trying the readiness endpoint - * Changed from /health to /api/readiness to ensure MCP initialization is complete + * Check if worker HTTP server is responsive + * Uses /api/health (liveness) instead of /api/readiness because: + * - Hooks have 15-second timeout, but full initialization can take 5+ minutes (MCP connection) + * - /api/health returns 200 as soon as HTTP server is up (sufficient for hook communication) + * - /api/readiness returns 503 until full initialization completes (too slow for hooks) + * See: https://github.com/thedotmack/claude-mem/issues/811 */ async function isWorkerHealthy(): Promise { const port = getWorkerPort(); // Note: Removed AbortSignal.timeout to avoid Windows Bun cleanup issue (libuv assertion) - const response = await fetch(`http://127.0.0.1:${port}/api/readiness`); + const response = await fetch(`http://127.0.0.1:${port}/api/health`); return response.ok; } diff --git a/tests/infrastructure/health-monitor.test.ts b/tests/infrastructure/health-monitor.test.ts index 64f09aa8..9a1d00e8 100644 --- a/tests/infrastructure/health-monitor.test.ts +++ b/tests/infrastructure/health-monitor.test.ts @@ -98,16 +98,18 @@ describe('HealthMonitor', () => { expect(callCount).toBeGreaterThanOrEqual(3); }); - it('should check readiness endpoint not health endpoint', async () => { + it('should check health endpoint for liveness', async () => { const fetchMock = mock(() => Promise.resolve({ ok: true } as Response)); global.fetch = fetchMock; await waitForHealth(37777, 1000); - // waitForHealth uses /api/readiness, not /api/health + // waitForHealth uses /api/health (liveness), not /api/readiness + // This is because hooks have 15-second timeout but full initialization can take 5+ minutes + // See: https://github.com/thedotmack/claude-mem/issues/811 const calls = fetchMock.mock.calls; expect(calls.length).toBeGreaterThan(0); - expect(calls[0][0]).toBe('http://127.0.0.1:37777/api/readiness'); + expect(calls[0][0]).toBe('http://127.0.0.1:37777/api/health'); }); it('should use default timeout when not specified', async () => {