From 74f6b75db23aa22584c0d39344956c5f1c2904f8 Mon Sep 17 00:00:00 2001 From: bigphoot Date: Mon, 26 Jan 2026 16:11:02 -0800 Subject: [PATCH] fix: use /api/health instead of /api/readiness for hook health checks Fixes the "Worker did not become ready within 15 seconds" timeout issue. Root cause: isWorkerHealthy() and waitForHealth() were checking /api/readiness which returns 503 until full initialization completes (including MCP connection which can take 5+ minutes). Hooks only have 15 seconds timeout. Solution: Use /api/health (liveness check) which returns 200 as soon as the HTTP server is listening. This is sufficient for hook communication since the worker can accept requests while background initialization continues. Changes: - src/shared/worker-utils.ts: Change /api/readiness to /api/health in isWorkerHealthy() - src/services/infrastructure/HealthMonitor.ts: Change /api/readiness to /api/health in waitForHealth() - tests/infrastructure/health-monitor.test.ts: Update test to expect /api/health Fixes #811, #772, #729 Co-Authored-By: Claude Opus 4.5 --- src/services/infrastructure/HealthMonitor.ts | 10 +++++++--- src/shared/worker-utils.ts | 10 +++++++--- tests/infrastructure/health-monitor.test.ts | 8 +++++--- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/src/services/infrastructure/HealthMonitor.ts b/src/services/infrastructure/HealthMonitor.ts index 815eeea5..d0e86864 100644 --- a/src/services/infrastructure/HealthMonitor.ts +++ b/src/services/infrastructure/HealthMonitor.ts @@ -29,17 +29,21 @@ export async function isPortInUse(port: number): Promise { } /** - * Wait for the worker to become fully ready (passes readiness check) + * Wait for the worker HTTP server to become responsive (liveness check) + * Uses /api/health instead of /api/readiness because: + * - /api/health returns 200 as soon as HTTP server is listening + * - /api/readiness waits for full initialization (MCP connection can take 5+ minutes) + * See: https://github.com/thedotmack/claude-mem/issues/811 * @param port Worker port to check * @param timeoutMs Maximum time to wait in milliseconds - * @returns true if worker became ready, false if timeout + * @returns true if worker became responsive, false if timeout */ export async function waitForHealth(port: number, timeoutMs: number = 30000): Promise { const start = Date.now(); while (Date.now() - start < timeoutMs) { try { // Note: Removed AbortSignal.timeout to avoid Windows Bun cleanup issue (libuv assertion) - const response = await fetch(`http://127.0.0.1:${port}/api/readiness`); + const response = await fetch(`http://127.0.0.1:${port}/api/health`); if (response.ok) return true; } catch (error) { // [ANTI-PATTERN IGNORED]: Retry loop - expected failures during startup, will retry diff --git a/src/shared/worker-utils.ts b/src/shared/worker-utils.ts index e44af2a2..406fae0f 100644 --- a/src/shared/worker-utils.ts +++ b/src/shared/worker-utils.ts @@ -56,13 +56,17 @@ export function clearPortCache(): void { } /** - * Check if worker is responsive and fully initialized by trying the readiness endpoint - * Changed from /health to /api/readiness to ensure MCP initialization is complete + * Check if worker HTTP server is responsive + * Uses /api/health (liveness) instead of /api/readiness because: + * - Hooks have 15-second timeout, but full initialization can take 5+ minutes (MCP connection) + * - /api/health returns 200 as soon as HTTP server is up (sufficient for hook communication) + * - /api/readiness returns 503 until full initialization completes (too slow for hooks) + * See: https://github.com/thedotmack/claude-mem/issues/811 */ async function isWorkerHealthy(): Promise { const port = getWorkerPort(); // Note: Removed AbortSignal.timeout to avoid Windows Bun cleanup issue (libuv assertion) - const response = await fetch(`http://127.0.0.1:${port}/api/readiness`); + const response = await fetch(`http://127.0.0.1:${port}/api/health`); return response.ok; } diff --git a/tests/infrastructure/health-monitor.test.ts b/tests/infrastructure/health-monitor.test.ts index 64f09aa8..9a1d00e8 100644 --- a/tests/infrastructure/health-monitor.test.ts +++ b/tests/infrastructure/health-monitor.test.ts @@ -98,16 +98,18 @@ describe('HealthMonitor', () => { expect(callCount).toBeGreaterThanOrEqual(3); }); - it('should check readiness endpoint not health endpoint', async () => { + it('should check health endpoint for liveness', async () => { const fetchMock = mock(() => Promise.resolve({ ok: true } as Response)); global.fetch = fetchMock; await waitForHealth(37777, 1000); - // waitForHealth uses /api/readiness, not /api/health + // waitForHealth uses /api/health (liveness), not /api/readiness + // This is because hooks have 15-second timeout but full initialization can take 5+ minutes + // See: https://github.com/thedotmack/claude-mem/issues/811 const calls = fetchMock.mock.calls; expect(calls.length).toBeGreaterThan(0); - expect(calls[0][0]).toBe('http://127.0.0.1:37777/api/readiness'); + expect(calls[0][0]).toBe('http://127.0.0.1:37777/api/health'); }); it('should use default timeout when not specified', async () => {