fix: use /api/health instead of /api/readiness for hook health checks

Fixes the "Worker did not become ready within 15 seconds" timeout issue.

Root cause: isWorkerHealthy() and waitForHealth() were checking /api/readiness,
which returns 503 until full initialization completes (including the MCP
connection, which can take 5+ minutes). Hooks only have a 15-second timeout.

Solution: Use /api/health (a liveness check), which returns 200 as soon as the
HTTP server is listening. This is sufficient for hook communication because
the worker can accept requests while background initialization continues.

Changes:
- src/shared/worker-utils.ts: Change /api/readiness to /api/health in isWorkerHealthy()
- src/services/infrastructure/HealthMonitor.ts: Change /api/readiness to /api/health in waitForHealth()
- tests/infrastructure/health-monitor.test.ts: Update test to expect /api/health

Fixes #811, #772, #729

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
bigphoot
2026-01-26 16:11:02 -08:00
committed by Alex Newman
parent b8821f5ea3
commit 74f6b75db2
3 changed files with 19 additions and 9 deletions
+7 -3
View File
@@ -29,17 +29,21 @@ export async function isPortInUse(port: number): Promise<boolean> {
} }
/** /**
* Wait for the worker to become fully ready (passes readiness check) * Wait for the worker HTTP server to become responsive (liveness check)
* Uses /api/health instead of /api/readiness because:
* - /api/health returns 200 as soon as HTTP server is listening
* - /api/readiness waits for full initialization (MCP connection can take 5+ minutes)
* See: https://github.com/thedotmack/claude-mem/issues/811
* @param port Worker port to check * @param port Worker port to check
* @param timeoutMs Maximum time to wait in milliseconds * @param timeoutMs Maximum time to wait in milliseconds
* @returns true if worker became ready, false if timeout * @returns true if worker became responsive, false if timeout
*/ */
export async function waitForHealth(port: number, timeoutMs: number = 30000): Promise<boolean> { export async function waitForHealth(port: number, timeoutMs: number = 30000): Promise<boolean> {
const start = Date.now(); const start = Date.now();
while (Date.now() - start < timeoutMs) { while (Date.now() - start < timeoutMs) {
try { try {
// Note: Removed AbortSignal.timeout to avoid Windows Bun cleanup issue (libuv assertion) // Note: Removed AbortSignal.timeout to avoid Windows Bun cleanup issue (libuv assertion)
const response = await fetch(`http://127.0.0.1:${port}/api/readiness`); const response = await fetch(`http://127.0.0.1:${port}/api/health`);
if (response.ok) return true; if (response.ok) return true;
} catch (error) { } catch (error) {
// [ANTI-PATTERN IGNORED]: Retry loop - expected failures during startup, will retry // [ANTI-PATTERN IGNORED]: Retry loop - expected failures during startup, will retry
+7 -3
View File
@@ -56,13 +56,17 @@ export function clearPortCache(): void {
} }
/** /**
* Check if worker is responsive and fully initialized by trying the readiness endpoint * Check if worker HTTP server is responsive
* Changed from /health to /api/readiness to ensure MCP initialization is complete * Uses /api/health (liveness) instead of /api/readiness because:
* - Hooks have 15-second timeout, but full initialization can take 5+ minutes (MCP connection)
* - /api/health returns 200 as soon as HTTP server is up (sufficient for hook communication)
* - /api/readiness returns 503 until full initialization completes (too slow for hooks)
* See: https://github.com/thedotmack/claude-mem/issues/811
*/ */
async function isWorkerHealthy(): Promise<boolean> { async function isWorkerHealthy(): Promise<boolean> {
const port = getWorkerPort(); const port = getWorkerPort();
// Note: Removed AbortSignal.timeout to avoid Windows Bun cleanup issue (libuv assertion) // Note: Removed AbortSignal.timeout to avoid Windows Bun cleanup issue (libuv assertion)
const response = await fetch(`http://127.0.0.1:${port}/api/readiness`); const response = await fetch(`http://127.0.0.1:${port}/api/health`);
return response.ok; return response.ok;
} }
+5 -3
View File
@@ -98,16 +98,18 @@ describe('HealthMonitor', () => {
expect(callCount).toBeGreaterThanOrEqual(3); expect(callCount).toBeGreaterThanOrEqual(3);
}); });
it('should check readiness endpoint not health endpoint', async () => { it('should check health endpoint for liveness', async () => {
const fetchMock = mock(() => Promise.resolve({ ok: true } as Response)); const fetchMock = mock(() => Promise.resolve({ ok: true } as Response));
global.fetch = fetchMock; global.fetch = fetchMock;
await waitForHealth(37777, 1000); await waitForHealth(37777, 1000);
// waitForHealth uses /api/readiness, not /api/health // waitForHealth uses /api/health (liveness), not /api/readiness
// This is because hooks have 15-second timeout but full initialization can take 5+ minutes
// See: https://github.com/thedotmack/claude-mem/issues/811
const calls = fetchMock.mock.calls; const calls = fetchMock.mock.calls;
expect(calls.length).toBeGreaterThan(0); expect(calls.length).toBeGreaterThan(0);
expect(calls[0][0]).toBe('http://127.0.0.1:37777/api/readiness'); expect(calls[0][0]).toBe('http://127.0.0.1:37777/api/health');
}); });
it('should use default timeout when not specified', async () => { it('should use default timeout when not specified', async () => {