fix: use /api/health instead of /api/readiness for hook health checks

Fixes the "Worker did not become ready within 15 seconds" timeout issue.

Root cause: isWorkerHealthy() and waitForHealth() were checking /api/readiness
which returns 503 until full initialization completes (including MCP connection
which can take 5+ minutes). Hooks only have 15 seconds timeout.

Solution: Use /api/health (liveness check) which returns 200 as soon as the
HTTP server is listening. This is sufficient for hook communication since
the worker can accept requests while background initialization continues.

Changes:
- src/shared/worker-utils.ts: Change /api/readiness to /api/health in isWorkerHealthy()
- src/services/infrastructure/HealthMonitor.ts: Change /api/readiness to /api/health in waitForHealth()
- tests/infrastructure/health-monitor.test.ts: Update test to expect /api/health

Fixes #811, #772, #729

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
bigphoot
2026-01-26 16:11:02 -08:00
committed by Alex Newman
parent b8821f5ea3
commit 74f6b75db2
3 changed files with 19 additions and 9 deletions
+7 -3
View File
@@ -29,17 +29,21 @@ export async function isPortInUse(port: number): Promise<boolean> {
}
/**
* Wait for the worker to become fully ready (passes readiness check)
* Wait for the worker HTTP server to become responsive (liveness check)
* Uses /api/health instead of /api/readiness because:
* - /api/health returns 200 as soon as HTTP server is listening
* - /api/readiness waits for full initialization (MCP connection can take 5+ minutes)
* See: https://github.com/thedotmack/claude-mem/issues/811
* @param port Worker port to check
* @param timeoutMs Maximum time to wait in milliseconds
* @returns true if worker became ready, false if timeout
* @returns true if worker became responsive, false if timeout
*/
export async function waitForHealth(port: number, timeoutMs: number = 30000): Promise<boolean> {
const start = Date.now();
while (Date.now() - start < timeoutMs) {
try {
// Note: Removed AbortSignal.timeout to avoid Windows Bun cleanup issue (libuv assertion)
const response = await fetch(`http://127.0.0.1:${port}/api/readiness`);
const response = await fetch(`http://127.0.0.1:${port}/api/health`);
if (response.ok) return true;
} catch (error) {
// [ANTI-PATTERN IGNORED]: Retry loop - expected failures during startup, will retry
+7 -3
View File
@@ -56,13 +56,17 @@ export function clearPortCache(): void {
}
/**
* Check if worker is responsive and fully initialized by trying the readiness endpoint
* Changed from /health to /api/readiness to ensure MCP initialization is complete
* Check if worker HTTP server is responsive
* Uses /api/health (liveness) instead of /api/readiness because:
* - Hooks have 15-second timeout, but full initialization can take 5+ minutes (MCP connection)
* - /api/health returns 200 as soon as HTTP server is up (sufficient for hook communication)
* - /api/readiness returns 503 until full initialization completes (too slow for hooks)
* See: https://github.com/thedotmack/claude-mem/issues/811
*/
async function isWorkerHealthy(): Promise<boolean> {
const port = getWorkerPort();
// Note: Removed AbortSignal.timeout to avoid Windows Bun cleanup issue (libuv assertion)
const response = await fetch(`http://127.0.0.1:${port}/api/readiness`);
const response = await fetch(`http://127.0.0.1:${port}/api/health`);
return response.ok;
}
+5 -3
View File
@@ -98,16 +98,18 @@ describe('HealthMonitor', () => {
expect(callCount).toBeGreaterThanOrEqual(3);
});
it('should check readiness endpoint not health endpoint', async () => {
it('should check health endpoint for liveness', async () => {
const fetchMock = mock(() => Promise.resolve({ ok: true } as Response));
global.fetch = fetchMock;
await waitForHealth(37777, 1000);
// waitForHealth uses /api/readiness, not /api/health
// waitForHealth uses /api/health (liveness), not /api/readiness
// This is because hooks have 15-second timeout but full initialization can take 5+ minutes
// See: https://github.com/thedotmack/claude-mem/issues/811
const calls = fetchMock.mock.calls;
expect(calls.length).toBeGreaterThan(0);
expect(calls[0][0]).toBe('http://127.0.0.1:37777/api/readiness');
expect(calls[0][0]).toBe('http://127.0.0.1:37777/api/health');
});
it('should use default timeout when not specified', async () => {