74f6b75db2
Fixes the "Worker did not become ready within 15 seconds" timeout issue. Root cause: isWorkerHealthy() and waitForHealth() were checking /api/readiness which returns 503 until full initialization completes (including MCP connection which can take 5+ minutes). Hooks only have 15 seconds timeout. Solution: Use /api/health (liveness check) which returns 200 as soon as the HTTP server is listening. This is sufficient for hook communication since the worker can accept requests while background initialization continues. Changes: - src/shared/worker-utils.ts: Change /api/readiness to /api/health in isWorkerHealthy() - src/services/infrastructure/HealthMonitor.ts: Change /api/readiness to /api/health in waitForHealth() - tests/infrastructure/health-monitor.test.ts: Update test to expect /api/health Fixes #811, #772, #729 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
140 lines
4.8 KiB
TypeScript
140 lines
4.8 KiB
TypeScript
import path from "path";
|
|
import { homedir } from "os";
|
|
import { readFileSync } from "fs";
|
|
import { logger } from "../utils/logger.js";
|
|
import { HOOK_TIMEOUTS, getTimeout } from "./hook-constants.js";
|
|
import { SettingsDefaultsManager } from "./SettingsDefaultsManager.js";
|
|
|
|
// Plugin directory under the user's home (~/.claude/plugins/marketplaces/thedotmack).
// The package.json in this directory is read to determine the plugin version.
const MARKETPLACE_ROOT = path.join(
  homedir(),
  '.claude',
  'plugins',
  'marketplaces',
  'thedotmack'
);

// Time budget for worker health checks, derived from HOOK_TIMEOUTS.HEALTH_CHECK via getTimeout
// (milliseconds, per the constant's name).
const HEALTH_CHECK_TIMEOUT_MS = getTimeout(HOOK_TIMEOUTS.HEALTH_CHECK);

// Memoized worker connection settings so the settings file is read at most once
// per value; null means "not loaded yet".
let cachedPort: number | null = null;
let cachedHost: string | null = null;
|
|
|
|
/**
 * Resolve the worker's port number.
 *
 * The value comes from CLAUDE_MEM_WORKER_PORT in settings.json inside the
 * configured data directory (documented default: 37777). The parsed result is
 * memoized in `cachedPort`, so the settings file is only read on the first call
 * (or the first call after the cache has been cleared).
 *
 * @returns The worker port parsed as a base-10 integer.
 */
export function getWorkerPort(): number {
  if (cachedPort === null) {
    const dataDir = SettingsDefaultsManager.get('CLAUDE_MEM_DATA_DIR');
    const loaded = SettingsDefaultsManager.loadFromFile(path.join(dataDir, 'settings.json'));
    // NOTE(review): parseInt returns NaN for a non-numeric value and that NaN would
    // be cached here — confirm loadFromFile guarantees a numeric string.
    cachedPort = parseInt(loaded.CLAUDE_MEM_WORKER_PORT, 10);
  }

  return cachedPort;
}
|
|
|
|
/**
 * Resolve the worker's host address.
 *
 * The value comes from CLAUDE_MEM_WORKER_HOST in settings.json inside the
 * configured data directory (documented default: 127.0.0.1). The result is
 * memoized in `cachedHost`, so the settings file is only read when no host has
 * been cached yet.
 *
 * @returns The worker host as stored in settings.
 */
export function getWorkerHost(): string {
  if (cachedHost === null) {
    const settingsFile = path.join(
      SettingsDefaultsManager.get('CLAUDE_MEM_DATA_DIR'),
      'settings.json'
    );
    const { CLAUDE_MEM_WORKER_HOST: configuredHost } =
      SettingsDefaultsManager.loadFromFile(settingsFile);
    cachedHost = configuredHost;
  }

  return cachedHost;
}
|
|
|
|
/**
 * Drop the memoized worker port and host.
 *
 * Invoke after settings change so the next getWorkerPort()/getWorkerHost()
 * call re-reads the settings file instead of returning stale values.
 */
export function clearPortCache(): void {
  // Both caches share the same "not loaded" sentinel, so reset them together.
  cachedPort = cachedHost = null;
}
|
|
|
|
/**
 * Check if the worker HTTP server is responsive.
 *
 * Uses /api/health (liveness) instead of /api/readiness because:
 * - Hooks have a 15-second timeout, but full initialization can take 5+ minutes (MCP connection)
 * - /api/health returns 200 as soon as the HTTP server is up (sufficient for hook communication)
 * - /api/readiness returns 503 until full initialization completes (too slow for hooks)
 * See: https://github.com/thedotmack/claude-mem/issues/811
 *
 * The wait is bounded by HEALTH_CHECK_TIMEOUT_MS so that a listener which accepts
 * the connection but never answers cannot stall the caller indefinitely.
 *
 * @returns true when the health endpoint answers with a 2xx status, false for any other status.
 * @throws If the request fails at the network level or does not answer within
 *         HEALTH_CHECK_TIMEOUT_MS.
 */
async function isWorkerHealthy(): Promise<boolean> {
  const port = getWorkerPort();
  const url = `http://127.0.0.1:${port}/api/health`;

  // AbortSignal.timeout is deliberately not used (Windows Bun cleanup issue, libuv
  // assertion). Instead the request is raced against a plain timer: on expiry the
  // request is no longer awaited, although it is not cancelled.
  let timer: ReturnType<typeof setTimeout> | undefined;
  const deadline = new Promise<never>((_resolve, reject) => {
    timer = setTimeout(
      () => reject(new Error(`Worker health check timed out after ${HEALTH_CHECK_TIMEOUT_MS}ms`)),
      HEALTH_CHECK_TIMEOUT_MS
    );
  });

  try {
    const response = await Promise.race([fetch(url), deadline]);
    return response.ok;
  } finally {
    // Always release the timer so it cannot keep the process alive after we are done.
    clearTimeout(timer);
  }
}
|
|
|
|
/**
 * Read the installed plugin's version.
 *
 * Synchronously loads package.json from MARKETPLACE_ROOT and returns its
 * `version` field.
 *
 * @returns The `version` value found in the plugin's package.json.
 * @throws If the file cannot be read or does not contain valid JSON.
 */
function getPluginVersion(): string {
  const manifestText = readFileSync(path.join(MARKETPLACE_ROOT, 'package.json'), 'utf-8');
  return JSON.parse(manifestText).version;
}
|
|
|
|
/**
 * Get the running worker's version from its /api/version endpoint.
 *
 * The wait for the response is bounded by HEALTH_CHECK_TIMEOUT_MS so that an
 * unresponsive listener on the worker port cannot stall the caller indefinitely.
 *
 * @returns The `version` field of the JSON body returned by the worker.
 * @throws If the request fails, does not answer within HEALTH_CHECK_TIMEOUT_MS,
 *         or answers with a non-2xx status.
 */
async function getWorkerVersion(): Promise<string> {
  const port = getWorkerPort();
  const url = `http://127.0.0.1:${port}/api/version`;

  // AbortSignal.timeout is deliberately not used (Windows Bun cleanup issue, libuv
  // assertion). Instead the request is raced against a plain timer: on expiry the
  // request is no longer awaited, although it is not cancelled.
  let timer: ReturnType<typeof setTimeout> | undefined;
  const deadline = new Promise<never>((_resolve, reject) => {
    timer = setTimeout(
      () => reject(new Error(`Worker version request timed out after ${HEALTH_CHECK_TIMEOUT_MS}ms`)),
      HEALTH_CHECK_TIMEOUT_MS
    );
  });

  let response: Response;
  try {
    response = await Promise.race([fetch(url), deadline]);
  } finally {
    // Always release the timer so it cannot keep the process alive after we are done.
    clearTimeout(timer);
  }

  if (!response.ok) {
    throw new Error(`Failed to get worker version: ${response.status}`);
  }

  const data = await response.json() as { version: string };
  return data.version;
}
|
|
|
|
/**
 * Compare the running worker's version with the installed plugin's version.
 *
 * Purely informational: a mismatch is only written to the debug log. The actual
 * restart on mismatch is handled by the worker-service.ts start command (issue #484).
 *
 * @throws Whatever getPluginVersion() or getWorkerVersion() throw; nothing is caught here.
 */
async function checkWorkerVersion(): Promise<void> {
  const pluginVersion = getPluginVersion();
  const workerVersion = await getWorkerVersion();

  // Matching versions need no logging.
  if (pluginVersion === workerVersion) {
    return;
  }

  // Record the mismatch for diagnostics only; no restart is triggered from here.
  const details = {
    pluginVersion,
    workerVersion,
    note: 'Mismatch will be auto-restarted by worker-service start command'
  };
  logger.debug('SYSTEM', 'Version check', details);
}
|
|
|
|
|
|
/**
 * Ensure the worker service is running.
 *
 * Performs a single health check (no polling, no restart). The port might be in
 * use by another process, or the worker might not be started yet; in either
 * case this resolves to false instead of throwing so the caller can decide how
 * to proceed.
 *
 * When the worker is healthy, an informational version comparison is also run.
 * A failure of that comparison is logged at debug level and never changes the
 * result: a healthy worker is always reported as healthy.
 *
 * @returns true if the worker answered the health check successfully, false otherwise.
 */
export async function ensureWorkerRunning(): Promise<boolean> {
  // Quick health check (single attempt, no polling)
  let healthy = false;
  try {
    healthy = await isWorkerHealthy();
  } catch (e) {
    // Not healthy - log for debugging
    logger.debug('SYSTEM', 'Worker health check failed', {
      error: e instanceof Error ? e.message : String(e)
    });
  }

  if (!healthy) {
    // Port might be in use by something else, or worker not started.
    // Return false but don't throw - let caller decide how to handle.
    logger.warn('SYSTEM', 'Worker not healthy, hook will proceed gracefully');
    return false;
  }

  // The version check only logs (debug level) on mismatch and never restarts.
  // It is kept outside the health-check try block so that a failure here
  // (e.g. unreadable package.json, /api/version error) is not misreported as
  // an unhealthy worker.
  try {
    await checkWorkerVersion();
  } catch (e) {
    logger.debug('SYSTEM', 'Worker version check failed', {
      error: e instanceof Error ? e.message : String(e)
    });
  }

  return true; // Worker healthy
}
|