Reduce timeouts to eliminate 10-30s startup delay when worker is dead (common on WSL2 after hibernate). Add stale PID detection, graceful error handling across all handlers, and error classification that distinguishes worker unavailability from handler bugs. - HEALTH_CHECK 30s→3s, new POST_SPAWN_WAIT (5s), PORT_IN_USE_WAIT (3s) - isProcessAlive() with EPERM handling, cleanStalePidFile() - getPluginVersion() try-catch for shutdown race (#1042) - isWorkerUnavailableError: transport+5xx+429→exit 0, 4xx→exit 2 - No-op handler for unknown event types (#984) - Wrap all handler fetch calls in try-catch for graceful degradation - CLAUDE_MEM_HEALTH_TIMEOUT_MS env var override with validation
This commit is contained in:
+65
-2
@@ -8,6 +8,61 @@ export interface HookCommandOptions {
|
||||
skipExit?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Classify whether an error indicates the worker is unavailable (graceful degradation)
|
||||
* vs a handler/client bug (blocking error that developers need to see).
|
||||
*
|
||||
* Exit 0 (graceful degradation):
|
||||
* - Transport failures: ECONNREFUSED, ECONNRESET, EPIPE, ETIMEDOUT, fetch failed
|
||||
* - Timeout errors: timed out, timeout
|
||||
* - Server errors: HTTP 5xx status codes
|
||||
*
|
||||
* Exit 2 (blocking error — handler/client bug):
|
||||
* - HTTP 4xx status codes (bad request, not found, validation error)
|
||||
* - Programming errors (TypeError, ReferenceError, SyntaxError)
|
||||
* - All other unexpected errors
|
||||
*/
|
||||
export function isWorkerUnavailableError(error: unknown): boolean {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
const lower = message.toLowerCase();
|
||||
|
||||
// Transport failures — worker unreachable
|
||||
const transportPatterns = [
|
||||
'econnrefused',
|
||||
'econnreset',
|
||||
'epipe',
|
||||
'etimedout',
|
||||
'enotfound',
|
||||
'econnaborted',
|
||||
'enetunreach',
|
||||
'ehostunreach',
|
||||
'fetch failed',
|
||||
'unable to connect',
|
||||
'socket hang up',
|
||||
];
|
||||
if (transportPatterns.some(p => lower.includes(p))) return true;
|
||||
|
||||
// Timeout errors — worker didn't respond in time
|
||||
if (lower.includes('timed out') || lower.includes('timeout')) return true;
|
||||
|
||||
// HTTP 5xx server errors — worker has internal problems
|
||||
if (/failed:\s*5\d{2}/.test(message) || /status[:\s]+5\d{2}/.test(message)) return true;
|
||||
|
||||
// HTTP 429 (rate limit) — treat as transient unavailability, not a bug
|
||||
if (/failed:\s*429/.test(message) || /status[:\s]+429/.test(message)) return true;
|
||||
|
||||
// HTTP 4xx client errors — our bug, NOT worker unavailability
|
||||
if (/failed:\s*4\d{2}/.test(message) || /status[:\s]+4\d{2}/.test(message)) return false;
|
||||
|
||||
// Programming errors — code bugs, not worker unavailability
|
||||
if (error instanceof TypeError || error instanceof ReferenceError || error instanceof SyntaxError) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Default: treat unknown errors as blocking (conservative — surface bugs)
|
||||
return false;
|
||||
}
|
||||
|
||||
export async function hookCommand(platform: string, event: string, options: HookCommandOptions = {}): Promise<number> {
|
||||
try {
|
||||
const adapter = getPlatformAdapter(platform);
|
||||
@@ -26,9 +81,17 @@ export async function hookCommand(platform: string, event: string, options: Hook
|
||||
}
|
||||
return exitCode;
|
||||
} catch (error) {
|
||||
if (isWorkerUnavailableError(error)) {
|
||||
// Worker unavailable — degrade gracefully, don't block the user
|
||||
console.error(`[claude-mem] Worker unavailable, skipping hook: ${error instanceof Error ? error.message : error}`);
|
||||
if (!options.skipExit) {
|
||||
process.exit(HOOK_EXIT_CODES.SUCCESS); // = 0 (graceful)
|
||||
}
|
||||
return HOOK_EXIT_CODES.SUCCESS;
|
||||
}
|
||||
|
||||
// Handler/client bug — show as blocking error so developers see it
|
||||
console.error(`Hook error: ${error}`);
|
||||
// Use exit code 2 (blocking error) so users see the error message
|
||||
// Exit code 1 only shows in verbose mode per Claude Code docs
|
||||
if (!options.skipExit) {
|
||||
process.exit(HOOK_EXIT_CODES.BLOCKING_ERROR); // = 2
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user