fix: hook resilience and worker lifecycle improvements (#957, #923, #984, #987, #1042)

Reduce timeouts to eliminate 10-30s startup delay when worker is dead
(common on WSL2 after hibernate). Add stale PID detection, graceful
error handling across all handlers, and error classification that
distinguishes worker unavailability from handler bugs.

- HEALTH_CHECK 30s→3s, new POST_SPAWN_WAIT (5s), PORT_IN_USE_WAIT (3s)
- isProcessAlive() with EPERM handling, cleanStalePidFile()
- getPluginVersion() try-catch for shutdown race (#1042)
- isWorkerUnavailableError: transport+5xx+429→exit 0, 4xx→exit 2
- No-op handler for unknown event types (#984)
- Wrap all handler fetch calls in try-catch for graceful degradation
- CLAUDE_MEM_HEALTH_TIMEOUT_MS env var override with validation
This commit is contained in:
Rod Boev
2026-02-10 15:34:35 -05:00
parent 6ac5507e4e
commit 418e38ee46
16 changed files with 791 additions and 348 deletions
+28 -13
View File
@@ -9,6 +9,7 @@ import type { EventHandler, NormalizedHookInput, HookResult } from '../types.js'
import { ensureWorkerRunning, getWorkerPort } from '../../shared/worker-utils.js';
import { getProjectContext } from '../../utils/project-name.js';
import { HOOK_EXIT_CODES } from '../../shared/hook-constants.js';
import { logger } from '../../utils/logger.js';
export const contextHandler: EventHandler = {
async execute(input: NormalizedHookInput): Promise<HookResult> {
@@ -35,20 +36,34 @@ export const contextHandler: EventHandler = {
// Note: Removed AbortSignal.timeout due to Windows Bun cleanup issue (libuv assertion)
// Worker service has its own timeouts, so client-side timeout is redundant
const response = await fetch(url);
try {
const response = await fetch(url);
if (!response.ok) {
throw new Error(`Context generation failed: ${response.status}`);
}
const result = await response.text();
const additionalContext = result.trim();
return {
hookSpecificOutput: {
hookEventName: 'SessionStart',
additionalContext
if (!response.ok) {
// Log but don't throw — context fetch failure should not block session start
logger.warn('HOOK', 'Context generation failed, returning empty', { status: response.status });
return {
hookSpecificOutput: { hookEventName: 'SessionStart', additionalContext: '' },
exitCode: HOOK_EXIT_CODES.SUCCESS
};
}
};
const result = await response.text();
const additionalContext = result.trim();
return {
hookSpecificOutput: {
hookEventName: 'SessionStart',
additionalContext
}
};
} catch (error) {
// Worker unreachable — return empty context gracefully
logger.warn('HOOK', 'Context fetch error, returning empty', { error: error instanceof Error ? error.message : String(error) });
return {
hookSpecificOutput: { hookEventName: 'SessionStart', additionalContext: '' },
exitCode: HOOK_EXIT_CODES.SUCCESS
};
}
}
};