fix: address PR review findings for worker lifecycle
- Add PID validation to restart case (matches start case) - Wrap forceKillProcess() in try/catch for graceful shutdown - Wrap getChildProcesses() in try/catch for Windows failures - Add logging to readPidFile(), removePidFile(), httpShutdown() Fixes critical issues found in PR #458 review. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -42,11 +42,18 @@ function readPidFile(): PidInfo | null {
|
|||||||
try {
|
try {
|
||||||
if (!existsSync(PID_FILE)) return null;
|
if (!existsSync(PID_FILE)) return null;
|
||||||
return JSON.parse(readFileSync(PID_FILE, 'utf-8'));
|
return JSON.parse(readFileSync(PID_FILE, 'utf-8'));
|
||||||
} catch { return null; }
|
} catch (error) {
|
||||||
|
logger.warn('SYSTEM', 'Failed to read PID file', { path: PID_FILE, error: (error as Error).message });
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function removePidFile(): void {
|
function removePidFile(): void {
|
||||||
try { if (existsSync(PID_FILE)) unlinkSync(PID_FILE); } catch {}
|
try {
|
||||||
|
if (existsSync(PID_FILE)) unlinkSync(PID_FILE);
|
||||||
|
} catch (error) {
|
||||||
|
logger.warn('SYSTEM', 'Failed to remove PID file', { path: PID_FILE, error: (error as Error).message });
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function isPortInUse(port: number): Promise<boolean> {
|
async function isPortInUse(port: number): Promise<boolean> {
|
||||||
@@ -76,12 +83,23 @@ async function waitForHealth(port: number, timeoutMs: number = 30000): Promise<b
|
|||||||
|
|
||||||
async function httpShutdown(port: number): Promise<boolean> {
|
async function httpShutdown(port: number): Promise<boolean> {
|
||||||
try {
|
try {
|
||||||
await fetch(`http://127.0.0.1:${port}/api/admin/shutdown`, {
|
const response = await fetch(`http://127.0.0.1:${port}/api/admin/shutdown`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
signal: AbortSignal.timeout(5000)
|
signal: AbortSignal.timeout(5000)
|
||||||
});
|
});
|
||||||
|
if (!response.ok) {
|
||||||
|
logger.warn('SYSTEM', 'Shutdown request returned error', { port, status: response.status });
|
||||||
|
return false;
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
} catch { return false; }
|
} catch (error) {
|
||||||
|
// Connection refused is expected if worker already stopped
|
||||||
|
const isConnectionRefused = (error as Error).message?.includes('ECONNREFUSED');
|
||||||
|
if (!isConnectionRefused) {
|
||||||
|
logger.warn('SYSTEM', 'Shutdown request failed', { port, error: (error as Error).message });
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function waitForPortFree(port: number, timeoutMs: number = 10000): Promise<boolean> {
|
async function waitForPortFree(port: number, timeoutMs: number = 10000): Promise<boolean> {
|
||||||
@@ -735,13 +753,18 @@ export class WorkerService {
|
|||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
const cmd = `powershell -Command "Get-CimInstance Win32_Process | Where-Object { $_.ParentProcessId -eq ${parentPid} } | Select-Object -ExpandProperty ProcessId"`;
|
try {
|
||||||
const { stdout } = await execAsync(cmd, { timeout: 60000 });
|
const cmd = `powershell -Command "Get-CimInstance Win32_Process | Where-Object { $_.ParentProcessId -eq ${parentPid} } | Select-Object -ExpandProperty ProcessId"`;
|
||||||
return stdout
|
const { stdout } = await execAsync(cmd, { timeout: 60000 });
|
||||||
.trim()
|
return stdout
|
||||||
.split('\n')
|
.trim()
|
||||||
.map(s => parseInt(s.trim(), 10))
|
.split('\n')
|
||||||
.filter(n => !isNaN(n) && Number.isInteger(n) && n > 0); // SECURITY: Validate each PID
|
.map(s => parseInt(s.trim(), 10))
|
||||||
|
.filter(n => !isNaN(n) && Number.isInteger(n) && n > 0); // SECURITY: Validate each PID
|
||||||
|
} catch (error) {
|
||||||
|
logger.warn('SYSTEM', 'Failed to enumerate child processes', { parentPid, error: (error as Error).message });
|
||||||
|
return []; // Fail safely - continue shutdown without child process cleanup
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -754,12 +777,17 @@ export class WorkerService {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (process.platform === 'win32') {
|
try {
|
||||||
// /T kills entire process tree, /F forces termination
|
if (process.platform === 'win32') {
|
||||||
await execAsync(`taskkill /PID ${pid} /T /F`, { timeout: 60000 });
|
// /T kills entire process tree, /F forces termination
|
||||||
|
await execAsync(`taskkill /PID ${pid} /T /F`, { timeout: 60000 });
|
||||||
|
} else {
|
||||||
|
process.kill(pid, 'SIGKILL');
|
||||||
|
}
|
||||||
logger.info('SYSTEM', 'Killed process', { pid });
|
logger.info('SYSTEM', 'Killed process', { pid });
|
||||||
} else {
|
} catch {
|
||||||
process.kill(pid, 'SIGKILL');
|
// Process may have already exited - continue shutdown
|
||||||
|
logger.debug('SYSTEM', 'Process already exited during force kill', { pid });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -889,8 +917,14 @@ async function main() {
|
|||||||
windowsHide: true,
|
windowsHide: true,
|
||||||
env: { ...process.env, CLAUDE_MEM_WORKER_PORT: String(port) }
|
env: { ...process.env, CLAUDE_MEM_WORKER_PORT: String(port) }
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (child.pid === undefined) {
|
||||||
|
console.error('Failed to spawn worker daemon during restart');
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
child.unref();
|
child.unref();
|
||||||
writePidFile({ pid: child.pid!, port, startedAt: new Date().toISOString() });
|
writePidFile({ pid: child.pid, port, startedAt: new Date().toISOString() });
|
||||||
const healthy = await waitForHealth(port, 30000);
|
const healthy = await waitForHealth(port, 30000);
|
||||||
if (!healthy) {
|
if (!healthy) {
|
||||||
removePidFile();
|
removePidFile();
|
||||||
|
|||||||
Reference in New Issue
Block a user