Merge main into feature/openrouter-provider
Resolved conflicts in built files by accepting main's versions. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
+256
-44
@@ -14,11 +14,103 @@ import { Client } from '@modelcontextprotocol/sdk/client/index.js';
|
||||
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
|
||||
import { getWorkerPort, getWorkerHost } from '../shared/worker-utils.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
import { exec, execSync } from 'child_process';
|
||||
import { exec, execSync, spawn } from 'child_process';
|
||||
import { homedir } from 'os';
|
||||
import { existsSync, writeFileSync, readFileSync, unlinkSync, mkdirSync } from 'fs';
|
||||
import { promisify } from 'util';
|
||||
|
||||
const execAsync = promisify(exec);
|
||||
|
||||
// PID file management for self-spawn pattern
|
||||
const DATA_DIR = path.join(homedir(), '.claude-mem');
|
||||
const PID_FILE = path.join(DATA_DIR, 'worker.pid');
|
||||
const HOOK_RESPONSE = '{"continue": true, "suppressOutput": true}';
|
||||
|
||||
interface PidInfo {
|
||||
pid: number;
|
||||
port: number;
|
||||
startedAt: string;
|
||||
}
|
||||
|
||||
// PID file utility functions
|
||||
function writePidFile(info: PidInfo): void {
|
||||
mkdirSync(DATA_DIR, { recursive: true });
|
||||
writeFileSync(PID_FILE, JSON.stringify(info, null, 2));
|
||||
}
|
||||
|
||||
function readPidFile(): PidInfo | null {
|
||||
try {
|
||||
if (!existsSync(PID_FILE)) return null;
|
||||
return JSON.parse(readFileSync(PID_FILE, 'utf-8'));
|
||||
} catch (error) {
|
||||
logger.warn('SYSTEM', 'Failed to read PID file', { path: PID_FILE, error: (error as Error).message });
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function removePidFile(): void {
|
||||
try {
|
||||
if (existsSync(PID_FILE)) unlinkSync(PID_FILE);
|
||||
} catch (error) {
|
||||
logger.warn('SYSTEM', 'Failed to remove PID file', { path: PID_FILE, error: (error as Error).message });
|
||||
}
|
||||
}
|
||||
|
||||
async function isPortInUse(port: number): Promise<boolean> {
|
||||
try {
|
||||
const response = await fetch(`http://127.0.0.1:${port}/api/health`, {
|
||||
signal: AbortSignal.timeout(2000)
|
||||
});
|
||||
return response.ok;
|
||||
} catch { return false; }
|
||||
}
|
||||
|
||||
async function waitForHealth(port: number, timeoutMs: number = 30000): Promise<boolean> {
|
||||
const start = Date.now();
|
||||
while (Date.now() - start < timeoutMs) {
|
||||
try {
|
||||
const response = await fetch(`http://127.0.0.1:${port}/api/readiness`, {
|
||||
signal: AbortSignal.timeout(2000)
|
||||
});
|
||||
if (response.ok) return true;
|
||||
} catch {
|
||||
// Not ready yet
|
||||
}
|
||||
await new Promise(r => setTimeout(r, 500));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
async function httpShutdown(port: number): Promise<boolean> {
|
||||
try {
|
||||
const response = await fetch(`http://127.0.0.1:${port}/api/admin/shutdown`, {
|
||||
method: 'POST',
|
||||
signal: AbortSignal.timeout(5000)
|
||||
});
|
||||
if (!response.ok) {
|
||||
logger.warn('SYSTEM', 'Shutdown request returned error', { port, status: response.status });
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
} catch (error) {
|
||||
// Connection refused is expected if worker already stopped
|
||||
const isConnectionRefused = (error as Error).message?.includes('ECONNREFUSED');
|
||||
if (!isConnectionRefused) {
|
||||
logger.warn('SYSTEM', 'Shutdown request failed', { port, error: (error as Error).message });
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
async function waitForPortFree(port: number, timeoutMs: number = 10000): Promise<boolean> {
|
||||
const start = Date.now();
|
||||
while (Date.now() - start < timeoutMs) {
|
||||
if (!(await isPortInUse(port))) return true;
|
||||
await new Promise(r => setTimeout(r, 500));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Import composed service layer
|
||||
import { DatabaseManager } from './worker/DatabaseManager.js';
|
||||
import { SessionManager } from './worker/SessionManager.js';
|
||||
@@ -270,7 +362,7 @@ export class WorkerService {
|
||||
this.app.get('/api/context/inject', async (req, res, next) => {
|
||||
try {
|
||||
// Wait for initialization to complete (with timeout)
|
||||
const timeoutMs = 30000; // 30 second timeout
|
||||
const timeoutMs = 300000; // 5 minute timeout for slow systems
|
||||
const timeoutPromise = new Promise((_, reject) =>
|
||||
setTimeout(() => reject(new Error('Initialization timeout')), timeoutMs)
|
||||
);
|
||||
@@ -330,7 +422,7 @@ export class WorkerService {
|
||||
if (isWindows) {
|
||||
// Windows: Use PowerShell Get-CimInstance to find chroma-mcp processes
|
||||
const cmd = `powershell -Command "Get-CimInstance Win32_Process | Where-Object { $_.Name -like '*python*' -and $_.CommandLine -like '*chroma-mcp*' } | Select-Object -ExpandProperty ProcessId"`;
|
||||
const { stdout } = await execAsync(cmd, { timeout: 5000 });
|
||||
const { stdout } = await execAsync(cmd, { timeout: 60000 });
|
||||
|
||||
if (!stdout.trim()) {
|
||||
logger.debug('SYSTEM', 'No orphaned chroma-mcp processes found (Windows)');
|
||||
@@ -385,10 +477,20 @@ export class WorkerService {
|
||||
logger.warn('SYSTEM', 'Skipping invalid PID', { pid });
|
||||
continue;
|
||||
}
|
||||
execSync(`taskkill /PID ${pid} /T /F`, { timeout: 5000, stdio: 'ignore' });
|
||||
try {
|
||||
execSync(`taskkill /PID ${pid} /T /F`, { timeout: 60000, stdio: 'ignore' });
|
||||
} catch {
|
||||
// Process may have already exited - continue cleanup
|
||||
}
|
||||
}
|
||||
} else {
|
||||
await execAsync(`kill ${pids.join(' ')}`);
|
||||
for (const pid of pids) {
|
||||
try {
|
||||
process.kill(pid, 'SIGKILL');
|
||||
} catch {
|
||||
// Process already exited - that's fine
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
logger.info('SYSTEM', 'Orphaned processes cleaned up', { count: pids.length });
|
||||
@@ -467,11 +569,11 @@ export class WorkerService {
|
||||
env: process.env
|
||||
});
|
||||
|
||||
// Add timeout guard to prevent hanging on MCP connection (15 seconds)
|
||||
const MCP_INIT_TIMEOUT_MS = 15000;
|
||||
// Add timeout guard to prevent hanging on MCP connection (5 minutes for slow systems)
|
||||
const MCP_INIT_TIMEOUT_MS = 300000;
|
||||
const mcpConnectionPromise = this.mcpClient.connect(transport);
|
||||
const timeoutPromise = new Promise<never>((_, reject) =>
|
||||
setTimeout(() => reject(new Error('MCP connection timeout after 15s')), MCP_INIT_TIMEOUT_MS)
|
||||
setTimeout(() => reject(new Error('MCP connection timeout after 5 minutes')), MCP_INIT_TIMEOUT_MS)
|
||||
);
|
||||
|
||||
await Promise.race([mcpConnectionPromise, timeoutPromise]);
|
||||
@@ -655,13 +757,18 @@ export class WorkerService {
|
||||
return [];
|
||||
}
|
||||
|
||||
const cmd = `powershell -Command "Get-CimInstance Win32_Process | Where-Object { $_.ParentProcessId -eq ${parentPid} } | Select-Object -ExpandProperty ProcessId"`;
|
||||
const { stdout } = await execAsync(cmd, { timeout: 5000 });
|
||||
return stdout
|
||||
.trim()
|
||||
.split('\n')
|
||||
.map(s => parseInt(s.trim(), 10))
|
||||
.filter(n => !isNaN(n) && Number.isInteger(n) && n > 0); // SECURITY: Validate each PID
|
||||
try {
|
||||
const cmd = `powershell -Command "Get-CimInstance Win32_Process | Where-Object { $_.ParentProcessId -eq ${parentPid} } | Select-Object -ExpandProperty ProcessId"`;
|
||||
const { stdout } = await execAsync(cmd, { timeout: 60000 });
|
||||
return stdout
|
||||
.trim()
|
||||
.split('\n')
|
||||
.map(s => parseInt(s.trim(), 10))
|
||||
.filter(n => !isNaN(n) && Number.isInteger(n) && n > 0); // SECURITY: Validate each PID
|
||||
} catch (error) {
|
||||
logger.warn('SYSTEM', 'Failed to enumerate child processes', { parentPid, error: (error as Error).message });
|
||||
return []; // Fail safely - continue shutdown without child process cleanup
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -674,12 +781,17 @@ export class WorkerService {
|
||||
return;
|
||||
}
|
||||
|
||||
if (process.platform === 'win32') {
|
||||
// /T kills entire process tree, /F forces termination
|
||||
await execAsync(`taskkill /PID ${pid} /T /F`, { timeout: 5000 });
|
||||
try {
|
||||
if (process.platform === 'win32') {
|
||||
// /T kills entire process tree, /F forces termination
|
||||
await execAsync(`taskkill /PID ${pid} /T /F`, { timeout: 60000 });
|
||||
} else {
|
||||
process.kill(pid, 'SIGKILL');
|
||||
}
|
||||
logger.info('SYSTEM', 'Killed process', { pid });
|
||||
} else {
|
||||
process.kill(pid, 'SIGKILL');
|
||||
} catch {
|
||||
// Process may have already exited - continue shutdown
|
||||
logger.debug('SYSTEM', 'Process already exited during force kill', { pid });
|
||||
}
|
||||
}
|
||||
|
||||
@@ -691,8 +803,12 @@ export class WorkerService {
|
||||
|
||||
while (Date.now() - start < timeoutMs) {
|
||||
const stillAlive = pids.filter(pid => {
|
||||
process.kill(pid, 0); // Signal 0 checks if process exists - throws if dead
|
||||
return true;
|
||||
try {
|
||||
process.kill(pid, 0);
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
});
|
||||
|
||||
if (stillAlive.length === 0) {
|
||||
@@ -741,31 +857,127 @@ export class WorkerService {
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Main Entry Point
|
||||
// CLI Entry Point
|
||||
// ============================================================================
|
||||
|
||||
/**
|
||||
* Start the worker service (if running as main module)
|
||||
* Note: Using require.main check for CJS compatibility (build outputs CJS)
|
||||
*/
|
||||
if (require.main === module || !module.parent) {
|
||||
const worker = new WorkerService();
|
||||
async function main() {
|
||||
const command = process.argv[2];
|
||||
const port = getWorkerPort();
|
||||
|
||||
// Graceful shutdown
|
||||
process.on('SIGTERM', async () => {
|
||||
logger.info('SYSTEM', 'Received SIGTERM, shutting down gracefully');
|
||||
await worker.shutdown();
|
||||
process.exit(0);
|
||||
});
|
||||
switch (command) {
|
||||
case 'start': {
|
||||
// Already running?
|
||||
if (await isPortInUse(port)) {
|
||||
console.log(HOOK_RESPONSE);
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
process.on('SIGINT', async () => {
|
||||
logger.info('SYSTEM', 'Received SIGINT, shutting down gracefully');
|
||||
await worker.shutdown();
|
||||
process.exit(0);
|
||||
});
|
||||
// Spawn self as daemon
|
||||
const child = spawn(process.execPath, [__filename, '--daemon'], {
|
||||
detached: true,
|
||||
stdio: 'ignore',
|
||||
windowsHide: true,
|
||||
env: { ...process.env, CLAUDE_MEM_WORKER_PORT: String(port) }
|
||||
});
|
||||
|
||||
worker.start().catch((error) => {
|
||||
logger.failure('SYSTEM', 'Worker failed to start', {}, error as Error);
|
||||
process.exit(1);
|
||||
});
|
||||
if (child.pid === undefined) {
|
||||
console.error('Failed to spawn worker daemon');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
child.unref();
|
||||
|
||||
// Write PID file
|
||||
writePidFile({ pid: child.pid, port, startedAt: new Date().toISOString() });
|
||||
|
||||
// Wait for health
|
||||
const healthy = await waitForHealth(port, 30000);
|
||||
if (!healthy) {
|
||||
removePidFile();
|
||||
console.error('Worker failed to start');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
console.log(HOOK_RESPONSE);
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
case 'stop': {
|
||||
await httpShutdown(port);
|
||||
await waitForPortFree(port, 10000);
|
||||
removePidFile();
|
||||
console.log(HOOK_RESPONSE);
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
case 'restart': {
|
||||
await httpShutdown(port);
|
||||
await waitForPortFree(port, 10000);
|
||||
removePidFile();
|
||||
// Fall through to start a new instance
|
||||
const child = spawn(process.execPath, [__filename, '--daemon'], {
|
||||
detached: true,
|
||||
stdio: 'ignore',
|
||||
windowsHide: true,
|
||||
env: { ...process.env, CLAUDE_MEM_WORKER_PORT: String(port) }
|
||||
});
|
||||
|
||||
if (child.pid === undefined) {
|
||||
console.error('Failed to spawn worker daemon during restart');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
child.unref();
|
||||
writePidFile({ pid: child.pid, port, startedAt: new Date().toISOString() });
|
||||
const healthy = await waitForHealth(port, 30000);
|
||||
if (!healthy) {
|
||||
removePidFile();
|
||||
console.error('Worker failed to restart');
|
||||
process.exit(1);
|
||||
}
|
||||
console.log(HOOK_RESPONSE);
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
case 'status': {
|
||||
const running = await isPortInUse(port);
|
||||
const pidInfo = readPidFile();
|
||||
if (running && pidInfo) {
|
||||
console.log(`Worker running (PID: ${pidInfo.pid}, Port: ${pidInfo.port})`);
|
||||
} else {
|
||||
console.log('Worker not running');
|
||||
}
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
case '--daemon':
|
||||
default: {
|
||||
// Run server directly
|
||||
const worker = new WorkerService();
|
||||
|
||||
process.on('SIGTERM', async () => {
|
||||
logger.info('SYSTEM', 'Received SIGTERM');
|
||||
await worker.shutdown();
|
||||
removePidFile();
|
||||
process.exit(0);
|
||||
});
|
||||
|
||||
process.on('SIGINT', async () => {
|
||||
logger.info('SYSTEM', 'Received SIGINT');
|
||||
await worker.shutdown();
|
||||
removePidFile();
|
||||
process.exit(0);
|
||||
});
|
||||
|
||||
worker.start().catch((error) => {
|
||||
logger.failure('SYSTEM', 'Worker failed to start', {}, error as Error);
|
||||
removePidFile();
|
||||
process.exit(1);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (require.main === module || !module.parent) {
|
||||
main();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user