Merge remote-tracking branch 'origin/main' into fix/chroma-mcp-spawn-storm
# Conflicts: # src/services/worker-service.ts # tests/infrastructure/process-manager.test.ts
This commit is contained in:
@@ -14,6 +14,7 @@ import { existsSync, writeFileSync, unlinkSync, statSync } from 'fs';
|
||||
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
|
||||
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
|
||||
import { getWorkerPort, getWorkerHost } from '../shared/worker-utils.js';
|
||||
import { HOOK_TIMEOUTS } from '../shared/hook-constants.js';
|
||||
import { SettingsDefaultsManager } from '../shared/SettingsDefaultsManager.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
|
||||
@@ -67,6 +68,7 @@ import {
|
||||
getPlatformTimeout,
|
||||
cleanupOrphanedProcesses,
|
||||
cleanupExcessChromaProcesses,
|
||||
cleanStalePidFile,
|
||||
spawnDaemon,
|
||||
createSignalHandler
|
||||
} from './infrastructure/ProcessManager.js';
|
||||
@@ -230,6 +232,22 @@ export class WorkerService {
|
||||
this.isShuttingDown = shutdownRef.value;
|
||||
handler('SIGINT');
|
||||
});
|
||||
|
||||
// SIGHUP: sent by kernel when controlling terminal closes.
|
||||
// Daemon mode: ignore it (survive parent shell exit).
|
||||
// Interactive mode: treat like SIGTERM (graceful shutdown).
|
||||
if (process.platform !== 'win32') {
|
||||
if (process.argv.includes('--daemon')) {
|
||||
process.on('SIGHUP', () => {
|
||||
logger.debug('SYSTEM', 'Ignoring SIGHUP in daemon mode');
|
||||
});
|
||||
} else {
|
||||
process.on('SIGHUP', () => {
|
||||
this.isShuttingDown = shutdownRef.value;
|
||||
handler('SIGHUP');
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -748,6 +766,9 @@ export class WorkerService {
|
||||
* @returns true if worker is healthy (existing or newly started), false on failure
|
||||
*/
|
||||
async function ensureWorkerStarted(port: number): Promise<boolean> {
|
||||
// Clean stale PID file first (cheap: 1 fs read + 1 signal-0 check)
|
||||
cleanStalePidFile();
|
||||
|
||||
// Check if worker is already running and healthy
|
||||
if (await waitForHealth(port, 1000)) {
|
||||
const versionCheck = await checkVersionMatch(port);
|
||||
@@ -758,7 +779,7 @@ async function ensureWorkerStarted(port: number): Promise<boolean> {
|
||||
});
|
||||
|
||||
await httpShutdown(port);
|
||||
const freed = await waitForPortFree(port, getPlatformTimeout(15000));
|
||||
const freed = await waitForPortFree(port, getPlatformTimeout(HOOK_TIMEOUTS.PORT_IN_USE_WAIT));
|
||||
if (!freed) {
|
||||
logger.error('SYSTEM', 'Port did not free up after shutdown for version mismatch restart', { port });
|
||||
return false;
|
||||
@@ -774,7 +795,7 @@ async function ensureWorkerStarted(port: number): Promise<boolean> {
|
||||
const portInUse = await isPortInUse(port);
|
||||
if (portInUse) {
|
||||
logger.info('SYSTEM', 'Port in use, waiting for worker to become healthy');
|
||||
const healthy = await waitForHealth(port, getPlatformTimeout(15000));
|
||||
const healthy = await waitForHealth(port, getPlatformTimeout(HOOK_TIMEOUTS.PORT_IN_USE_WAIT));
|
||||
if (healthy) {
|
||||
logger.info('SYSTEM', 'Worker is now healthy');
|
||||
return true;
|
||||
@@ -801,7 +822,7 @@ async function ensureWorkerStarted(port: number): Promise<boolean> {
|
||||
// PID file is written by the worker itself after listen() succeeds
|
||||
// This is race-free and works correctly on Windows where cmd.exe PID is useless
|
||||
|
||||
const healthy = await waitForHealth(port, getPlatformTimeout(30000));
|
||||
const healthy = await waitForHealth(port, getPlatformTimeout(HOOK_TIMEOUTS.POST_SPAWN_WAIT));
|
||||
if (!healthy) {
|
||||
removePidFile();
|
||||
logger.error('SYSTEM', 'Worker failed to start (health check timeout)');
|
||||
@@ -873,7 +894,7 @@ async function main() {
|
||||
// PID file is written by the worker itself after listen() succeeds
|
||||
// This is race-free and works correctly on Windows where cmd.exe PID is useless
|
||||
|
||||
const healthy = await waitForHealth(port, getPlatformTimeout(30000));
|
||||
const healthy = await waitForHealth(port, getPlatformTimeout(HOOK_TIMEOUTS.POST_SPAWN_WAIT));
|
||||
if (!healthy) {
|
||||
removePidFile();
|
||||
logger.error('SYSTEM', 'Worker failed to restart');
|
||||
@@ -968,6 +989,18 @@ async function main() {
|
||||
|
||||
case '--daemon':
|
||||
default: {
|
||||
// Prevent daemon from dying silently on unhandled errors.
|
||||
// The HTTP server can continue serving even if a background task throws.
|
||||
process.on('unhandledRejection', (reason) => {
|
||||
logger.error('SYSTEM', 'Unhandled rejection in daemon', {
|
||||
reason: reason instanceof Error ? reason.message : String(reason)
|
||||
});
|
||||
});
|
||||
process.on('uncaughtException', (error) => {
|
||||
logger.error('SYSTEM', 'Uncaught exception in daemon', {}, error as Error);
|
||||
// Don't exit — keep the HTTP server running
|
||||
});
|
||||
|
||||
const worker = new WorkerService();
|
||||
worker.start().catch((error) => {
|
||||
logger.failure('SYSTEM', 'Worker failed to start', {}, error as Error);
|
||||
|
||||
Reference in New Issue
Block a user