fix: suppress false ERROR when duplicate daemon loses port bind race (#1447)
When the MCP server and SessionStart hook both spawn a worker daemon concurrently, one loses the bind race (EADDRINUSE / Bun's port-in-use error). The loser now checks if the winner is healthy; if so, it logs INFO and exits cleanly instead of logging a misleading ERROR on every first session start. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1200,7 +1200,18 @@ async function main() {
|
||||
});
|
||||
|
||||
const worker = new WorkerService();
|
||||
worker.start().catch((error) => {
|
||||
worker.start().catch(async (error) => {
|
||||
// Port race: when the MCP server and SessionStart hook both spawn a daemon
|
||||
// concurrently, one will lose the bind race with EADDRINUSE or Bun's equivalent
|
||||
// "port in use" error. If the winner is already healthy, exit cleanly (#1447).
|
||||
const isPortConflict = error instanceof Error && (
|
||||
(error as NodeJS.ErrnoException).code === 'EADDRINUSE' ||
|
||||
/port.*in use|address.*in use/i.test(error.message)
|
||||
);
|
||||
if (isPortConflict && await waitForHealth(port, 3000)) {
|
||||
logger.info('SYSTEM', 'Duplicate daemon exiting — another worker already claimed port', { port });
|
||||
process.exit(0);
|
||||
}
|
||||
logger.failure('SYSTEM', 'Worker failed to start', {}, error as Error);
|
||||
removePidFile();
|
||||
// Exit gracefully: Windows Terminal won't keep tab open on exit 0
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
import { describe, it, expect } from 'bun:test';
|
||||
import { readFileSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
|
||||
/**
|
||||
* Source-inspection tests for Issue #1447: Worker startup race condition
|
||||
*
|
||||
* When the MCP server and SessionStart hook both spawn a daemon concurrently,
|
||||
* one daemon loses the port bind race (EADDRINUSE / Bun's "port in use" error).
|
||||
* The loser should detect this, verify the winner is healthy, and exit cleanly
|
||||
* instead of logging an ERROR that clutters the user's session start output.
|
||||
*
|
||||
* These are source-inspection tests because the race is non-deterministic and
|
||||
* requires a real concurrent multi-process scenario to reproduce reliably.
|
||||
*/
|
||||
|
||||
const WORKER_SERVICE_PATH = join(import.meta.dir, '../../src/services/worker-service.ts');
|
||||
const source = readFileSync(WORKER_SERVICE_PATH, 'utf-8');
|
||||
|
||||
describe('Worker daemon port-race guard (#1447)', () => {
|
||||
it('detects EADDRINUSE error code in the port-conflict check', () => {
|
||||
expect(source).toContain("code === 'EADDRINUSE'");
|
||||
});
|
||||
|
||||
it('detects Bun port-in-use message via regex in the port-conflict check', () => {
|
||||
expect(source).toContain('/port.*in use|address.*in use/i.test(error.message)');
|
||||
});
|
||||
|
||||
it('calls waitForHealth before exiting on a port conflict', () => {
|
||||
// The guard must verify the winner is actually healthy before exiting,
|
||||
// otherwise a non-worker process on the port would suppress a real error.
|
||||
expect(source).toContain('isPortConflict && await waitForHealth(port,');
|
||||
});
|
||||
|
||||
it('uses async catch handler to allow awaiting waitForHealth', () => {
|
||||
// The .catch() must be async so it can await the health check.
|
||||
expect(source).toContain('worker.start().catch(async (error) =>');
|
||||
});
|
||||
|
||||
it('logs info (not error) when cleanly exiting after port race', () => {
|
||||
// Must not call logger.failure() / logger.error() on the clean exit path.
|
||||
expect(source).toContain("logger.info('SYSTEM', 'Duplicate daemon exiting");
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user