fix: suppress false ERROR when duplicate daemon loses port bind race (#1447)

When the MCP server and SessionStart hook both spawn a worker daemon
concurrently, one loses the bind race (EADDRINUSE / Bun's port-in-use
error). The loser now checks if the winner is healthy; if so, it logs
INFO and exits cleanly instead of logging a misleading ERROR on every
first session start.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Ousama Ben Younes
2026-04-10 10:01:08 +00:00
parent 3651a34e96
commit 08cf2ba3bd
2 changed files with 56 additions and 1 deletions
+12 -1
View File
@@ -1200,7 +1200,18 @@ async function main() {
});
const worker = new WorkerService();
worker.start().catch((error) => {
worker.start().catch(async (error) => {
// Port race: when the MCP server and SessionStart hook both spawn a daemon
// concurrently, one will lose the bind race with EADDRINUSE or Bun's equivalent
// "port in use" error. If the winner is already healthy, exit cleanly (#1447).
const isPortConflict = error instanceof Error && (
(error as NodeJS.ErrnoException).code === 'EADDRINUSE' ||
/port.*in use|address.*in use/i.test(error.message)
);
if (isPortConflict && await waitForHealth(port, 3000)) {
logger.info('SYSTEM', 'Duplicate daemon exiting — another worker already claimed port', { port });
process.exit(0);
}
logger.failure('SYSTEM', 'Worker failed to start', {}, error as Error);
removePidFile();
// Exit gracefully: Windows Terminal won't keep tab open on exit 0
@@ -0,0 +1,44 @@
import { describe, it, expect } from 'bun:test';
import { readFileSync } from 'fs';
import { join } from 'path';
/**
* Source-inspection tests for Issue #1447: Worker startup race condition
*
* When the MCP server and SessionStart hook both spawn a daemon concurrently,
* one daemon loses the port bind race (EADDRINUSE / Bun's "port in use" error).
* The loser should detect this, verify the winner is healthy, and exit cleanly
* instead of logging an ERROR that clutters the user's session start output.
*
* These are source-inspection tests because the race is non-deterministic and
* requires a real concurrent multi-process scenario to reproduce reliably.
*/
const WORKER_SERVICE_PATH = join(import.meta.dir, '../../src/services/worker-service.ts');
const source = readFileSync(WORKER_SERVICE_PATH, 'utf-8');
describe('Worker daemon port-race guard (#1447)', () => {
it('detects EADDRINUSE error code in the port-conflict check', () => {
expect(source).toContain("code === 'EADDRINUSE'");
});
it('detects Bun port-in-use message via regex in the port-conflict check', () => {
expect(source).toContain('/port.*in use|address.*in use/i.test(error.message)');
});
it('calls waitForHealth before exiting on a port conflict', () => {
// The guard must verify the winner is actually healthy before exiting,
// otherwise a non-worker process on the port would suppress a real error.
expect(source).toContain('isPortConflict && await waitForHealth(port,');
});
it('uses async catch handler to allow awaiting waitForHealth', () => {
// The .catch() must be async so it can await the health check.
expect(source).toContain('worker.start().catch(async (error) =>');
});
it('logs info (not error) when cleanly exiting after port race', () => {
// Must not call logger.failure() / logger.error() on the clean exit path.
expect(source).toContain("logger.info('SYSTEM', 'Duplicate daemon exiting");
});
});