fix: address PR review comments for chroma server lifecycle
This commit is contained in:
+102
-101
File diff suppressed because one or more lines are too long
@@ -79,18 +79,18 @@ export async function performGracefulShutdown(config: GracefulShutdownConfig): P
|
||||
logger.info('SYSTEM', 'MCP client closed');
|
||||
}
|
||||
|
||||
// STEP 5: Close database connection (includes ChromaSync cleanup)
|
||||
if (config.dbManager) {
|
||||
await config.dbManager.close();
|
||||
}
|
||||
|
||||
// STEP 6: Stop Chroma server (local mode only)
|
||||
// STEP 5: Stop Chroma server (local mode only)
|
||||
if (config.chromaServer) {
|
||||
logger.info('SHUTDOWN', 'Stopping Chroma server...');
|
||||
await config.chromaServer.stop();
|
||||
logger.info('SHUTDOWN', 'Chroma server stopped');
|
||||
}
|
||||
|
||||
// STEP 6: Close database connection (includes ChromaSync cleanup)
|
||||
if (config.dbManager) {
|
||||
await config.dbManager.close();
|
||||
}
|
||||
|
||||
// STEP 7: Force kill any remaining child processes (Windows zombie port fix)
|
||||
if (childPids.length > 0) {
|
||||
logger.info('SYSTEM', 'Force killing remaining children');
|
||||
|
||||
@@ -8,9 +8,10 @@
|
||||
* Cross-platform: Linux, macOS, Windows
|
||||
*/
|
||||
|
||||
import { spawn, ChildProcess } from 'child_process';
|
||||
import { spawn, ChildProcess, execSync } from 'child_process';
|
||||
import path from 'path';
|
||||
import os from 'os';
|
||||
import fs from 'fs';
|
||||
import { logger } from '../../utils/logger.js';
|
||||
|
||||
export interface ChromaServerConfig {
|
||||
@@ -25,6 +26,7 @@ export class ChromaServerManager {
|
||||
private config: ChromaServerConfig;
|
||||
private starting: boolean = false;
|
||||
private ready: boolean = false;
|
||||
private startPromise: Promise<boolean> | null = null;
|
||||
|
||||
private constructor(config: ChromaServerConfig) {
|
||||
this.config = config;
|
||||
@@ -47,22 +49,49 @@ export class ChromaServerManager {
|
||||
|
||||
/**
|
||||
* Start the Chroma HTTP server
|
||||
* Reuses in-flight startup if already starting
|
||||
* Spawns `npx chroma run` as a background process
|
||||
* If a server is already running (from previous worker), reuses it
|
||||
*/
|
||||
async start(): Promise<void> {
|
||||
if (this.ready || this.starting) {
|
||||
async start(timeoutMs: number = 60000): Promise<boolean> {
|
||||
if (this.ready) {
|
||||
logger.debug('CHROMA_SERVER', 'Server already started or starting', {
|
||||
ready: this.ready,
|
||||
starting: this.starting
|
||||
});
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (this.startPromise) {
|
||||
logger.debug('CHROMA_SERVER', 'Awaiting existing startup', {
|
||||
host: this.config.host,
|
||||
port: this.config.port
|
||||
});
|
||||
return this.startPromise;
|
||||
}
|
||||
|
||||
this.starting = true;
|
||||
this.startPromise = this.startInternal(timeoutMs);
|
||||
|
||||
try {
|
||||
return await this.startPromise;
|
||||
} finally {
|
||||
this.startPromise = null;
|
||||
if (!this.ready) {
|
||||
this.starting = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal startup path used behind a single shared startPromise lock
|
||||
*/
|
||||
private async startInternal(timeoutMs: number): Promise<boolean> {
|
||||
// Check if a server is already running (from previous worker or manual start)
|
||||
try {
|
||||
const response = await fetch(
|
||||
`http://${this.config.host}:${this.config.port}/api/v2/heartbeat`
|
||||
`http://${this.config.host}:${this.config.port}/api/v2/heartbeat`,
|
||||
{ signal: AbortSignal.timeout(3000) }
|
||||
);
|
||||
if (response.ok) {
|
||||
logger.info('CHROMA_SERVER', 'Existing server detected, reusing', {
|
||||
@@ -70,14 +99,13 @@ export class ChromaServerManager {
|
||||
port: this.config.port
|
||||
});
|
||||
this.ready = true;
|
||||
return;
|
||||
this.starting = false;
|
||||
return true;
|
||||
}
|
||||
} catch {
|
||||
// No server running, proceed to start one
|
||||
}
|
||||
|
||||
this.starting = true;
|
||||
|
||||
// Cross-platform: use npx.cmd on Windows
|
||||
const isWindows = process.platform === 'win32';
|
||||
const command = isWindows ? 'npx.cmd' : 'npx';
|
||||
@@ -95,10 +123,13 @@ export class ChromaServerManager {
|
||||
dataDir: this.config.dataDir
|
||||
});
|
||||
|
||||
const spawnEnv = this.getSpawnEnv();
|
||||
|
||||
this.serverProcess = spawn(command, args, {
|
||||
stdio: ['ignore', 'pipe', 'pipe'],
|
||||
detached: !isWindows, // Don't detach on Windows (no process groups)
|
||||
windowsHide: true // Hide console window on Windows
|
||||
windowsHide: true, // Hide console window on Windows
|
||||
env: spawnEnv
|
||||
});
|
||||
|
||||
// Log server output for debugging
|
||||
@@ -131,6 +162,8 @@ export class ChromaServerManager {
|
||||
this.starting = false;
|
||||
this.serverProcess = null;
|
||||
});
|
||||
|
||||
return this.waitForReady(timeoutMs);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -138,6 +171,10 @@ export class ChromaServerManager {
|
||||
* Polls the heartbeat endpoint until success or timeout
|
||||
*/
|
||||
async waitForReady(timeoutMs: number = 60000): Promise<boolean> {
|
||||
if (this.ready) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const startTime = Date.now();
|
||||
const checkInterval = 500;
|
||||
|
||||
@@ -237,6 +274,7 @@ export class ChromaServerManager {
|
||||
this.serverProcess = null;
|
||||
this.ready = false;
|
||||
this.starting = false;
|
||||
this.startPromise = null;
|
||||
logger.info('CHROMA_SERVER', 'Server stopped', { pid });
|
||||
resolve();
|
||||
};
|
||||
@@ -277,6 +315,94 @@ export class ChromaServerManager {
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Get or create combined SSL certificate bundle for Zscaler/corporate proxy environments.
|
||||
* This ports previous MCP SSL handling so local `npx chroma run` works behind enterprise proxies.
|
||||
*/
|
||||
private getCombinedCertPath(): string | undefined {
|
||||
const combinedCertPath = path.join(os.homedir(), '.claude-mem', 'combined_certs.pem');
|
||||
|
||||
if (fs.existsSync(combinedCertPath)) {
|
||||
const stats = fs.statSync(combinedCertPath);
|
||||
const ageMs = Date.now() - stats.mtimeMs;
|
||||
if (ageMs < 24 * 60 * 60 * 1000) {
|
||||
return combinedCertPath;
|
||||
}
|
||||
}
|
||||
|
||||
if (process.platform !== 'darwin') {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
try {
|
||||
let certifiPath: string | undefined;
|
||||
try {
|
||||
certifiPath = execSync(
|
||||
'uvx --with certifi python -c "import certifi; print(certifi.where())"',
|
||||
{ encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 10000 }
|
||||
).trim();
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
if (!certifiPath || !fs.existsSync(certifiPath)) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
let zscalerCert = '';
|
||||
try {
|
||||
zscalerCert = execSync(
|
||||
'security find-certificate -a -c "Zscaler" -p /Library/Keychains/System.keychain',
|
||||
{ encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 5000 }
|
||||
);
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
if (!zscalerCert ||
|
||||
!zscalerCert.includes('-----BEGIN CERTIFICATE-----') ||
|
||||
!zscalerCert.includes('-----END CERTIFICATE-----')) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const certifiContent = fs.readFileSync(certifiPath, 'utf8');
|
||||
const tempPath = combinedCertPath + '.tmp';
|
||||
fs.writeFileSync(tempPath, certifiContent + '\n' + zscalerCert);
|
||||
fs.renameSync(tempPath, combinedCertPath);
|
||||
|
||||
logger.info('CHROMA_SERVER', 'Created combined SSL certificate bundle for Zscaler', {
|
||||
path: combinedCertPath
|
||||
});
|
||||
|
||||
return combinedCertPath;
|
||||
} catch (error) {
|
||||
logger.debug('CHROMA_SERVER', 'Could not create combined cert bundle', {}, error as Error);
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Build subprocess env and preserve Zscaler compatibility from previous architecture.
|
||||
*/
|
||||
private getSpawnEnv(): NodeJS.ProcessEnv {
|
||||
const combinedCertPath = this.getCombinedCertPath();
|
||||
if (!combinedCertPath) {
|
||||
return process.env;
|
||||
}
|
||||
|
||||
logger.info('CHROMA_SERVER', 'Using combined SSL certificates for enterprise compatibility', {
|
||||
certPath: combinedCertPath
|
||||
});
|
||||
|
||||
return {
|
||||
...process.env,
|
||||
SSL_CERT_FILE: combinedCertPath,
|
||||
REQUESTS_CA_BUNDLE: combinedCertPath,
|
||||
CURL_CA_BUNDLE: combinedCertPath,
|
||||
NODE_EXTRA_CA_CERTS: combinedCertPath
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset the singleton instance (for testing)
|
||||
*/
|
||||
|
||||
@@ -383,8 +383,7 @@ export class WorkerService {
|
||||
port: parseInt(settings.CLAUDE_MEM_CHROMA_PORT || '8000', 10)
|
||||
});
|
||||
|
||||
await this.chromaServer.start();
|
||||
const ready = await this.chromaServer.waitForReady(60000);
|
||||
const ready = await this.chromaServer.start(60000);
|
||||
|
||||
if (ready) {
|
||||
logger.success('SYSTEM', 'Chroma server ready');
|
||||
|
||||
@@ -87,6 +87,12 @@ describe('GracefulShutdown', () => {
|
||||
})
|
||||
};
|
||||
|
||||
const mockChromaServer = {
|
||||
stop: mock(async () => {
|
||||
callOrder.push('chromaServer.stop');
|
||||
})
|
||||
};
|
||||
|
||||
// Create a PID file so we can verify it's removed
|
||||
writePidFile({ pid: 12345, port: 37777, startedAt: new Date().toISOString() });
|
||||
expect(existsSync(PID_FILE)).toBe(true);
|
||||
@@ -95,16 +101,18 @@ describe('GracefulShutdown', () => {
|
||||
server: mockServer,
|
||||
sessionManager: mockSessionManager,
|
||||
mcpClient: mockMcpClient,
|
||||
dbManager: mockDbManager
|
||||
dbManager: mockDbManager,
|
||||
chromaServer: mockChromaServer
|
||||
};
|
||||
|
||||
await performGracefulShutdown(config);
|
||||
|
||||
// Verify order: PID removal happens first (synchronous), then server, then session, then MCP, then DB
|
||||
// Verify order: PID removal happens first (synchronous), then server, then session, then MCP, then Chroma, then DB
|
||||
expect(callOrder).toContain('closeAllConnections');
|
||||
expect(callOrder).toContain('serverClose');
|
||||
expect(callOrder).toContain('sessionManager.shutdownAll');
|
||||
expect(callOrder).toContain('mcpClient.close');
|
||||
expect(callOrder).toContain('chromaServer.stop');
|
||||
expect(callOrder).toContain('dbManager.close');
|
||||
|
||||
// Verify server closes before session manager
|
||||
@@ -115,6 +123,9 @@ describe('GracefulShutdown', () => {
|
||||
|
||||
// Verify MCP closes before database
|
||||
expect(callOrder.indexOf('mcpClient.close')).toBeLessThan(callOrder.indexOf('dbManager.close'));
|
||||
|
||||
// Verify Chroma stops before DB closes
|
||||
expect(callOrder.indexOf('chromaServer.stop')).toBeLessThan(callOrder.indexOf('dbManager.close'));
|
||||
});
|
||||
|
||||
it('should remove PID file during shutdown', async () => {
|
||||
@@ -184,7 +195,7 @@ describe('GracefulShutdown', () => {
|
||||
expect(mockSessionManager.shutdownAll).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('should close database after MCP client', async () => {
|
||||
it('should stop chroma server before database close', async () => {
|
||||
const callOrder: string[] = [];
|
||||
|
||||
const mockSessionManager: ShutdownableService = {
|
||||
@@ -205,16 +216,23 @@ describe('GracefulShutdown', () => {
|
||||
})
|
||||
};
|
||||
|
||||
const mockChromaServer = {
|
||||
stop: mock(async () => {
|
||||
callOrder.push('chromaServer');
|
||||
})
|
||||
};
|
||||
|
||||
const config: GracefulShutdownConfig = {
|
||||
server: null,
|
||||
sessionManager: mockSessionManager,
|
||||
mcpClient: mockMcpClient,
|
||||
dbManager: mockDbManager
|
||||
dbManager: mockDbManager,
|
||||
chromaServer: mockChromaServer
|
||||
};
|
||||
|
||||
await performGracefulShutdown(config);
|
||||
|
||||
expect(callOrder).toEqual(['sessionManager', 'mcpClient', 'dbManager']);
|
||||
expect(callOrder).toEqual(['sessionManager', 'mcpClient', 'chromaServer', 'dbManager']);
|
||||
});
|
||||
|
||||
it('should handle shutdown when PID file does not exist', async () => {
|
||||
|
||||
@@ -0,0 +1,139 @@
|
||||
import { describe, it, expect, beforeEach, afterEach, mock, spyOn } from 'bun:test';
|
||||
import { EventEmitter } from 'events';
|
||||
import * as childProcess from 'child_process';
|
||||
import { ChromaServerManager } from '../../../src/services/sync/ChromaServerManager.js';
|
||||
|
||||
function createFakeProcess(pid: number = 4242): childProcess.ChildProcess {
|
||||
const proc = new EventEmitter() as childProcess.ChildProcess & EventEmitter;
|
||||
let exited = false;
|
||||
|
||||
(proc as any).stdout = new EventEmitter();
|
||||
(proc as any).stderr = new EventEmitter();
|
||||
(proc as any).pid = pid;
|
||||
(proc as any).kill = mock(() => {
|
||||
if (!exited) {
|
||||
exited = true;
|
||||
setTimeout(() => proc.emit('exit', 0, 'SIGTERM'), 0);
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
return proc as childProcess.ChildProcess;
|
||||
}
|
||||
|
||||
describe('ChromaServerManager', () => {
|
||||
const originalFetch = global.fetch;
|
||||
const originalPlatform = process.platform;
|
||||
|
||||
beforeEach(() => {
|
||||
mock.restore();
|
||||
ChromaServerManager.reset();
|
||||
|
||||
// Avoid macOS cert bundle shelling in tests; these tests only exercise startup races.
|
||||
Object.defineProperty(process, 'platform', {
|
||||
value: 'linux',
|
||||
writable: true,
|
||||
configurable: true
|
||||
});
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
global.fetch = originalFetch;
|
||||
mock.restore();
|
||||
ChromaServerManager.reset();
|
||||
|
||||
Object.defineProperty(process, 'platform', {
|
||||
value: originalPlatform,
|
||||
writable: true,
|
||||
configurable: true
|
||||
});
|
||||
});
|
||||
|
||||
it('reuses in-flight startup and only spawns one server process', async () => {
|
||||
const fetchMock = mock(async () => {
|
||||
// First call: existing server check fails, second call: waitForReady succeeds.
|
||||
if (fetchMock.mock.calls.length === 1) {
|
||||
throw new Error('no server yet');
|
||||
}
|
||||
return new Response(null, { status: 200 });
|
||||
});
|
||||
global.fetch = fetchMock as typeof fetch;
|
||||
|
||||
const spawnSpy = spyOn(childProcess, 'spawn').mockImplementation(
|
||||
() => createFakeProcess() as unknown as ReturnType<typeof childProcess.spawn>
|
||||
);
|
||||
|
||||
const manager = ChromaServerManager.getInstance({
|
||||
dataDir: '/tmp/chroma-test',
|
||||
host: '127.0.0.1',
|
||||
port: 8000
|
||||
});
|
||||
|
||||
const [first, second] = await Promise.all([
|
||||
manager.start(2000),
|
||||
manager.start(2000)
|
||||
]);
|
||||
|
||||
expect(first).toBe(true);
|
||||
expect(second).toBe(true);
|
||||
expect(spawnSpy).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('reuses existing reachable server without spawning', async () => {
|
||||
global.fetch = mock(async () => new Response(null, { status: 200 })) as typeof fetch;
|
||||
const spawnSpy = spyOn(childProcess, 'spawn').mockImplementation(
|
||||
() => createFakeProcess() as unknown as ReturnType<typeof childProcess.spawn>
|
||||
);
|
||||
|
||||
const manager = ChromaServerManager.getInstance({
|
||||
dataDir: '/tmp/chroma-test',
|
||||
host: '127.0.0.1',
|
||||
port: 8000
|
||||
});
|
||||
|
||||
const ready = await manager.start(2000);
|
||||
expect(ready).toBe(true);
|
||||
expect(spawnSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('waits for ongoing startup instead of returning early', async () => {
|
||||
let resolveReady: ((value: Response) => void) | null = null;
|
||||
const delayedReady = new Promise<Response>((resolve) => {
|
||||
resolveReady = resolve;
|
||||
});
|
||||
|
||||
const fetchMock = mock(async () => {
|
||||
// 1st: existing server check -> fail, 2nd: waitForReady -> block until we resolve.
|
||||
if (fetchMock.mock.calls.length === 1) {
|
||||
throw new Error('no server yet');
|
||||
}
|
||||
return delayedReady;
|
||||
});
|
||||
global.fetch = fetchMock as typeof fetch;
|
||||
|
||||
spyOn(childProcess, 'spawn').mockImplementation(
|
||||
() => createFakeProcess() as unknown as ReturnType<typeof childProcess.spawn>
|
||||
);
|
||||
|
||||
const manager = ChromaServerManager.getInstance({
|
||||
dataDir: '/tmp/chroma-test',
|
||||
host: '127.0.0.1',
|
||||
port: 8000
|
||||
});
|
||||
|
||||
const firstStart = manager.start(5000);
|
||||
let secondResolved = false;
|
||||
const secondStart = manager.start(5000).then((value) => {
|
||||
secondResolved = true;
|
||||
return value;
|
||||
});
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, 50));
|
||||
expect(secondResolved).toBe(false);
|
||||
|
||||
resolveReady!(new Response(null, { status: 200 }));
|
||||
|
||||
expect(await firstStart).toBe(true);
|
||||
expect(await secondStart).toBe(true);
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user