Merge remote-tracking branch 'origin/main' into openclaw-installer
# Conflicts: # plugin/scripts/mcp-server.cjs # plugin/scripts/worker-service.cjs
This commit is contained in:
@@ -10,7 +10,7 @@
|
||||
"plugins": [
|
||||
{
|
||||
"name": "claude-mem",
|
||||
"version": "10.0.3",
|
||||
"version": "10.0.4",
|
||||
"source": "./plugin",
|
||||
"description": "Persistent memory system for Claude Code - context compression across sessions"
|
||||
}
|
||||
|
||||
+25
-4
@@ -2,6 +2,31 @@
|
||||
|
||||
All notable changes to claude-mem.
|
||||
|
||||
## [v10.0.4] - 2026-02-12
|
||||
|
||||
## Revert: v10.0.3 chroma-mcp spawn storm fix
|
||||
|
||||
v10.0.3 introduced regressions. This release reverts the codebase to the stable v10.0.2 state.
|
||||
|
||||
### What was reverted
|
||||
|
||||
- Connection mutex via promise memoization
|
||||
- Pre-spawn process count guard
|
||||
- Hardened `close()` with try-finally + Unix `pkill -P` fallback
|
||||
- Count-based orphan reaper in `ProcessManager`
|
||||
- Circuit breaker (3 failures → 60s cooldown)
|
||||
- `etime`-based sorting for process guards
|
||||
|
||||
### Files restored to v10.0.2
|
||||
|
||||
- `src/services/sync/ChromaSync.ts`
|
||||
- `src/services/infrastructure/GracefulShutdown.ts`
|
||||
- `src/services/infrastructure/ProcessManager.ts`
|
||||
- `src/services/worker-service.ts`
|
||||
- `src/services/worker/ProcessRegistry.ts`
|
||||
- `tests/infrastructure/process-manager.test.ts`
|
||||
- `tests/integration/chroma-vector-sync.test.ts`
|
||||
|
||||
## [v10.0.3] - 2026-02-11
|
||||
|
||||
## Fix: Prevent chroma-mcp spawn storm (PR #1065)
|
||||
@@ -1519,7 +1544,3 @@ Refactored context loading logic to differentiate between code and non-code mode
|
||||
|
||||
🤖 Generated with [Claude Code](https://claude.com/claude-code)
|
||||
|
||||
## [v8.0.3] - 2025-12-23
|
||||
|
||||
Fix critical worker crashes on startup (v8.0.2 regression)
|
||||
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "claude-mem",
|
||||
"version": "10.0.3",
|
||||
"version": "10.0.4",
|
||||
"description": "Memory compression system for Claude Code - persist context across sessions",
|
||||
"keywords": [
|
||||
"claude",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "claude-mem",
|
||||
"version": "10.0.3",
|
||||
"version": "10.0.4",
|
||||
"description": "Persistent memory system for Claude Code - seamlessly preserve context across sessions",
|
||||
"author": {
|
||||
"name": "Alex Newman"
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "claude-mem-plugin",
|
||||
"version": "10.0.3",
|
||||
"version": "10.0.4",
|
||||
"private": true,
|
||||
"description": "Runtime dependencies for claude-mem bundled hooks",
|
||||
"type": "module",
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
*/
|
||||
|
||||
import http from 'http';
|
||||
import { execFileSync } from 'child_process';
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import {
|
||||
getChildProcesses,
|
||||
@@ -77,21 +76,6 @@ export async function performGracefulShutdown(config: GracefulShutdownConfig): P
|
||||
await config.dbManager.close();
|
||||
}
|
||||
|
||||
// STEP 5.5: Kill any chroma-mcp children that survived transport.close() (Unix only)
|
||||
// On Unix, getChildProcesses() returns [] (Windows-only), so chroma-mcp
|
||||
// subprocesses spawned via StdioClientTransport may escape STEP 5 cleanup
|
||||
if (process.platform !== 'win32') {
|
||||
try {
|
||||
execFileSync('pkill', ['-P', String(process.pid), '-f', 'chroma-mcp'], {
|
||||
timeout: 3000,
|
||||
stdio: 'ignore'
|
||||
});
|
||||
logger.info('SYSTEM', 'Killed chroma-mcp child processes');
|
||||
} catch {
|
||||
// pkill returns exit code 1 if no processes matched — that's fine
|
||||
}
|
||||
}
|
||||
|
||||
// STEP 6: Force kill any remaining child processes (Windows zombie port fix)
|
||||
if (childPids.length > 0) {
|
||||
logger.info('SYSTEM', 'Force killing remaining children');
|
||||
|
||||
@@ -339,77 +339,6 @@ export async function cleanupOrphanedProcesses(): Promise<void> {
|
||||
logger.info('SYSTEM', 'Orphaned processes cleaned up', { count: pidsToKill.length });
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up excess chroma-mcp processes by count (not age).
|
||||
*
|
||||
* Unlike cleanupOrphanedProcesses() which uses ORPHAN_MAX_AGE_MINUTES = 30,
|
||||
* this function kills by count — essential for catching spawn storms where
|
||||
* all processes are young. Keeps the newest processes (by elapsed time)
|
||||
* and kills the rest.
|
||||
*
|
||||
* Returns the number of processes killed.
|
||||
*/
|
||||
export async function cleanupExcessChromaProcesses(maxAllowed: number = 2): Promise<number> {
|
||||
// Windows: Chroma is disabled entirely, no cleanup needed
|
||||
if (process.platform === 'win32') return 0;
|
||||
|
||||
try {
|
||||
const { stdout } = await execAsync(
|
||||
'ps -eo pid,etime,command | grep -E "chroma-mcp" | grep -v grep || true'
|
||||
);
|
||||
|
||||
if (!stdout.trim()) return 0;
|
||||
|
||||
const processes: Array<{ pid: number; ageMinutes: number }> = [];
|
||||
|
||||
for (const line of stdout.trim().split('\n')) {
|
||||
if (!line.trim()) continue;
|
||||
const match = line.trim().match(/^(\d+)\s+(\S+)\s+(.*)$/);
|
||||
if (!match) continue;
|
||||
|
||||
const pid = parseInt(match[1], 10);
|
||||
const etime = match[2];
|
||||
|
||||
if (!Number.isInteger(pid) || pid <= 0 || pid === process.pid) continue;
|
||||
|
||||
const ageMinutes = parseElapsedTime(etime);
|
||||
// Skip entries with unparseable etime (-1) to avoid sort corruption
|
||||
if (ageMinutes < 0) continue;
|
||||
processes.push({ pid, ageMinutes });
|
||||
}
|
||||
|
||||
if (processes.length <= maxAllowed) return 0;
|
||||
|
||||
// Sort: newest first (lowest age), keep maxAllowed, kill rest
|
||||
processes.sort((a, b) => a.ageMinutes - b.ageMinutes);
|
||||
const toKill = processes.slice(maxAllowed);
|
||||
|
||||
let killed = 0;
|
||||
for (const { pid } of toKill) {
|
||||
try {
|
||||
process.kill(pid, 'SIGTERM');
|
||||
killed++;
|
||||
logger.info('SYSTEM', 'Killed excess chroma-mcp process', { pid });
|
||||
} catch {
|
||||
// Process may already be dead
|
||||
}
|
||||
}
|
||||
|
||||
if (killed > 0) {
|
||||
logger.warn('SYSTEM', 'Cleaned up excess chroma-mcp processes by count', {
|
||||
found: processes.length,
|
||||
killed,
|
||||
maxAllowed
|
||||
});
|
||||
}
|
||||
|
||||
return killed;
|
||||
} catch (error) {
|
||||
logger.debug('SYSTEM', 'Failed to enumerate chroma-mcp processes', {}, error as Error);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Spawn a detached daemon process
|
||||
* Returns the child PID or undefined if spawn failed
|
||||
|
||||
+20
-156
@@ -18,16 +18,12 @@ import { USER_SETTINGS_PATH } from '../../shared/paths.js';
|
||||
import path from 'path';
|
||||
import os from 'os';
|
||||
import fs from 'fs';
|
||||
import { execSync, execFileSync } from 'child_process';
|
||||
import { parseElapsedTime } from '../infrastructure/ProcessManager.js';
|
||||
import { execSync } from 'child_process';
|
||||
|
||||
// Version injected at build time by esbuild define
|
||||
declare const __DEFAULT_PACKAGE_VERSION__: string;
|
||||
const packageVersion = typeof __DEFAULT_PACKAGE_VERSION__ !== 'undefined' ? __DEFAULT_PACKAGE_VERSION__ : '0.0.0-dev';
|
||||
|
||||
// Maximum allowed chroma-mcp processes before pre-spawn guard kills excess
|
||||
const MAX_CHROMA_PROCESSES = 2; // 1 active + 1 starting
|
||||
|
||||
interface ChromaDocument {
|
||||
id: string;
|
||||
document: string;
|
||||
@@ -94,16 +90,6 @@ export class ChromaSync {
|
||||
// MCP SDK's StdioClientTransport uses shell:false and no detached flag, so console is inherited.
|
||||
private readonly disabled: boolean = false;
|
||||
|
||||
// Layer 0: Connection mutex — coalesces concurrent callers onto single spawn
|
||||
private connectionPromise: Promise<void> | null = null;
|
||||
|
||||
// Layer 4: Circuit breaker — stops retry storms after repeated failures
|
||||
private consecutiveFailures: number = 0;
|
||||
private lastFailureTime: number = 0;
|
||||
private static readonly MAX_FAILURES = 3;
|
||||
private static readonly BACKOFF_BASE_MS = 2000;
|
||||
private static readonly CIRCUIT_OPEN_MS = 60000; // 1 minute cooldown
|
||||
|
||||
constructor(project: string) {
|
||||
this.project = project;
|
||||
this.collectionName = `cm__${project}`;
|
||||
@@ -192,114 +178,14 @@ export class ChromaSync {
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensure MCP client is connected to Chroma server (mutex wrapper).
|
||||
* Coalesces concurrent callers onto a single connection attempt.
|
||||
* This prevents N concurrent calls from each spawning a chroma-mcp subprocess.
|
||||
* Ensure MCP client is connected to Chroma server
|
||||
* Throws error if connection fails
|
||||
*/
|
||||
private async ensureConnection(): Promise<void> {
|
||||
if (this.connected && this.client) return;
|
||||
|
||||
// Layer 0: Coalesce concurrent callers onto a single connection attempt
|
||||
if (this.connectionPromise) {
|
||||
return this.connectionPromise;
|
||||
if (this.connected && this.client) {
|
||||
return;
|
||||
}
|
||||
|
||||
this.connectionPromise = this._doConnect();
|
||||
try {
|
||||
await this.connectionPromise;
|
||||
} finally {
|
||||
this.connectionPromise = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Layer 4: Circuit breaker — refuse to spawn after repeated failures.
|
||||
* After MAX_FAILURES consecutive connection failures, stops all spawn
|
||||
* attempts for CIRCUIT_OPEN_MS to prevent process accumulation storms.
|
||||
*/
|
||||
private checkCircuitBreaker(): void {
|
||||
if (this.consecutiveFailures >= ChromaSync.MAX_FAILURES) {
|
||||
const elapsed = Date.now() - this.lastFailureTime;
|
||||
if (elapsed < ChromaSync.CIRCUIT_OPEN_MS) {
|
||||
throw new Error(
|
||||
`Chroma circuit breaker open: ${this.consecutiveFailures} consecutive failures. ` +
|
||||
`Retry in ${Math.ceil((ChromaSync.CIRCUIT_OPEN_MS - elapsed) / 1000)}s`
|
||||
);
|
||||
}
|
||||
// Cooldown expired, allow retry
|
||||
logger.info('CHROMA_SYNC', 'Circuit breaker cooldown expired, allowing retry', {
|
||||
consecutiveFailures: this.consecutiveFailures,
|
||||
cooldownMs: ChromaSync.CIRCUIT_OPEN_MS
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Layer 1: Pre-spawn process count guard.
|
||||
* Kills excess chroma-mcp processes before spawning a new one.
|
||||
* Uses execFileSync (no shell) to list processes, filters in JavaScript.
|
||||
*/
|
||||
private killExcessChromaProcesses(): void {
|
||||
if (process.platform === 'win32') return; // Windows has Chroma disabled entirely
|
||||
|
||||
try {
|
||||
// Use execFileSync to avoid shell injection — filter and sort in JavaScript
|
||||
// Include etime column for reliable age-based sorting (PID order is unreliable)
|
||||
const output = execFileSync('ps', ['-eo', 'pid,etime,command'], {
|
||||
encoding: 'utf8',
|
||||
timeout: 5000,
|
||||
stdio: ['pipe', 'pipe', 'pipe']
|
||||
});
|
||||
|
||||
// Filter for chroma-mcp, parse elapsed time, sort by actual age
|
||||
const processes = output.split('\n')
|
||||
.filter(l => l.includes('chroma-mcp'))
|
||||
.map(l => {
|
||||
const parts = l.trim().split(/\s+/);
|
||||
const pid = parseInt(parts[0], 10);
|
||||
const etime = parts[1] || '';
|
||||
const ageMinutes = parseElapsedTime(etime);
|
||||
return { pid, ageMinutes };
|
||||
})
|
||||
.filter(p => p.pid > 0 && p.pid !== process.pid && p.ageMinutes >= 0)
|
||||
.sort((a, b) => a.ageMinutes - b.ageMinutes); // Ascending: newest (lowest age) first
|
||||
|
||||
if (processes.length < MAX_CHROMA_PROCESSES) return;
|
||||
|
||||
// Keep newest MAX_CHROMA_PROCESSES - 1 (making room for the one we're about to spawn)
|
||||
const toKill = processes.slice(MAX_CHROMA_PROCESSES - 1);
|
||||
for (const { pid } of toKill) {
|
||||
try {
|
||||
process.kill(pid, 'SIGTERM');
|
||||
} catch {
|
||||
// Process may already be dead
|
||||
}
|
||||
}
|
||||
|
||||
if (toKill.length > 0) {
|
||||
logger.warn('CHROMA_SYNC', 'Killed excess chroma-mcp processes before spawning', {
|
||||
found: processes.length,
|
||||
killed: toKill.length,
|
||||
maxAllowed: MAX_CHROMA_PROCESSES
|
||||
});
|
||||
}
|
||||
} catch {
|
||||
// ps may fail — don't block connection
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal connection logic — called only via ensureConnection() mutex.
|
||||
* Implements circuit breaker (Layer 4), pre-spawn guard (Layer 1),
|
||||
* and actual connection setup.
|
||||
*/
|
||||
private async _doConnect(): Promise<void> {
|
||||
// Layer 4: Circuit breaker check — refuse if too many recent failures
|
||||
this.checkCircuitBreaker();
|
||||
|
||||
// Layer 1: Kill excess processes before spawning a new one
|
||||
this.killExcessChromaProcesses();
|
||||
|
||||
logger.info('CHROMA_SYNC', 'Connecting to Chroma MCP server...', { project: this.project });
|
||||
|
||||
try {
|
||||
@@ -352,20 +238,9 @@ export class ChromaSync {
|
||||
await this.client.connect(this.transport);
|
||||
this.connected = true;
|
||||
|
||||
// Layer 4: Reset circuit breaker on success
|
||||
this.consecutiveFailures = 0;
|
||||
|
||||
logger.info('CHROMA_SYNC', 'Connected to Chroma MCP server', { project: this.project });
|
||||
} catch (error) {
|
||||
// Layer 4: Track failure for circuit breaker
|
||||
this.consecutiveFailures++;
|
||||
this.lastFailureTime = Date.now();
|
||||
|
||||
logger.error('CHROMA_SYNC', 'Failed to connect to Chroma MCP server', {
|
||||
project: this.project,
|
||||
consecutiveFailures: this.consecutiveFailures,
|
||||
circuitBreakerThreshold: ChromaSync.MAX_FAILURES
|
||||
}, error as Error);
|
||||
logger.error('CHROMA_SYNC', 'Failed to connect to Chroma MCP server', { project: this.project }, error as Error);
|
||||
throw new Error(`Chroma connection failed: ${error instanceof Error ? error.message : String(error)}`);
|
||||
}
|
||||
}
|
||||
@@ -416,7 +291,6 @@ export class ChromaSync {
|
||||
this.connected = false;
|
||||
this.client = null;
|
||||
this.transport = null;
|
||||
this.connectionPromise = null;
|
||||
logger.error('CHROMA_SYNC', 'Connection lost during collection check',
|
||||
{ collection: this.collectionName }, error as Error);
|
||||
throw new Error(`Chroma connection lost: ${errorMessage}`);
|
||||
@@ -1086,7 +960,6 @@ export class ChromaSync {
|
||||
this.connected = false;
|
||||
this.client = null;
|
||||
this.transport = null;
|
||||
this.connectionPromise = null;
|
||||
logger.error('CHROMA_SYNC', 'Connection lost during query',
|
||||
{ project: this.project, query }, error as Error);
|
||||
throw new Error(`Chroma query failed - connection lost: ${errorMessage}`);
|
||||
@@ -1144,37 +1017,28 @@ export class ChromaSync {
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the Chroma client connection and cleanup subprocess.
|
||||
* Uses try-finally to guarantee state reset even if close() throws.
|
||||
* Individual close calls use .catch() to prevent one failure from
|
||||
* blocking the other (e.g., client.close() failing shouldn't prevent
|
||||
* transport.close() from killing the subprocess).
|
||||
* Close the Chroma client connection and cleanup subprocess
|
||||
*/
|
||||
async close(): Promise<void> {
|
||||
if (!this.connected && !this.client && !this.transport) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
// Close client first, then transport — catch individual errors
|
||||
if (this.client) {
|
||||
await this.client.close().catch((err: Error) => {
|
||||
logger.debug('CHROMA_SYNC', 'Client close error (may already be disconnected)', {}, err);
|
||||
});
|
||||
}
|
||||
if (this.transport) {
|
||||
await this.transport.close().catch((err: Error) => {
|
||||
logger.debug('CHROMA_SYNC', 'Transport close error (may already be dead)', {}, err);
|
||||
});
|
||||
}
|
||||
} finally {
|
||||
// Always reset state, even if close throws
|
||||
this.connected = false;
|
||||
this.client = null;
|
||||
this.transport = null;
|
||||
this.connectionPromise = null;
|
||||
// Close client first
|
||||
if (this.client) {
|
||||
await this.client.close();
|
||||
}
|
||||
|
||||
// Explicitly close transport to kill subprocess
|
||||
if (this.transport) {
|
||||
await this.transport.close();
|
||||
}
|
||||
|
||||
logger.info('CHROMA_SYNC', 'Chroma client and subprocess closed', { project: this.project });
|
||||
|
||||
// Always reset state
|
||||
this.connected = false;
|
||||
this.client = null;
|
||||
this.transport = null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -68,7 +68,6 @@ import {
|
||||
removePidFile,
|
||||
getPlatformTimeout,
|
||||
cleanupOrphanedProcesses,
|
||||
cleanupExcessChromaProcesses,
|
||||
cleanStalePidFile,
|
||||
spawnDaemon,
|
||||
createSignalHandler
|
||||
@@ -361,7 +360,6 @@ export class WorkerService {
|
||||
private async initializeBackground(): Promise<void> {
|
||||
try {
|
||||
await cleanupOrphanedProcesses();
|
||||
await cleanupExcessChromaProcesses();
|
||||
|
||||
// Load mode configuration
|
||||
const { ModeManager } = await import('./domain/ModeManager.js');
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
import { spawn, exec, ChildProcess } from 'child_process';
|
||||
import { promisify } from 'util';
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { cleanupExcessChromaProcesses } from '../infrastructure/ProcessManager.js';
|
||||
|
||||
const execAsync = promisify(exec);
|
||||
|
||||
@@ -213,7 +212,7 @@ async function killSystemOrphans(): Promise<number> {
|
||||
|
||||
try {
|
||||
const { stdout } = await execAsync(
|
||||
'ps -eo pid,ppid,args 2>/dev/null | grep -E "claude.*haiku|claude.*output-format|chroma-mcp" | grep -v grep'
|
||||
'ps -eo pid,ppid,args 2>/dev/null | grep -E "claude.*haiku|claude.*output-format" | grep -v grep'
|
||||
);
|
||||
|
||||
let killed = 0;
|
||||
@@ -263,9 +262,6 @@ export async function reapOrphanedProcesses(activeSessionIds: Set<number>): Prom
|
||||
// Daemon children: find idle SDK processes that didn't terminate
|
||||
killed += await killIdleDaemonChildren();
|
||||
|
||||
// Count-based: kill excess chroma-mcp processes regardless of age
|
||||
killed += await cleanupExcessChromaProcesses();
|
||||
|
||||
return killed;
|
||||
}
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@ import {
|
||||
removePidFile,
|
||||
getPlatformTimeout,
|
||||
parseElapsedTime,
|
||||
cleanupExcessChromaProcesses,
|
||||
isProcessAlive,
|
||||
cleanStalePidFile,
|
||||
spawnDaemon,
|
||||
@@ -226,65 +225,6 @@ describe('ProcessManager', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('cleanupExcessChromaProcesses (Issue #1063)', () => {
|
||||
/**
|
||||
* Tests for count-based chroma-mcp process cleanup.
|
||||
* Unlike the age-based cleanupOrphanedProcesses() which has a 30-minute
|
||||
* threshold, this function kills by count — essential for catching spawn
|
||||
* storms where all processes are young.
|
||||
*/
|
||||
|
||||
it('should be exported and callable', () => {
|
||||
expect(typeof cleanupExcessChromaProcesses).toBe('function');
|
||||
});
|
||||
|
||||
it('should return 0 on Windows (Chroma disabled)', async () => {
|
||||
const originalPlatform = process.platform;
|
||||
Object.defineProperty(process, 'platform', {
|
||||
value: 'win32',
|
||||
writable: true,
|
||||
configurable: true
|
||||
});
|
||||
|
||||
try {
|
||||
const killed = await cleanupExcessChromaProcesses();
|
||||
expect(killed).toBe(0);
|
||||
} finally {
|
||||
Object.defineProperty(process, 'platform', {
|
||||
value: originalPlatform,
|
||||
writable: true,
|
||||
configurable: true
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
it('should accept custom maxAllowed parameter', async () => {
|
||||
// Should not throw with any valid maxAllowed value
|
||||
const killed = await cleanupExcessChromaProcesses(5);
|
||||
expect(killed).toBeGreaterThanOrEqual(0);
|
||||
});
|
||||
|
||||
it('should return a number (killed count)', async () => {
|
||||
const killed = await cleanupExcessChromaProcesses();
|
||||
expect(typeof killed).toBe('number');
|
||||
expect(killed).toBeGreaterThanOrEqual(0);
|
||||
});
|
||||
|
||||
it('should exist in ProcessManager source with count-based logic', async () => {
|
||||
const sourceFile = await Bun.file(
|
||||
new URL('../../src/services/infrastructure/ProcessManager.ts', import.meta.url)
|
||||
).text();
|
||||
|
||||
// Verify count-based logic exists (not age-based)
|
||||
expect(sourceFile).toContain('cleanupExcessChromaProcesses');
|
||||
expect(sourceFile).toContain('chroma-mcp');
|
||||
|
||||
// Should sort by age and keep newest
|
||||
expect(sourceFile).toContain('.sort(');
|
||||
expect(sourceFile).toContain('.slice(maxAllowed)');
|
||||
});
|
||||
});
|
||||
|
||||
describe('isProcessAlive', () => {
|
||||
it('should return true for the current process', () => {
|
||||
expect(isProcessAlive(process.pid)).toBe(true);
|
||||
|
||||
@@ -392,209 +392,4 @@ describe('ChromaSync Vector Sync Integration', () => {
|
||||
expect(sourceFile).toContain('this.transport = null');
|
||||
});
|
||||
});
|
||||
|
||||
describe('Spawn storm prevention (Issue #1063)', () => {
|
||||
/**
|
||||
* Regression tests for chroma-mcp spawn storm:
|
||||
* 641 processes spawned in ~5 minutes from 6 concurrent sessions.
|
||||
*
|
||||
* Root cause: ensureConnection() had no mutex. Concurrent callers
|
||||
* each spawned a chroma-mcp subprocess via StdioClientTransport.
|
||||
*
|
||||
* Fix: 5 defense layers — connection mutex, pre-spawn count guard,
|
||||
* hardened close(), count-based orphan reaper, circuit breaker.
|
||||
*/
|
||||
|
||||
describe('Layer 0: Connection mutex', () => {
|
||||
it('should have connectionPromise field for mutex', async () => {
|
||||
const { ChromaSync } = await import('../../src/services/sync/ChromaSync.js');
|
||||
const sync = new ChromaSync(testProject);
|
||||
const syncAny = sync as any;
|
||||
|
||||
// connectionPromise should be null initially
|
||||
expect(syncAny.connectionPromise).toBeNull();
|
||||
});
|
||||
|
||||
it('should coalesce concurrent ensureConnection calls via source code', async () => {
|
||||
// Static analysis: verify mutex pattern exists in source
|
||||
const sourceFile = await Bun.file(
|
||||
new URL('../../src/services/sync/ChromaSync.ts', import.meta.url)
|
||||
).text();
|
||||
|
||||
// Verify mutex pattern: check for connectionPromise, return it
|
||||
expect(sourceFile).toContain('if (this.connectionPromise)');
|
||||
expect(sourceFile).toContain('return this.connectionPromise');
|
||||
expect(sourceFile).toContain('this.connectionPromise = this._doConnect()');
|
||||
});
|
||||
|
||||
it('should clear connectionPromise in finally block', async () => {
|
||||
const sourceFile = await Bun.file(
|
||||
new URL('../../src/services/sync/ChromaSync.ts', import.meta.url)
|
||||
).text();
|
||||
|
||||
// The finally block ensures connectionPromise is cleared even on error
|
||||
expect(sourceFile).toContain('finally {');
|
||||
expect(sourceFile).toContain('this.connectionPromise = null');
|
||||
});
|
||||
|
||||
it('should clear connectionPromise in error recovery paths', async () => {
|
||||
const sourceFile = await Bun.file(
|
||||
new URL('../../src/services/sync/ChromaSync.ts', import.meta.url)
|
||||
).text();
|
||||
|
||||
// Count occurrences of connectionPromise = null (should be in multiple places:
|
||||
// finally block, ensureCollection error, queryChroma error, close())
|
||||
const matches = sourceFile.match(/this\.connectionPromise = null/g) || [];
|
||||
expect(matches.length).toBeGreaterThanOrEqual(4);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Layer 1: Pre-spawn process count guard', () => {
|
||||
it('should have killExcessChromaProcesses method', async () => {
|
||||
const { ChromaSync } = await import('../../src/services/sync/ChromaSync.js');
|
||||
const sync = new ChromaSync(testProject);
|
||||
const syncAny = sync as any;
|
||||
|
||||
expect(typeof syncAny.killExcessChromaProcesses).toBe('function');
|
||||
});
|
||||
|
||||
it('should use execFileSync not execSync for safety', async () => {
|
||||
const sourceFile = await Bun.file(
|
||||
new URL('../../src/services/sync/ChromaSync.ts', import.meta.url)
|
||||
).text();
|
||||
|
||||
// killExcessChromaProcesses should use execFileSync (no shell injection)
|
||||
// Extract just the method body
|
||||
const methodStart = sourceFile.indexOf('killExcessChromaProcesses');
|
||||
const methodBody = sourceFile.slice(methodStart, methodStart + 500);
|
||||
expect(methodBody).toContain('execFileSync');
|
||||
});
|
||||
|
||||
it('should define MAX_CHROMA_PROCESSES constant', async () => {
|
||||
const sourceFile = await Bun.file(
|
||||
new URL('../../src/services/sync/ChromaSync.ts', import.meta.url)
|
||||
).text();
|
||||
|
||||
expect(sourceFile).toContain('MAX_CHROMA_PROCESSES');
|
||||
});
|
||||
});
|
||||
|
||||
describe('Layer 2: Hardened close()', () => {
|
||||
it('should use try-finally to guarantee state reset', async () => {
|
||||
const sourceFile = await Bun.file(
|
||||
new URL('../../src/services/sync/ChromaSync.ts', import.meta.url)
|
||||
).text();
|
||||
|
||||
// Find the close() method body (needs larger slice to capture finally block)
|
||||
const closeStart = sourceFile.indexOf('async close():');
|
||||
const closeBody = sourceFile.slice(closeStart, closeStart + 1000);
|
||||
|
||||
// Verify try-finally pattern
|
||||
expect(closeBody).toContain('try {');
|
||||
expect(closeBody).toContain('} finally {');
|
||||
});
|
||||
|
||||
it('should catch individual close errors with .catch()', async () => {
|
||||
const sourceFile = await Bun.file(
|
||||
new URL('../../src/services/sync/ChromaSync.ts', import.meta.url)
|
||||
).text();
|
||||
|
||||
const closeStart = sourceFile.indexOf('async close():');
|
||||
const closeBody = sourceFile.slice(closeStart, closeStart + 600);
|
||||
|
||||
// Both client and transport close should have .catch()
|
||||
expect(closeBody).toContain('this.client.close().catch(');
|
||||
expect(closeBody).toContain('this.transport.close().catch(');
|
||||
});
|
||||
|
||||
it('should reset connectionPromise in close()', async () => {
|
||||
const { ChromaSync } = await import('../../src/services/sync/ChromaSync.js');
|
||||
const sync = new ChromaSync(testProject);
|
||||
const syncAny = sync as any;
|
||||
|
||||
// Simulate partially initialized state — must set connected=true
|
||||
// so close() doesn't early-return before reaching the finally block
|
||||
syncAny.connectionPromise = Promise.resolve();
|
||||
syncAny.connected = true;
|
||||
|
||||
await sync.close();
|
||||
|
||||
// connectionPromise must be reset
|
||||
expect(syncAny.connectionPromise).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe('Layer 4: Circuit breaker', () => {
|
||||
it('should have circuit breaker fields', async () => {
|
||||
const { ChromaSync } = await import('../../src/services/sync/ChromaSync.js');
|
||||
const sync = new ChromaSync(testProject);
|
||||
const syncAny = sync as any;
|
||||
|
||||
expect(syncAny.consecutiveFailures).toBe(0);
|
||||
expect(syncAny.lastFailureTime).toBe(0);
|
||||
});
|
||||
|
||||
it('should have circuit breaker constants', async () => {
|
||||
const { ChromaSync } = await import('../../src/services/sync/ChromaSync.js');
|
||||
const syncClass = ChromaSync as any;
|
||||
|
||||
expect(syncClass.MAX_FAILURES).toBe(3);
|
||||
expect(syncClass.CIRCUIT_OPEN_MS).toBe(60000);
|
||||
});
|
||||
|
||||
it('should throw when circuit breaker is open', async () => {
|
||||
const { ChromaSync } = await import('../../src/services/sync/ChromaSync.js');
|
||||
const sync = new ChromaSync(testProject);
|
||||
const syncAny = sync as any;
|
||||
|
||||
// Simulate 3 consecutive failures just now
|
||||
syncAny.consecutiveFailures = 3;
|
||||
syncAny.lastFailureTime = Date.now();
|
||||
|
||||
// checkCircuitBreaker should throw
|
||||
expect(() => syncAny.checkCircuitBreaker()).toThrow('circuit breaker open');
|
||||
});
|
||||
|
||||
it('should allow retry after cooldown expires', async () => {
|
||||
const { ChromaSync } = await import('../../src/services/sync/ChromaSync.js');
|
||||
const sync = new ChromaSync(testProject);
|
||||
const syncAny = sync as any;
|
||||
|
||||
// Simulate 3 failures that happened 2 minutes ago (past cooldown)
|
||||
syncAny.consecutiveFailures = 3;
|
||||
syncAny.lastFailureTime = Date.now() - 120000;
|
||||
|
||||
// checkCircuitBreaker should NOT throw — cooldown expired
|
||||
expect(() => syncAny.checkCircuitBreaker()).not.toThrow();
|
||||
});
|
||||
|
||||
it('should not throw when under failure threshold', async () => {
|
||||
const { ChromaSync } = await import('../../src/services/sync/ChromaSync.js');
|
||||
const sync = new ChromaSync(testProject);
|
||||
const syncAny = sync as any;
|
||||
|
||||
// 2 failures (under MAX_FAILURES = 3)
|
||||
syncAny.consecutiveFailures = 2;
|
||||
syncAny.lastFailureTime = Date.now();
|
||||
|
||||
expect(() => syncAny.checkCircuitBreaker()).not.toThrow();
|
||||
});
|
||||
|
||||
it('should track failures in _doConnect error path via source code', async () => {
|
||||
const sourceFile = await Bun.file(
|
||||
new URL('../../src/services/sync/ChromaSync.ts', import.meta.url)
|
||||
).text();
|
||||
|
||||
// Verify _doConnect increments consecutiveFailures on error
|
||||
const doConnectStart = sourceFile.indexOf('private async _doConnect');
|
||||
const doConnectBody = sourceFile.slice(doConnectStart, doConnectStart + 3000);
|
||||
|
||||
expect(doConnectBody).toContain('this.consecutiveFailures++');
|
||||
expect(doConnectBody).toContain('this.lastFailureTime = Date.now()');
|
||||
|
||||
// And resets on success
|
||||
expect(doConnectBody).toContain('this.consecutiveFailures = 0');
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user