Merge remote-tracking branch 'origin/main' into openclaw-installer
# Conflicts: # plugin/scripts/mcp-server.cjs # plugin/scripts/worker-service.cjs
This commit is contained in:
@@ -10,7 +10,7 @@
|
|||||||
"plugins": [
|
"plugins": [
|
||||||
{
|
{
|
||||||
"name": "claude-mem",
|
"name": "claude-mem",
|
||||||
"version": "10.0.3",
|
"version": "10.0.4",
|
||||||
"source": "./plugin",
|
"source": "./plugin",
|
||||||
"description": "Persistent memory system for Claude Code - context compression across sessions"
|
"description": "Persistent memory system for Claude Code - context compression across sessions"
|
||||||
}
|
}
|
||||||
|
|||||||
+25
-4
@@ -2,6 +2,31 @@
|
|||||||
|
|
||||||
All notable changes to claude-mem.
|
All notable changes to claude-mem.
|
||||||
|
|
||||||
|
## [v10.0.4] - 2026-02-12
|
||||||
|
|
||||||
|
## Revert: v10.0.3 chroma-mcp spawn storm fix
|
||||||
|
|
||||||
|
v10.0.3 introduced regressions. This release reverts the codebase to the stable v10.0.2 state.
|
||||||
|
|
||||||
|
### What was reverted
|
||||||
|
|
||||||
|
- Connection mutex via promise memoization
|
||||||
|
- Pre-spawn process count guard
|
||||||
|
- Hardened `close()` with try-finally + Unix `pkill -P` fallback
|
||||||
|
- Count-based orphan reaper in `ProcessManager`
|
||||||
|
- Circuit breaker (3 failures → 60s cooldown)
|
||||||
|
- `etime`-based sorting for process guards
|
||||||
|
|
||||||
|
### Files restored to v10.0.2
|
||||||
|
|
||||||
|
- `src/services/sync/ChromaSync.ts`
|
||||||
|
- `src/services/infrastructure/GracefulShutdown.ts`
|
||||||
|
- `src/services/infrastructure/ProcessManager.ts`
|
||||||
|
- `src/services/worker-service.ts`
|
||||||
|
- `src/services/worker/ProcessRegistry.ts`
|
||||||
|
- `tests/infrastructure/process-manager.test.ts`
|
||||||
|
- `tests/integration/chroma-vector-sync.test.ts`
|
||||||
|
|
||||||
## [v10.0.3] - 2026-02-11
|
## [v10.0.3] - 2026-02-11
|
||||||
|
|
||||||
## Fix: Prevent chroma-mcp spawn storm (PR #1065)
|
## Fix: Prevent chroma-mcp spawn storm (PR #1065)
|
||||||
@@ -1519,7 +1544,3 @@ Refactored context loading logic to differentiate between code and non-code mode
|
|||||||
|
|
||||||
🤖 Generated with [Claude Code](https://claude.com/claude-code)
|
🤖 Generated with [Claude Code](https://claude.com/claude-code)
|
||||||
|
|
||||||
## [v8.0.3] - 2025-12-23
|
|
||||||
|
|
||||||
Fix critical worker crashes on startup (v8.0.2 regression)
|
|
||||||
|
|
||||||
|
|||||||
+1
-1
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "claude-mem",
|
"name": "claude-mem",
|
||||||
"version": "10.0.3",
|
"version": "10.0.4",
|
||||||
"description": "Memory compression system for Claude Code - persist context across sessions",
|
"description": "Memory compression system for Claude Code - persist context across sessions",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"claude",
|
"claude",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "claude-mem",
|
"name": "claude-mem",
|
||||||
"version": "10.0.3",
|
"version": "10.0.4",
|
||||||
"description": "Persistent memory system for Claude Code - seamlessly preserve context across sessions",
|
"description": "Persistent memory system for Claude Code - seamlessly preserve context across sessions",
|
||||||
"author": {
|
"author": {
|
||||||
"name": "Alex Newman"
|
"name": "Alex Newman"
|
||||||
|
|||||||
+1
-1
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "claude-mem-plugin",
|
"name": "claude-mem-plugin",
|
||||||
"version": "10.0.3",
|
"version": "10.0.4",
|
||||||
"private": true,
|
"private": true,
|
||||||
"description": "Runtime dependencies for claude-mem bundled hooks",
|
"description": "Runtime dependencies for claude-mem bundled hooks",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
|
|||||||
@@ -9,7 +9,6 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import http from 'http';
|
import http from 'http';
|
||||||
import { execFileSync } from 'child_process';
|
|
||||||
import { logger } from '../../utils/logger.js';
|
import { logger } from '../../utils/logger.js';
|
||||||
import {
|
import {
|
||||||
getChildProcesses,
|
getChildProcesses,
|
||||||
@@ -77,21 +76,6 @@ export async function performGracefulShutdown(config: GracefulShutdownConfig): P
|
|||||||
await config.dbManager.close();
|
await config.dbManager.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
// STEP 5.5: Kill any chroma-mcp children that survived transport.close() (Unix only)
|
|
||||||
// On Unix, getChildProcesses() returns [] (Windows-only), so chroma-mcp
|
|
||||||
// subprocesses spawned via StdioClientTransport may escape STEP 5 cleanup
|
|
||||||
if (process.platform !== 'win32') {
|
|
||||||
try {
|
|
||||||
execFileSync('pkill', ['-P', String(process.pid), '-f', 'chroma-mcp'], {
|
|
||||||
timeout: 3000,
|
|
||||||
stdio: 'ignore'
|
|
||||||
});
|
|
||||||
logger.info('SYSTEM', 'Killed chroma-mcp child processes');
|
|
||||||
} catch {
|
|
||||||
// pkill returns exit code 1 if no processes matched — that's fine
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// STEP 6: Force kill any remaining child processes (Windows zombie port fix)
|
// STEP 6: Force kill any remaining child processes (Windows zombie port fix)
|
||||||
if (childPids.length > 0) {
|
if (childPids.length > 0) {
|
||||||
logger.info('SYSTEM', 'Force killing remaining children');
|
logger.info('SYSTEM', 'Force killing remaining children');
|
||||||
|
|||||||
@@ -339,77 +339,6 @@ export async function cleanupOrphanedProcesses(): Promise<void> {
|
|||||||
logger.info('SYSTEM', 'Orphaned processes cleaned up', { count: pidsToKill.length });
|
logger.info('SYSTEM', 'Orphaned processes cleaned up', { count: pidsToKill.length });
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Clean up excess chroma-mcp processes by count (not age).
|
|
||||||
*
|
|
||||||
* Unlike cleanupOrphanedProcesses() which uses ORPHAN_MAX_AGE_MINUTES = 30,
|
|
||||||
* this function kills by count — essential for catching spawn storms where
|
|
||||||
* all processes are young. Keeps the newest processes (by elapsed time)
|
|
||||||
* and kills the rest.
|
|
||||||
*
|
|
||||||
* Returns the number of processes killed.
|
|
||||||
*/
|
|
||||||
export async function cleanupExcessChromaProcesses(maxAllowed: number = 2): Promise<number> {
|
|
||||||
// Windows: Chroma is disabled entirely, no cleanup needed
|
|
||||||
if (process.platform === 'win32') return 0;
|
|
||||||
|
|
||||||
try {
|
|
||||||
const { stdout } = await execAsync(
|
|
||||||
'ps -eo pid,etime,command | grep -E "chroma-mcp" | grep -v grep || true'
|
|
||||||
);
|
|
||||||
|
|
||||||
if (!stdout.trim()) return 0;
|
|
||||||
|
|
||||||
const processes: Array<{ pid: number; ageMinutes: number }> = [];
|
|
||||||
|
|
||||||
for (const line of stdout.trim().split('\n')) {
|
|
||||||
if (!line.trim()) continue;
|
|
||||||
const match = line.trim().match(/^(\d+)\s+(\S+)\s+(.*)$/);
|
|
||||||
if (!match) continue;
|
|
||||||
|
|
||||||
const pid = parseInt(match[1], 10);
|
|
||||||
const etime = match[2];
|
|
||||||
|
|
||||||
if (!Number.isInteger(pid) || pid <= 0 || pid === process.pid) continue;
|
|
||||||
|
|
||||||
const ageMinutes = parseElapsedTime(etime);
|
|
||||||
// Skip entries with unparseable etime (-1) to avoid sort corruption
|
|
||||||
if (ageMinutes < 0) continue;
|
|
||||||
processes.push({ pid, ageMinutes });
|
|
||||||
}
|
|
||||||
|
|
||||||
if (processes.length <= maxAllowed) return 0;
|
|
||||||
|
|
||||||
// Sort: newest first (lowest age), keep maxAllowed, kill rest
|
|
||||||
processes.sort((a, b) => a.ageMinutes - b.ageMinutes);
|
|
||||||
const toKill = processes.slice(maxAllowed);
|
|
||||||
|
|
||||||
let killed = 0;
|
|
||||||
for (const { pid } of toKill) {
|
|
||||||
try {
|
|
||||||
process.kill(pid, 'SIGTERM');
|
|
||||||
killed++;
|
|
||||||
logger.info('SYSTEM', 'Killed excess chroma-mcp process', { pid });
|
|
||||||
} catch {
|
|
||||||
// Process may already be dead
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (killed > 0) {
|
|
||||||
logger.warn('SYSTEM', 'Cleaned up excess chroma-mcp processes by count', {
|
|
||||||
found: processes.length,
|
|
||||||
killed,
|
|
||||||
maxAllowed
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
return killed;
|
|
||||||
} catch (error) {
|
|
||||||
logger.debug('SYSTEM', 'Failed to enumerate chroma-mcp processes', {}, error as Error);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Spawn a detached daemon process
|
* Spawn a detached daemon process
|
||||||
* Returns the child PID or undefined if spawn failed
|
* Returns the child PID or undefined if spawn failed
|
||||||
|
|||||||
+17
-153
@@ -18,16 +18,12 @@ import { USER_SETTINGS_PATH } from '../../shared/paths.js';
|
|||||||
import path from 'path';
|
import path from 'path';
|
||||||
import os from 'os';
|
import os from 'os';
|
||||||
import fs from 'fs';
|
import fs from 'fs';
|
||||||
import { execSync, execFileSync } from 'child_process';
|
import { execSync } from 'child_process';
|
||||||
import { parseElapsedTime } from '../infrastructure/ProcessManager.js';
|
|
||||||
|
|
||||||
// Version injected at build time by esbuild define
|
// Version injected at build time by esbuild define
|
||||||
declare const __DEFAULT_PACKAGE_VERSION__: string;
|
declare const __DEFAULT_PACKAGE_VERSION__: string;
|
||||||
const packageVersion = typeof __DEFAULT_PACKAGE_VERSION__ !== 'undefined' ? __DEFAULT_PACKAGE_VERSION__ : '0.0.0-dev';
|
const packageVersion = typeof __DEFAULT_PACKAGE_VERSION__ !== 'undefined' ? __DEFAULT_PACKAGE_VERSION__ : '0.0.0-dev';
|
||||||
|
|
||||||
// Maximum allowed chroma-mcp processes before pre-spawn guard kills excess
|
|
||||||
const MAX_CHROMA_PROCESSES = 2; // 1 active + 1 starting
|
|
||||||
|
|
||||||
interface ChromaDocument {
|
interface ChromaDocument {
|
||||||
id: string;
|
id: string;
|
||||||
document: string;
|
document: string;
|
||||||
@@ -94,16 +90,6 @@ export class ChromaSync {
|
|||||||
// MCP SDK's StdioClientTransport uses shell:false and no detached flag, so console is inherited.
|
// MCP SDK's StdioClientTransport uses shell:false and no detached flag, so console is inherited.
|
||||||
private readonly disabled: boolean = false;
|
private readonly disabled: boolean = false;
|
||||||
|
|
||||||
// Layer 0: Connection mutex — coalesces concurrent callers onto single spawn
|
|
||||||
private connectionPromise: Promise<void> | null = null;
|
|
||||||
|
|
||||||
// Layer 4: Circuit breaker — stops retry storms after repeated failures
|
|
||||||
private consecutiveFailures: number = 0;
|
|
||||||
private lastFailureTime: number = 0;
|
|
||||||
private static readonly MAX_FAILURES = 3;
|
|
||||||
private static readonly BACKOFF_BASE_MS = 2000;
|
|
||||||
private static readonly CIRCUIT_OPEN_MS = 60000; // 1 minute cooldown
|
|
||||||
|
|
||||||
constructor(project: string) {
|
constructor(project: string) {
|
||||||
this.project = project;
|
this.project = project;
|
||||||
this.collectionName = `cm__${project}`;
|
this.collectionName = `cm__${project}`;
|
||||||
@@ -192,114 +178,14 @@ export class ChromaSync {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Ensure MCP client is connected to Chroma server (mutex wrapper).
|
* Ensure MCP client is connected to Chroma server
|
||||||
* Coalesces concurrent callers onto a single connection attempt.
|
* Throws error if connection fails
|
||||||
* This prevents N concurrent calls from each spawning a chroma-mcp subprocess.
|
|
||||||
*/
|
*/
|
||||||
private async ensureConnection(): Promise<void> {
|
private async ensureConnection(): Promise<void> {
|
||||||
if (this.connected && this.client) return;
|
if (this.connected && this.client) {
|
||||||
|
return;
|
||||||
// Layer 0: Coalesce concurrent callers onto a single connection attempt
|
|
||||||
if (this.connectionPromise) {
|
|
||||||
return this.connectionPromise;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
this.connectionPromise = this._doConnect();
|
|
||||||
try {
|
|
||||||
await this.connectionPromise;
|
|
||||||
} finally {
|
|
||||||
this.connectionPromise = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Layer 4: Circuit breaker — refuse to spawn after repeated failures.
|
|
||||||
* After MAX_FAILURES consecutive connection failures, stops all spawn
|
|
||||||
* attempts for CIRCUIT_OPEN_MS to prevent process accumulation storms.
|
|
||||||
*/
|
|
||||||
private checkCircuitBreaker(): void {
|
|
||||||
if (this.consecutiveFailures >= ChromaSync.MAX_FAILURES) {
|
|
||||||
const elapsed = Date.now() - this.lastFailureTime;
|
|
||||||
if (elapsed < ChromaSync.CIRCUIT_OPEN_MS) {
|
|
||||||
throw new Error(
|
|
||||||
`Chroma circuit breaker open: ${this.consecutiveFailures} consecutive failures. ` +
|
|
||||||
`Retry in ${Math.ceil((ChromaSync.CIRCUIT_OPEN_MS - elapsed) / 1000)}s`
|
|
||||||
);
|
|
||||||
}
|
|
||||||
// Cooldown expired, allow retry
|
|
||||||
logger.info('CHROMA_SYNC', 'Circuit breaker cooldown expired, allowing retry', {
|
|
||||||
consecutiveFailures: this.consecutiveFailures,
|
|
||||||
cooldownMs: ChromaSync.CIRCUIT_OPEN_MS
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Layer 1: Pre-spawn process count guard.
|
|
||||||
* Kills excess chroma-mcp processes before spawning a new one.
|
|
||||||
* Uses execFileSync (no shell) to list processes, filters in JavaScript.
|
|
||||||
*/
|
|
||||||
private killExcessChromaProcesses(): void {
|
|
||||||
if (process.platform === 'win32') return; // Windows has Chroma disabled entirely
|
|
||||||
|
|
||||||
try {
|
|
||||||
// Use execFileSync to avoid shell injection — filter and sort in JavaScript
|
|
||||||
// Include etime column for reliable age-based sorting (PID order is unreliable)
|
|
||||||
const output = execFileSync('ps', ['-eo', 'pid,etime,command'], {
|
|
||||||
encoding: 'utf8',
|
|
||||||
timeout: 5000,
|
|
||||||
stdio: ['pipe', 'pipe', 'pipe']
|
|
||||||
});
|
|
||||||
|
|
||||||
// Filter for chroma-mcp, parse elapsed time, sort by actual age
|
|
||||||
const processes = output.split('\n')
|
|
||||||
.filter(l => l.includes('chroma-mcp'))
|
|
||||||
.map(l => {
|
|
||||||
const parts = l.trim().split(/\s+/);
|
|
||||||
const pid = parseInt(parts[0], 10);
|
|
||||||
const etime = parts[1] || '';
|
|
||||||
const ageMinutes = parseElapsedTime(etime);
|
|
||||||
return { pid, ageMinutes };
|
|
||||||
})
|
|
||||||
.filter(p => p.pid > 0 && p.pid !== process.pid && p.ageMinutes >= 0)
|
|
||||||
.sort((a, b) => a.ageMinutes - b.ageMinutes); // Ascending: newest (lowest age) first
|
|
||||||
|
|
||||||
if (processes.length < MAX_CHROMA_PROCESSES) return;
|
|
||||||
|
|
||||||
// Keep newest MAX_CHROMA_PROCESSES - 1 (making room for the one we're about to spawn)
|
|
||||||
const toKill = processes.slice(MAX_CHROMA_PROCESSES - 1);
|
|
||||||
for (const { pid } of toKill) {
|
|
||||||
try {
|
|
||||||
process.kill(pid, 'SIGTERM');
|
|
||||||
} catch {
|
|
||||||
// Process may already be dead
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (toKill.length > 0) {
|
|
||||||
logger.warn('CHROMA_SYNC', 'Killed excess chroma-mcp processes before spawning', {
|
|
||||||
found: processes.length,
|
|
||||||
killed: toKill.length,
|
|
||||||
maxAllowed: MAX_CHROMA_PROCESSES
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} catch {
|
|
||||||
// ps may fail — don't block connection
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Internal connection logic — called only via ensureConnection() mutex.
|
|
||||||
* Implements circuit breaker (Layer 4), pre-spawn guard (Layer 1),
|
|
||||||
* and actual connection setup.
|
|
||||||
*/
|
|
||||||
private async _doConnect(): Promise<void> {
|
|
||||||
// Layer 4: Circuit breaker check — refuse if too many recent failures
|
|
||||||
this.checkCircuitBreaker();
|
|
||||||
|
|
||||||
// Layer 1: Kill excess processes before spawning a new one
|
|
||||||
this.killExcessChromaProcesses();
|
|
||||||
|
|
||||||
logger.info('CHROMA_SYNC', 'Connecting to Chroma MCP server...', { project: this.project });
|
logger.info('CHROMA_SYNC', 'Connecting to Chroma MCP server...', { project: this.project });
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@@ -352,20 +238,9 @@ export class ChromaSync {
|
|||||||
await this.client.connect(this.transport);
|
await this.client.connect(this.transport);
|
||||||
this.connected = true;
|
this.connected = true;
|
||||||
|
|
||||||
// Layer 4: Reset circuit breaker on success
|
|
||||||
this.consecutiveFailures = 0;
|
|
||||||
|
|
||||||
logger.info('CHROMA_SYNC', 'Connected to Chroma MCP server', { project: this.project });
|
logger.info('CHROMA_SYNC', 'Connected to Chroma MCP server', { project: this.project });
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
// Layer 4: Track failure for circuit breaker
|
logger.error('CHROMA_SYNC', 'Failed to connect to Chroma MCP server', { project: this.project }, error as Error);
|
||||||
this.consecutiveFailures++;
|
|
||||||
this.lastFailureTime = Date.now();
|
|
||||||
|
|
||||||
logger.error('CHROMA_SYNC', 'Failed to connect to Chroma MCP server', {
|
|
||||||
project: this.project,
|
|
||||||
consecutiveFailures: this.consecutiveFailures,
|
|
||||||
circuitBreakerThreshold: ChromaSync.MAX_FAILURES
|
|
||||||
}, error as Error);
|
|
||||||
throw new Error(`Chroma connection failed: ${error instanceof Error ? error.message : String(error)}`);
|
throw new Error(`Chroma connection failed: ${error instanceof Error ? error.message : String(error)}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -416,7 +291,6 @@ export class ChromaSync {
|
|||||||
this.connected = false;
|
this.connected = false;
|
||||||
this.client = null;
|
this.client = null;
|
||||||
this.transport = null;
|
this.transport = null;
|
||||||
this.connectionPromise = null;
|
|
||||||
logger.error('CHROMA_SYNC', 'Connection lost during collection check',
|
logger.error('CHROMA_SYNC', 'Connection lost during collection check',
|
||||||
{ collection: this.collectionName }, error as Error);
|
{ collection: this.collectionName }, error as Error);
|
||||||
throw new Error(`Chroma connection lost: ${errorMessage}`);
|
throw new Error(`Chroma connection lost: ${errorMessage}`);
|
||||||
@@ -1086,7 +960,6 @@ export class ChromaSync {
|
|||||||
this.connected = false;
|
this.connected = false;
|
||||||
this.client = null;
|
this.client = null;
|
||||||
this.transport = null;
|
this.transport = null;
|
||||||
this.connectionPromise = null;
|
|
||||||
logger.error('CHROMA_SYNC', 'Connection lost during query',
|
logger.error('CHROMA_SYNC', 'Connection lost during query',
|
||||||
{ project: this.project, query }, error as Error);
|
{ project: this.project, query }, error as Error);
|
||||||
throw new Error(`Chroma query failed - connection lost: ${errorMessage}`);
|
throw new Error(`Chroma query failed - connection lost: ${errorMessage}`);
|
||||||
@@ -1144,37 +1017,28 @@ export class ChromaSync {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Close the Chroma client connection and cleanup subprocess.
|
* Close the Chroma client connection and cleanup subprocess
|
||||||
* Uses try-finally to guarantee state reset even if close() throws.
|
|
||||||
* Individual close calls use .catch() to prevent one failure from
|
|
||||||
* blocking the other (e.g., client.close() failing shouldn't prevent
|
|
||||||
* transport.close() from killing the subprocess).
|
|
||||||
*/
|
*/
|
||||||
async close(): Promise<void> {
|
async close(): Promise<void> {
|
||||||
if (!this.connected && !this.client && !this.transport) {
|
if (!this.connected && !this.client && !this.transport) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
// Close client first
|
||||||
// Close client first, then transport — catch individual errors
|
|
||||||
if (this.client) {
|
if (this.client) {
|
||||||
await this.client.close().catch((err: Error) => {
|
await this.client.close();
|
||||||
logger.debug('CHROMA_SYNC', 'Client close error (may already be disconnected)', {}, err);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Explicitly close transport to kill subprocess
|
||||||
if (this.transport) {
|
if (this.transport) {
|
||||||
await this.transport.close().catch((err: Error) => {
|
await this.transport.close();
|
||||||
logger.debug('CHROMA_SYNC', 'Transport close error (may already be dead)', {}, err);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} finally {
|
|
||||||
// Always reset state, even if close throws
|
|
||||||
this.connected = false;
|
|
||||||
this.client = null;
|
|
||||||
this.transport = null;
|
|
||||||
this.connectionPromise = null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info('CHROMA_SYNC', 'Chroma client and subprocess closed', { project: this.project });
|
logger.info('CHROMA_SYNC', 'Chroma client and subprocess closed', { project: this.project });
|
||||||
|
|
||||||
|
// Always reset state
|
||||||
|
this.connected = false;
|
||||||
|
this.client = null;
|
||||||
|
this.transport = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -68,7 +68,6 @@ import {
|
|||||||
removePidFile,
|
removePidFile,
|
||||||
getPlatformTimeout,
|
getPlatformTimeout,
|
||||||
cleanupOrphanedProcesses,
|
cleanupOrphanedProcesses,
|
||||||
cleanupExcessChromaProcesses,
|
|
||||||
cleanStalePidFile,
|
cleanStalePidFile,
|
||||||
spawnDaemon,
|
spawnDaemon,
|
||||||
createSignalHandler
|
createSignalHandler
|
||||||
@@ -361,7 +360,6 @@ export class WorkerService {
|
|||||||
private async initializeBackground(): Promise<void> {
|
private async initializeBackground(): Promise<void> {
|
||||||
try {
|
try {
|
||||||
await cleanupOrphanedProcesses();
|
await cleanupOrphanedProcesses();
|
||||||
await cleanupExcessChromaProcesses();
|
|
||||||
|
|
||||||
// Load mode configuration
|
// Load mode configuration
|
||||||
const { ModeManager } = await import('./domain/ModeManager.js');
|
const { ModeManager } = await import('./domain/ModeManager.js');
|
||||||
|
|||||||
@@ -19,7 +19,6 @@
|
|||||||
import { spawn, exec, ChildProcess } from 'child_process';
|
import { spawn, exec, ChildProcess } from 'child_process';
|
||||||
import { promisify } from 'util';
|
import { promisify } from 'util';
|
||||||
import { logger } from '../../utils/logger.js';
|
import { logger } from '../../utils/logger.js';
|
||||||
import { cleanupExcessChromaProcesses } from '../infrastructure/ProcessManager.js';
|
|
||||||
|
|
||||||
const execAsync = promisify(exec);
|
const execAsync = promisify(exec);
|
||||||
|
|
||||||
@@ -213,7 +212,7 @@ async function killSystemOrphans(): Promise<number> {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
const { stdout } = await execAsync(
|
const { stdout } = await execAsync(
|
||||||
'ps -eo pid,ppid,args 2>/dev/null | grep -E "claude.*haiku|claude.*output-format|chroma-mcp" | grep -v grep'
|
'ps -eo pid,ppid,args 2>/dev/null | grep -E "claude.*haiku|claude.*output-format" | grep -v grep'
|
||||||
);
|
);
|
||||||
|
|
||||||
let killed = 0;
|
let killed = 0;
|
||||||
@@ -263,9 +262,6 @@ export async function reapOrphanedProcesses(activeSessionIds: Set<number>): Prom
|
|||||||
// Daemon children: find idle SDK processes that didn't terminate
|
// Daemon children: find idle SDK processes that didn't terminate
|
||||||
killed += await killIdleDaemonChildren();
|
killed += await killIdleDaemonChildren();
|
||||||
|
|
||||||
// Count-based: kill excess chroma-mcp processes regardless of age
|
|
||||||
killed += await cleanupExcessChromaProcesses();
|
|
||||||
|
|
||||||
return killed;
|
return killed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ import {
|
|||||||
removePidFile,
|
removePidFile,
|
||||||
getPlatformTimeout,
|
getPlatformTimeout,
|
||||||
parseElapsedTime,
|
parseElapsedTime,
|
||||||
cleanupExcessChromaProcesses,
|
|
||||||
isProcessAlive,
|
isProcessAlive,
|
||||||
cleanStalePidFile,
|
cleanStalePidFile,
|
||||||
spawnDaemon,
|
spawnDaemon,
|
||||||
@@ -226,65 +225,6 @@ describe('ProcessManager', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('cleanupExcessChromaProcesses (Issue #1063)', () => {
|
|
||||||
/**
|
|
||||||
* Tests for count-based chroma-mcp process cleanup.
|
|
||||||
* Unlike the age-based cleanupOrphanedProcesses() which has a 30-minute
|
|
||||||
* threshold, this function kills by count — essential for catching spawn
|
|
||||||
* storms where all processes are young.
|
|
||||||
*/
|
|
||||||
|
|
||||||
it('should be exported and callable', () => {
|
|
||||||
expect(typeof cleanupExcessChromaProcesses).toBe('function');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should return 0 on Windows (Chroma disabled)', async () => {
|
|
||||||
const originalPlatform = process.platform;
|
|
||||||
Object.defineProperty(process, 'platform', {
|
|
||||||
value: 'win32',
|
|
||||||
writable: true,
|
|
||||||
configurable: true
|
|
||||||
});
|
|
||||||
|
|
||||||
try {
|
|
||||||
const killed = await cleanupExcessChromaProcesses();
|
|
||||||
expect(killed).toBe(0);
|
|
||||||
} finally {
|
|
||||||
Object.defineProperty(process, 'platform', {
|
|
||||||
value: originalPlatform,
|
|
||||||
writable: true,
|
|
||||||
configurable: true
|
|
||||||
});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should accept custom maxAllowed parameter', async () => {
|
|
||||||
// Should not throw with any valid maxAllowed value
|
|
||||||
const killed = await cleanupExcessChromaProcesses(5);
|
|
||||||
expect(killed).toBeGreaterThanOrEqual(0);
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should return a number (killed count)', async () => {
|
|
||||||
const killed = await cleanupExcessChromaProcesses();
|
|
||||||
expect(typeof killed).toBe('number');
|
|
||||||
expect(killed).toBeGreaterThanOrEqual(0);
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should exist in ProcessManager source with count-based logic', async () => {
|
|
||||||
const sourceFile = await Bun.file(
|
|
||||||
new URL('../../src/services/infrastructure/ProcessManager.ts', import.meta.url)
|
|
||||||
).text();
|
|
||||||
|
|
||||||
// Verify count-based logic exists (not age-based)
|
|
||||||
expect(sourceFile).toContain('cleanupExcessChromaProcesses');
|
|
||||||
expect(sourceFile).toContain('chroma-mcp');
|
|
||||||
|
|
||||||
// Should sort by age and keep newest
|
|
||||||
expect(sourceFile).toContain('.sort(');
|
|
||||||
expect(sourceFile).toContain('.slice(maxAllowed)');
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
describe('isProcessAlive', () => {
|
describe('isProcessAlive', () => {
|
||||||
it('should return true for the current process', () => {
|
it('should return true for the current process', () => {
|
||||||
expect(isProcessAlive(process.pid)).toBe(true);
|
expect(isProcessAlive(process.pid)).toBe(true);
|
||||||
|
|||||||
@@ -392,209 +392,4 @@ describe('ChromaSync Vector Sync Integration', () => {
|
|||||||
expect(sourceFile).toContain('this.transport = null');
|
expect(sourceFile).toContain('this.transport = null');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('Spawn storm prevention (Issue #1063)', () => {
|
|
||||||
/**
|
|
||||||
* Regression tests for chroma-mcp spawn storm:
|
|
||||||
* 641 processes spawned in ~5 minutes from 6 concurrent sessions.
|
|
||||||
*
|
|
||||||
* Root cause: ensureConnection() had no mutex. Concurrent callers
|
|
||||||
* each spawned a chroma-mcp subprocess via StdioClientTransport.
|
|
||||||
*
|
|
||||||
* Fix: 5 defense layers — connection mutex, pre-spawn count guard,
|
|
||||||
* hardened close(), count-based orphan reaper, circuit breaker.
|
|
||||||
*/
|
|
||||||
|
|
||||||
describe('Layer 0: Connection mutex', () => {
|
|
||||||
it('should have connectionPromise field for mutex', async () => {
|
|
||||||
const { ChromaSync } = await import('../../src/services/sync/ChromaSync.js');
|
|
||||||
const sync = new ChromaSync(testProject);
|
|
||||||
const syncAny = sync as any;
|
|
||||||
|
|
||||||
// connectionPromise should be null initially
|
|
||||||
expect(syncAny.connectionPromise).toBeNull();
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should coalesce concurrent ensureConnection calls via source code', async () => {
|
|
||||||
// Static analysis: verify mutex pattern exists in source
|
|
||||||
const sourceFile = await Bun.file(
|
|
||||||
new URL('../../src/services/sync/ChromaSync.ts', import.meta.url)
|
|
||||||
).text();
|
|
||||||
|
|
||||||
// Verify mutex pattern: check for connectionPromise, return it
|
|
||||||
expect(sourceFile).toContain('if (this.connectionPromise)');
|
|
||||||
expect(sourceFile).toContain('return this.connectionPromise');
|
|
||||||
expect(sourceFile).toContain('this.connectionPromise = this._doConnect()');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should clear connectionPromise in finally block', async () => {
|
|
||||||
const sourceFile = await Bun.file(
|
|
||||||
new URL('../../src/services/sync/ChromaSync.ts', import.meta.url)
|
|
||||||
).text();
|
|
||||||
|
|
||||||
// The finally block ensures connectionPromise is cleared even on error
|
|
||||||
expect(sourceFile).toContain('finally {');
|
|
||||||
expect(sourceFile).toContain('this.connectionPromise = null');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should clear connectionPromise in error recovery paths', async () => {
|
|
||||||
const sourceFile = await Bun.file(
|
|
||||||
new URL('../../src/services/sync/ChromaSync.ts', import.meta.url)
|
|
||||||
).text();
|
|
||||||
|
|
||||||
// Count occurrences of connectionPromise = null (should be in multiple places:
|
|
||||||
// finally block, ensureCollection error, queryChroma error, close())
|
|
||||||
const matches = sourceFile.match(/this\.connectionPromise = null/g) || [];
|
|
||||||
expect(matches.length).toBeGreaterThanOrEqual(4);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
describe('Layer 1: Pre-spawn process count guard', () => {
|
|
||||||
it('should have killExcessChromaProcesses method', async () => {
|
|
||||||
const { ChromaSync } = await import('../../src/services/sync/ChromaSync.js');
|
|
||||||
const sync = new ChromaSync(testProject);
|
|
||||||
const syncAny = sync as any;
|
|
||||||
|
|
||||||
expect(typeof syncAny.killExcessChromaProcesses).toBe('function');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should use execFileSync not execSync for safety', async () => {
|
|
||||||
const sourceFile = await Bun.file(
|
|
||||||
new URL('../../src/services/sync/ChromaSync.ts', import.meta.url)
|
|
||||||
).text();
|
|
||||||
|
|
||||||
// killExcessChromaProcesses should use execFileSync (no shell injection)
|
|
||||||
// Extract just the method body
|
|
||||||
const methodStart = sourceFile.indexOf('killExcessChromaProcesses');
|
|
||||||
const methodBody = sourceFile.slice(methodStart, methodStart + 500);
|
|
||||||
expect(methodBody).toContain('execFileSync');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should define MAX_CHROMA_PROCESSES constant', async () => {
|
|
||||||
const sourceFile = await Bun.file(
|
|
||||||
new URL('../../src/services/sync/ChromaSync.ts', import.meta.url)
|
|
||||||
).text();
|
|
||||||
|
|
||||||
expect(sourceFile).toContain('MAX_CHROMA_PROCESSES');
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
describe('Layer 2: Hardened close()', () => {
|
|
||||||
it('should use try-finally to guarantee state reset', async () => {
|
|
||||||
const sourceFile = await Bun.file(
|
|
||||||
new URL('../../src/services/sync/ChromaSync.ts', import.meta.url)
|
|
||||||
).text();
|
|
||||||
|
|
||||||
// Find the close() method body (needs larger slice to capture finally block)
|
|
||||||
const closeStart = sourceFile.indexOf('async close():');
|
|
||||||
const closeBody = sourceFile.slice(closeStart, closeStart + 1000);
|
|
||||||
|
|
||||||
// Verify try-finally pattern
|
|
||||||
expect(closeBody).toContain('try {');
|
|
||||||
expect(closeBody).toContain('} finally {');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should catch individual close errors with .catch()', async () => {
|
|
||||||
const sourceFile = await Bun.file(
|
|
||||||
new URL('../../src/services/sync/ChromaSync.ts', import.meta.url)
|
|
||||||
).text();
|
|
||||||
|
|
||||||
const closeStart = sourceFile.indexOf('async close():');
|
|
||||||
const closeBody = sourceFile.slice(closeStart, closeStart + 600);
|
|
||||||
|
|
||||||
// Both client and transport close should have .catch()
|
|
||||||
expect(closeBody).toContain('this.client.close().catch(');
|
|
||||||
expect(closeBody).toContain('this.transport.close().catch(');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should reset connectionPromise in close()', async () => {
|
|
||||||
const { ChromaSync } = await import('../../src/services/sync/ChromaSync.js');
|
|
||||||
const sync = new ChromaSync(testProject);
|
|
||||||
const syncAny = sync as any;
|
|
||||||
|
|
||||||
// Simulate partially initialized state — must set connected=true
|
|
||||||
// so close() doesn't early-return before reaching the finally block
|
|
||||||
syncAny.connectionPromise = Promise.resolve();
|
|
||||||
syncAny.connected = true;
|
|
||||||
|
|
||||||
await sync.close();
|
|
||||||
|
|
||||||
// connectionPromise must be reset
|
|
||||||
expect(syncAny.connectionPromise).toBeNull();
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
describe('Layer 4: Circuit breaker', () => {
|
|
||||||
it('should have circuit breaker fields', async () => {
|
|
||||||
const { ChromaSync } = await import('../../src/services/sync/ChromaSync.js');
|
|
||||||
const sync = new ChromaSync(testProject);
|
|
||||||
const syncAny = sync as any;
|
|
||||||
|
|
||||||
expect(syncAny.consecutiveFailures).toBe(0);
|
|
||||||
expect(syncAny.lastFailureTime).toBe(0);
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should have circuit breaker constants', async () => {
|
|
||||||
const { ChromaSync } = await import('../../src/services/sync/ChromaSync.js');
|
|
||||||
const syncClass = ChromaSync as any;
|
|
||||||
|
|
||||||
expect(syncClass.MAX_FAILURES).toBe(3);
|
|
||||||
expect(syncClass.CIRCUIT_OPEN_MS).toBe(60000);
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should throw when circuit breaker is open', async () => {
|
|
||||||
const { ChromaSync } = await import('../../src/services/sync/ChromaSync.js');
|
|
||||||
const sync = new ChromaSync(testProject);
|
|
||||||
const syncAny = sync as any;
|
|
||||||
|
|
||||||
// Simulate 3 consecutive failures just now
|
|
||||||
syncAny.consecutiveFailures = 3;
|
|
||||||
syncAny.lastFailureTime = Date.now();
|
|
||||||
|
|
||||||
// checkCircuitBreaker should throw
|
|
||||||
expect(() => syncAny.checkCircuitBreaker()).toThrow('circuit breaker open');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should allow retry after cooldown expires', async () => {
|
|
||||||
const { ChromaSync } = await import('../../src/services/sync/ChromaSync.js');
|
|
||||||
const sync = new ChromaSync(testProject);
|
|
||||||
const syncAny = sync as any;
|
|
||||||
|
|
||||||
// Simulate 3 failures that happened 2 minutes ago (past cooldown)
|
|
||||||
syncAny.consecutiveFailures = 3;
|
|
||||||
syncAny.lastFailureTime = Date.now() - 120000;
|
|
||||||
|
|
||||||
// checkCircuitBreaker should NOT throw — cooldown expired
|
|
||||||
expect(() => syncAny.checkCircuitBreaker()).not.toThrow();
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should not throw when under failure threshold', async () => {
|
|
||||||
const { ChromaSync } = await import('../../src/services/sync/ChromaSync.js');
|
|
||||||
const sync = new ChromaSync(testProject);
|
|
||||||
const syncAny = sync as any;
|
|
||||||
|
|
||||||
// 2 failures (under MAX_FAILURES = 3)
|
|
||||||
syncAny.consecutiveFailures = 2;
|
|
||||||
syncAny.lastFailureTime = Date.now();
|
|
||||||
|
|
||||||
expect(() => syncAny.checkCircuitBreaker()).not.toThrow();
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should track failures in _doConnect error path via source code', async () => {
|
|
||||||
const sourceFile = await Bun.file(
|
|
||||||
new URL('../../src/services/sync/ChromaSync.ts', import.meta.url)
|
|
||||||
).text();
|
|
||||||
|
|
||||||
// Verify _doConnect increments consecutiveFailures on error
|
|
||||||
const doConnectStart = sourceFile.indexOf('private async _doConnect');
|
|
||||||
const doConnectBody = sourceFile.slice(doConnectStart, doConnectStart + 3000);
|
|
||||||
|
|
||||||
expect(doConnectBody).toContain('this.consecutiveFailures++');
|
|
||||||
expect(doConnectBody).toContain('this.lastFailureTime = Date.now()');
|
|
||||||
|
|
||||||
// And resets on success
|
|
||||||
expect(doConnectBody).toContain('this.consecutiveFailures = 0');
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user