Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 130abe04a9 | |||
| bff10d49c9 | |||
| 40a71d3250 | |||
| ae3d20c71a | |||
| 54ef9662c1 | |||
| 9aec461e14 | |||
| 0fe0705133 | |||
| a5bf653a47 | |||
| 1fec1e8339 | |||
| 1afb14d0d6 |
@@ -10,7 +10,7 @@
|
||||
"plugins": [
|
||||
{
|
||||
"name": "claude-mem",
|
||||
"version": "7.3.4",
|
||||
"version": "7.3.7",
|
||||
"source": "./plugin",
|
||||
"description": "Persistent memory system for Claude Code - context compression across sessions"
|
||||
}
|
||||
|
||||
@@ -4,6 +4,27 @@ All notable changes to this project will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||
|
||||
## [7.3.6] - 2025-12-17
|
||||
|
||||
## Bug Fixes
|
||||
|
||||
- Enhanced SDKAgent response handling and message processing
|
||||
|
||||
## [7.3.5] - 2025-12-17
|
||||
|
||||
## What's Changed
|
||||
* fix(windows): solve zombie port problem with wrapper architecture by @ToxMox in https://github.com/thedotmack/claude-mem/pull/372
|
||||
* chore: bump version to 7.3.5 by @thedotmack in https://github.com/thedotmack/claude-mem/pull/375
|
||||
|
||||
## New Contributors
|
||||
* @ToxMox made their first contribution in https://github.com/thedotmack/claude-mem/pull/372
|
||||
|
||||
**Full Changelog**: https://github.com/thedotmack/claude-mem/compare/v7.3.4...v7.3.5
|
||||
|
||||
## [7.3.4] - 2025-12-17
|
||||
|
||||
Patch release for bug fixes and minor improvements
|
||||
|
||||
## [7.3.3] - 2025-12-16
|
||||
|
||||
## What's Changed
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "claude-mem",
|
||||
"version": "7.3.4",
|
||||
"version": "7.3.7",
|
||||
"description": "Memory compression system for Claude Code - persist context across sessions",
|
||||
"keywords": [
|
||||
"claude",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "claude-mem",
|
||||
"version": "7.3.4",
|
||||
"version": "7.3.7",
|
||||
"description": "Persistent memory system for Claude Code - seamlessly preserve context across sessions",
|
||||
"author": {
|
||||
"name": "Alex Newman"
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "claude-mem-plugin",
|
||||
"version": "7.3.4",
|
||||
"version": "7.3.7",
|
||||
"private": true,
|
||||
"description": "Runtime dependencies for claude-mem bundled hooks",
|
||||
"type": "module",
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+26
-12
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Executable
+2
@@ -0,0 +1,2 @@
|
||||
#!/usr/bin/env bun
|
||||
"use strict";var u=Object.create;var w=Object.defineProperty;var I=Object.getOwnPropertyDescriptor;var f=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,k=Object.prototype.hasOwnProperty;var y=(e,i,t,o)=>{if(i&&typeof i=="object"||typeof i=="function")for(let s of f(i))!k.call(e,s)&&s!==t&&w(e,s,{get:()=>i[s],enumerable:!(o=I(i,s))||o.enumerable});return e};var P=(e,i,t)=>(t=e!=null?u(g(e)):{},y(i||!e||!e.__esModule?w(t,"default",{value:e,enumerable:!0}):t,e));var c=require("child_process"),p=P(require("path"),1),h=process.platform==="win32",x=__dirname,l=p.default.join(x,"worker-service.cjs"),n=null,a=!1;function r(e){let i=new Date().toISOString();console.log(`[${i}] [wrapper] ${e}`)}function m(){r(`Spawning inner worker: ${l}`),n=(0,c.spawn)(process.execPath,[l],{stdio:["inherit","inherit","inherit","ipc"],env:{...process.env,CLAUDE_MEM_MANAGED:"true"},cwd:p.default.dirname(l)}),n.on("message",async e=>{(e.type==="restart"||e.type==="shutdown")&&(r(`${e.type} requested by inner`),a=!0,await d(),r("Exiting wrapper"),process.exit(0))}),n.on("exit",(e,i)=>{r(`Inner exited with code=${e}, signal=${i}`),n=null,!a&&e!==0&&(r("Inner crashed, respawning in 1 second..."),setTimeout(()=>m(),1e3))}),n.on("error",e=>{r(`Inner error: ${e.message}`)})}async function d(){if(!n||!n.pid){r("No inner process to kill");return}let e=n.pid;if(r(`Killing inner process tree (pid=${e})`),h)try{(0,c.execSync)(`taskkill /PID ${e} /T /F`,{timeout:1e4,stdio:"ignore"}),r(`taskkill completed for pid=${e}`)}catch(i){r(`taskkill failed (process may be dead): ${i}`)}else{n.kill("SIGTERM");let i=new Promise(o=>{if(!n){o();return}n.on("exit",()=>o())}),t=new Promise(o=>setTimeout(()=>o(),5e3));await Promise.race([i,t]),n&&!n.killed&&(r("Inner did not exit gracefully, force killing"),n.kill("SIGKILL"))}await S(e,5e3),n=null,r("Inner process terminated")}async function S(e,i){let t=Date.now();for(;Date.now()-t<i;)try{process.kill(e,0),await new Promise(o=>setTimeout(o,100))}catch{return}r(`Timeout waiting for process ${e} to exit`)}process.on("SIGTERM",async()=>{r("Wrapper received SIGTERM"),a=!0,await d(),process.exit(0)});process.on("SIGINT",async()=>{r("Wrapper received SIGINT"),a=!0,await d(),process.exit(0)});r("Wrapper starting");m();
|
||||
Binary file not shown.
@@ -26,6 +26,11 @@ const WORKER_SERVICE = {
|
||||
source: 'src/services/worker-service.ts'
|
||||
};
|
||||
|
||||
const WORKER_WRAPPER = {
|
||||
name: 'worker-wrapper',
|
||||
source: 'src/services/worker-wrapper.ts'
|
||||
};
|
||||
|
||||
const MCP_SERVER = {
|
||||
name: 'mcp-server',
|
||||
source: 'src/servers/mcp-server.ts'
|
||||
@@ -120,6 +125,31 @@ async function buildHooks() {
|
||||
const workerStats = fs.statSync(`${hooksDir}/${WORKER_SERVICE.name}.cjs`);
|
||||
console.log(`✓ worker-service built (${(workerStats.size / 1024).toFixed(2)} KB)`);
|
||||
|
||||
// Build worker wrapper (Windows zombie port fix)
|
||||
console.log(`\n🔧 Building worker wrapper...`);
|
||||
await build({
|
||||
entryPoints: [WORKER_WRAPPER.source],
|
||||
bundle: true,
|
||||
platform: 'node',
|
||||
target: 'node18',
|
||||
format: 'cjs',
|
||||
outfile: `${hooksDir}/${WORKER_WRAPPER.name}.cjs`,
|
||||
minify: true,
|
||||
logLevel: 'error',
|
||||
external: ['bun:sqlite'],
|
||||
define: {
|
||||
'__DEFAULT_PACKAGE_VERSION__': `"${version}"`
|
||||
},
|
||||
banner: {
|
||||
js: '#!/usr/bin/env bun'
|
||||
}
|
||||
});
|
||||
|
||||
// Make worker wrapper executable
|
||||
fs.chmodSync(`${hooksDir}/${WORKER_WRAPPER.name}.cjs`, 0o755);
|
||||
const wrapperStats = fs.statSync(`${hooksDir}/${WORKER_WRAPPER.name}.cjs`);
|
||||
console.log(`✓ worker-wrapper built (${(wrapperStats.size / 1024).toFixed(2)} KB)`);
|
||||
|
||||
// Build MCP server
|
||||
console.log(`\n🔧 Building MCP server...`);
|
||||
await build({
|
||||
|
||||
@@ -6,12 +6,12 @@
|
||||
* native module dependencies.
|
||||
*/
|
||||
|
||||
import path from "path";
|
||||
import { stdin } from "process";
|
||||
import { ensureWorkerRunning, getWorkerPort } from "../shared/worker-utils.js";
|
||||
import { HOOK_TIMEOUTS } from "../shared/hook-constants.js";
|
||||
import { handleWorkerError } from "../shared/hook-error-handler.js";
|
||||
import { handleFetchError } from "./shared/error-handler.js";
|
||||
import { getProjectName } from "../utils/project-name.js";
|
||||
|
||||
export interface SessionStartInput {
|
||||
session_id: string;
|
||||
@@ -25,7 +25,7 @@ async function contextHook(input?: SessionStartInput): Promise<string> {
|
||||
await ensureWorkerRunning();
|
||||
|
||||
const cwd = input?.cwd ?? process.cwd();
|
||||
const project = cwd ? path.basename(cwd) : "unknown-project";
|
||||
const project = getProjectName(cwd);
|
||||
const port = getWorkerPort();
|
||||
|
||||
const url = `http://127.0.0.1:${port}/api/context/inject?project=${encodeURIComponent(project)}`;
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import path from 'path';
|
||||
import { stdin } from 'process';
|
||||
import { createHookResponse } from './hook-response.js';
|
||||
import { ensureWorkerRunning, getWorkerPort } from '../shared/worker-utils.js';
|
||||
import { handleWorkerError } from '../shared/hook-error-handler.js';
|
||||
import { handleFetchError } from './shared/error-handler.js';
|
||||
import { getProjectName } from '../utils/project-name.js';
|
||||
|
||||
export interface UserPromptSubmitInput {
|
||||
session_id: string;
|
||||
@@ -24,7 +24,7 @@ async function newHook(input?: UserPromptSubmitInput): Promise<void> {
|
||||
}
|
||||
|
||||
const { session_id, cwd, prompt } = input;
|
||||
const project = path.basename(cwd);
|
||||
const project = getProjectName(cwd);
|
||||
|
||||
const port = getWorkerPort();
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ import {
|
||||
toRelativePath,
|
||||
extractFirstFile
|
||||
} from '../shared/timeline-formatting.js';
|
||||
import { getProjectName } from '../utils/project-name.js';
|
||||
|
||||
// Version marker path - use homedir-based path that works in both CJS and ESM contexts
|
||||
const VERSION_MARKER_PATH = path.join(homedir(), '.claude', 'plugins', 'marketplaces', 'thedotmack', 'plugin', '.install-version');
|
||||
@@ -222,7 +223,7 @@ function extractPriorMessages(transcriptPath: string): { userMessage: string; as
|
||||
export async function generateContext(input?: ContextInput, useColors: boolean = false): Promise<string> {
|
||||
const config = loadContextConfig();
|
||||
const cwd = input?.cwd ?? process.cwd();
|
||||
const project = cwd ? path.basename(cwd) : 'unknown-project';
|
||||
const project = getProjectName(cwd);
|
||||
|
||||
let db: SessionStore | null = null;
|
||||
try {
|
||||
|
||||
@@ -43,8 +43,10 @@ export class ProcessManager {
|
||||
// Ensure log directory exists
|
||||
mkdirSync(LOG_DIR, { recursive: true });
|
||||
|
||||
// Get worker script path
|
||||
const workerScript = join(MARKETPLACE_ROOT, 'plugin', 'scripts', 'worker-service.cjs');
|
||||
// On Windows, use the wrapper script to solve zombie port problem
|
||||
// On Unix, use the worker directly
|
||||
const scriptName = process.platform === 'win32' ? 'worker-wrapper.cjs' : 'worker-service.cjs';
|
||||
const workerScript = join(MARKETPLACE_ROOT, 'plugin', 'scripts', scriptName);
|
||||
|
||||
if (!existsSync(workerScript)) {
|
||||
return { success: false, error: `Worker script not found at ${workerScript}` };
|
||||
@@ -86,6 +88,10 @@ export class ProcessManager {
|
||||
// Note: windowsHide: true doesn't work with detached: true (Bun inherits Node.js process spawning semantics)
|
||||
// See: https://github.com/nodejs/node/issues/21825 and PR #315 for detailed testing
|
||||
//
|
||||
// On Windows, we start worker-wrapper.cjs which manages the actual worker-service.cjs.
|
||||
// This solves the zombie port problem: the wrapper has no sockets, so when it kills
|
||||
// and respawns the inner worker, the socket is properly released.
|
||||
//
|
||||
// Security: All paths (bunPath, script, MARKETPLACE_ROOT) are application-controlled system paths,
|
||||
// not user input. If an attacker could modify these paths, they would already have full filesystem
|
||||
// access including direct access to ~/.claude-mem/claude-mem.db. Nevertheless, we properly escape
|
||||
@@ -168,8 +174,21 @@ export class ProcessManager {
|
||||
if (!info) return true;
|
||||
|
||||
try {
|
||||
process.kill(info.pid, 'SIGTERM');
|
||||
await this.waitForExit(info.pid, timeout);
|
||||
if (process.platform === 'win32') {
|
||||
// On Windows, use taskkill /T /F to kill entire process tree
|
||||
// This ensures the wrapper AND all its children (inner worker, MCP, ChromaSync) are killed
|
||||
// which is necessary to properly release the socket and avoid zombie ports
|
||||
const { execSync } = await import('child_process');
|
||||
try {
|
||||
execSync(`taskkill /PID ${info.pid} /T /F`, { timeout: 10000, stdio: 'ignore' });
|
||||
} catch {
|
||||
// Process may already be dead
|
||||
}
|
||||
} else {
|
||||
// On Unix, use signals
|
||||
process.kill(info.pid, 'SIGTERM');
|
||||
await this.waitForExit(info.pid, timeout);
|
||||
}
|
||||
} catch {
|
||||
try {
|
||||
process.kill(info.pid, 'SIGKILL');
|
||||
@@ -252,29 +271,39 @@ export class ProcessManager {
|
||||
|
||||
private static async waitForHealth(pid: number, port: number, timeoutMs: number = HEALTH_CHECK_TIMEOUT_MS): Promise<{ success: boolean; pid?: number; error?: string }> {
|
||||
const startTime = Date.now();
|
||||
const isWindows = process.platform === 'win32';
|
||||
// Increase timeout on Windows to account for slower process startup
|
||||
const adjustedTimeout = isWindows ? timeoutMs * 2 : timeoutMs;
|
||||
|
||||
while (Date.now() - startTime < timeoutMs) {
|
||||
while (Date.now() - startTime < adjustedTimeout) {
|
||||
// Check if process is still alive
|
||||
if (!this.isProcessAlive(pid)) {
|
||||
return { success: false, error: 'Process died during startup' };
|
||||
const errorMsg = isWindows
|
||||
? `Process died during startup\n\nTroubleshooting:\n1. Check Task Manager for zombie 'bun.exe' or 'node.exe' processes\n2. Verify port ${port} is not in use: netstat -ano | findstr ${port}\n3. Check worker logs in ~/.claude-mem/logs/\n4. See GitHub issues: #363, #367, #371, #373\n5. Docs: https://docs.claude-mem.ai/troubleshooting/windows-issues`
|
||||
: 'Process died during startup';
|
||||
return { success: false, error: errorMsg };
|
||||
}
|
||||
|
||||
// Try health check
|
||||
// Try readiness check (changed from /health to /api/readiness)
|
||||
try {
|
||||
const response = await fetch(`http://127.0.0.1:${port}/health`, {
|
||||
const response = await fetch(`http://127.0.0.1:${port}/api/readiness`, {
|
||||
signal: AbortSignal.timeout(HEALTH_CHECK_FETCH_TIMEOUT_MS)
|
||||
});
|
||||
if (response.ok) {
|
||||
return { success: true, pid };
|
||||
}
|
||||
} catch {
|
||||
// Not ready yet
|
||||
// Not ready yet, continue polling
|
||||
}
|
||||
|
||||
await new Promise(resolve => setTimeout(resolve, HEALTH_CHECK_INTERVAL_MS));
|
||||
}
|
||||
|
||||
return { success: false, error: 'Health check timed out' };
|
||||
const timeoutMsg = isWindows
|
||||
? `Worker failed to start on Windows (readiness check timed out after ${adjustedTimeout}ms)\n\nTroubleshooting:\n1. Check Task Manager for zombie 'bun.exe' or 'node.exe' processes\n2. Verify port ${port} is not in use: netstat -ano | findstr ${port}\n3. Check worker logs in ~/.claude-mem/logs/\n4. See GitHub issues: #363, #367, #371, #373\n5. Docs: https://docs.claude-mem.ai/troubleshooting/windows-issues`
|
||||
: `Readiness check timed out after ${adjustedTimeout}ms`;
|
||||
|
||||
return { success: false, error: timeoutMsg };
|
||||
}
|
||||
|
||||
private static async waitForExit(pid: number, timeout: number): Promise<void> {
|
||||
|
||||
@@ -101,7 +101,9 @@ export class ChromaSync {
|
||||
// See: https://github.com/thedotmack/claude-mem/issues/170 (Python 3.14 incompatibility)
|
||||
const settings = SettingsDefaultsManager.loadFromFile(USER_SETTINGS_PATH);
|
||||
const pythonVersion = settings.CLAUDE_MEM_PYTHON_VERSION;
|
||||
this.transport = new StdioClientTransport({
|
||||
const isWindows = process.platform === 'win32';
|
||||
|
||||
const transportOptions: any = {
|
||||
command: 'uvx',
|
||||
args: [
|
||||
'--python', pythonVersion,
|
||||
@@ -110,7 +112,16 @@ export class ChromaSync {
|
||||
'--data-dir', this.VECTOR_DB_DIR
|
||||
],
|
||||
stderr: 'ignore'
|
||||
});
|
||||
};
|
||||
|
||||
// CRITICAL: On Windows, try to hide console window to prevent PowerShell popups
|
||||
// Note: windowsHide may not be supported by MCP SDK's StdioClientTransport
|
||||
if (isWindows) {
|
||||
transportOptions.windowsHide = true;
|
||||
logger.debug('CHROMA_SYNC', 'Windows detected, attempting to hide console window', { project: this.project });
|
||||
}
|
||||
|
||||
this.transport = new StdioClientTransport(transportOptions);
|
||||
|
||||
this.client = new Client({
|
||||
name: 'claude-mem-chroma-sync',
|
||||
|
||||
+248
-41
@@ -14,7 +14,7 @@ import { Client } from '@modelcontextprotocol/sdk/client/index.js';
|
||||
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
|
||||
import { getWorkerPort, getWorkerHost } from '../shared/worker-utils.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
import { exec } from 'child_process';
|
||||
import { exec, execSync } from 'child_process';
|
||||
import { promisify } from 'util';
|
||||
|
||||
const execAsync = promisify(exec);
|
||||
@@ -45,6 +45,10 @@ export class WorkerService {
|
||||
private startTime: number = Date.now();
|
||||
private mcpClient: Client;
|
||||
|
||||
// Initialization flags for MCP/SDK readiness tracking
|
||||
private mcpReady: boolean = false;
|
||||
private initializationCompleteFlag: boolean = false;
|
||||
|
||||
// Domain services
|
||||
private dbManager: DatabaseManager;
|
||||
private sessionManager: SessionManager;
|
||||
@@ -118,18 +122,46 @@ export class WorkerService {
|
||||
*/
|
||||
private setupRoutes(): void {
|
||||
// Health check endpoint
|
||||
// TEST_BUILD_ID helps verify which build is running during debugging
|
||||
const TEST_BUILD_ID = 'TEST-008-wrapper-ipc';
|
||||
this.app.get('/api/health', (_req, res) => {
|
||||
res.status(200).json({ status: 'ok' });
|
||||
res.status(200).json({
|
||||
status: 'ok',
|
||||
build: TEST_BUILD_ID,
|
||||
managed: process.env.CLAUDE_MEM_MANAGED === 'true',
|
||||
hasIpc: typeof process.send === 'function',
|
||||
platform: process.platform,
|
||||
pid: process.pid,
|
||||
initialized: this.initializationCompleteFlag,
|
||||
mcpReady: this.mcpReady,
|
||||
});
|
||||
});
|
||||
|
||||
// Readiness check endpoint - returns 503 until full initialization completes
|
||||
// Used by ProcessManager and worker-utils to ensure worker is fully ready before routing requests
|
||||
this.app.get('/api/readiness', (_req, res) => {
|
||||
if (this.initializationCompleteFlag) {
|
||||
res.status(200).json({
|
||||
status: 'ready',
|
||||
mcpReady: this.mcpReady,
|
||||
});
|
||||
} else {
|
||||
res.status(503).json({
|
||||
status: 'initializing',
|
||||
message: 'Worker is still initializing, please retry',
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// Version endpoint - returns the worker's current version
|
||||
this.app.get('/api/version', (_req, res) => {
|
||||
const { homedir } = require('os');
|
||||
const { readFileSync } = require('fs');
|
||||
const marketplaceRoot = path.join(homedir(), '.claude', 'plugins', 'marketplaces', 'thedotmack');
|
||||
const packageJsonPath = path.join(marketplaceRoot, 'package.json');
|
||||
|
||||
try {
|
||||
// Read version from marketplace package.json
|
||||
const { homedir } = require('os');
|
||||
const { readFileSync } = require('fs');
|
||||
const marketplaceRoot = path.join(homedir(), '.claude', 'plugins', 'marketplaces', 'thedotmack');
|
||||
const packageJsonPath = path.join(marketplaceRoot, 'package.json');
|
||||
const packageJson = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
|
||||
res.status(200).json({ version: packageJson.version });
|
||||
} catch (error) {
|
||||
@@ -179,18 +211,43 @@ export class WorkerService {
|
||||
// Admin endpoints for process management
|
||||
this.app.post('/api/admin/restart', async (_req, res) => {
|
||||
res.json({ status: 'restarting' });
|
||||
setTimeout(async () => {
|
||||
await this.shutdown();
|
||||
process.exit(0);
|
||||
}, 100);
|
||||
|
||||
// On Windows, if managed by wrapper, send message to parent to handle restart
|
||||
// This solves the Windows zombie port problem where sockets aren't properly released
|
||||
const isWindowsManaged = process.platform === 'win32' &&
|
||||
process.env.CLAUDE_MEM_MANAGED === 'true' &&
|
||||
process.send;
|
||||
|
||||
if (isWindowsManaged) {
|
||||
logger.info('SYSTEM', 'Sending restart request to wrapper');
|
||||
process.send!({ type: 'restart' });
|
||||
} else {
|
||||
// Unix or standalone Windows - handle restart ourselves
|
||||
setTimeout(async () => {
|
||||
await this.shutdown();
|
||||
process.exit(0);
|
||||
}, 100);
|
||||
}
|
||||
});
|
||||
|
||||
this.app.post('/api/admin/shutdown', async (_req, res) => {
|
||||
res.json({ status: 'shutting_down' });
|
||||
setTimeout(async () => {
|
||||
await this.shutdown();
|
||||
process.exit(0);
|
||||
}, 100);
|
||||
|
||||
// On Windows, if managed by wrapper, send message to parent to handle shutdown
|
||||
const isWindowsManaged = process.platform === 'win32' &&
|
||||
process.env.CLAUDE_MEM_MANAGED === 'true' &&
|
||||
process.send;
|
||||
|
||||
if (isWindowsManaged) {
|
||||
logger.info('SYSTEM', 'Sending shutdown request to wrapper');
|
||||
process.send!({ type: 'shutdown' });
|
||||
} else {
|
||||
// Unix or standalone Windows - handle shutdown ourselves
|
||||
setTimeout(async () => {
|
||||
await this.shutdown();
|
||||
process.exit(0);
|
||||
}, 100);
|
||||
}
|
||||
});
|
||||
|
||||
this.viewerRoutes.setupRoutes(this.app);
|
||||
@@ -261,25 +318,47 @@ export class WorkerService {
|
||||
*/
|
||||
private async cleanupOrphanedProcesses(): Promise<void> {
|
||||
try {
|
||||
// Find all chroma-mcp processes
|
||||
const { stdout } = await execAsync('ps aux | grep "chroma-mcp" | grep -v grep || true');
|
||||
|
||||
if (!stdout.trim()) {
|
||||
logger.debug('SYSTEM', 'No orphaned chroma-mcp processes found');
|
||||
return;
|
||||
}
|
||||
|
||||
const lines = stdout.trim().split('\n');
|
||||
const isWindows = process.platform === 'win32';
|
||||
const pids: number[] = [];
|
||||
|
||||
for (const line of lines) {
|
||||
const parts = line.trim().split(/\s+/);
|
||||
if (parts.length > 1) {
|
||||
const pid = parseInt(parts[1], 10);
|
||||
if (!isNaN(pid)) {
|
||||
if (isWindows) {
|
||||
// Windows: Use PowerShell Get-CimInstance to find chroma-mcp processes
|
||||
const cmd = `powershell -Command "Get-CimInstance Win32_Process | Where-Object { $_.Name -like '*python*' -and $_.CommandLine -like '*chroma-mcp*' } | Select-Object -ExpandProperty ProcessId"`;
|
||||
const { stdout } = await execAsync(cmd, { timeout: 5000 });
|
||||
|
||||
if (!stdout.trim()) {
|
||||
logger.debug('SYSTEM', 'No orphaned chroma-mcp processes found (Windows)');
|
||||
return;
|
||||
}
|
||||
|
||||
const pidStrings = stdout.trim().split('\n');
|
||||
for (const pidStr of pidStrings) {
|
||||
const pid = parseInt(pidStr.trim(), 10);
|
||||
// SECURITY: Validate PID is positive integer before adding to list
|
||||
if (!isNaN(pid) && Number.isInteger(pid) && pid > 0) {
|
||||
pids.push(pid);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Unix: Use ps aux | grep
|
||||
const { stdout } = await execAsync('ps aux | grep "chroma-mcp" | grep -v grep || true');
|
||||
|
||||
if (!stdout.trim()) {
|
||||
logger.debug('SYSTEM', 'No orphaned chroma-mcp processes found (Unix)');
|
||||
return;
|
||||
}
|
||||
|
||||
const lines = stdout.trim().split('\n');
|
||||
for (const line of lines) {
|
||||
const parts = line.trim().split(/\s+/);
|
||||
if (parts.length > 1) {
|
||||
const pid = parseInt(parts[1], 10);
|
||||
// SECURITY: Validate PID is positive integer before adding to list
|
||||
if (!isNaN(pid) && Number.isInteger(pid) && pid > 0) {
|
||||
pids.push(pid);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (pids.length === 0) {
|
||||
@@ -287,12 +366,28 @@ export class WorkerService {
|
||||
}
|
||||
|
||||
logger.info('SYSTEM', 'Cleaning up orphaned chroma-mcp processes', {
|
||||
platform: isWindows ? 'Windows' : 'Unix',
|
||||
count: pids.length,
|
||||
pids
|
||||
});
|
||||
|
||||
// Kill all found processes
|
||||
await execAsync(`kill ${pids.join(' ')}`);
|
||||
if (isWindows) {
|
||||
for (const pid of pids) {
|
||||
// SECURITY: Double-check PID validation before using in taskkill command
|
||||
if (!Number.isInteger(pid) || pid <= 0) {
|
||||
logger.warn('SYSTEM', 'Skipping invalid PID', { pid });
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
execSync(`taskkill /PID ${pid} /T /F`, { timeout: 5000, stdio: 'ignore' });
|
||||
} catch (error) {
|
||||
logger.warn('SYSTEM', 'Failed to kill orphaned process', { pid }, error as Error);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
await execAsync(`kill ${pids.join(' ')}`);
|
||||
}
|
||||
|
||||
logger.info('SYSTEM', 'Orphaned processes cleaned up', { count: pids.length });
|
||||
} catch (error) {
|
||||
@@ -346,7 +441,7 @@ export class WorkerService {
|
||||
this.searchRoutes.setupRoutes(this.app); // Setup search routes now that SearchManager is ready
|
||||
logger.info('WORKER', 'SearchManager initialized and search routes registered');
|
||||
|
||||
// Connect to MCP server
|
||||
// Connect to MCP server with timeout guard
|
||||
const mcpServerPath = path.join(__dirname, 'mcp-server.cjs');
|
||||
const transport = new StdioClientTransport({
|
||||
command: 'node',
|
||||
@@ -354,10 +449,19 @@ export class WorkerService {
|
||||
env: process.env
|
||||
});
|
||||
|
||||
await this.mcpClient.connect(transport);
|
||||
// Add timeout guard to prevent hanging on MCP connection (15 seconds)
|
||||
const MCP_INIT_TIMEOUT_MS = 15000;
|
||||
const mcpConnectionPromise = this.mcpClient.connect(transport);
|
||||
const timeoutPromise = new Promise<never>((_, reject) =>
|
||||
setTimeout(() => reject(new Error('MCP connection timeout after 15s')), MCP_INIT_TIMEOUT_MS)
|
||||
);
|
||||
|
||||
await Promise.race([mcpConnectionPromise, timeoutPromise]);
|
||||
this.mcpReady = true;
|
||||
logger.success('WORKER', 'Connected to MCP server');
|
||||
|
||||
// Signal that initialization is complete
|
||||
this.initializationCompleteFlag = true;
|
||||
this.resolveInitialization();
|
||||
logger.info('SYSTEM', 'Background initialization complete');
|
||||
} catch (error) {
|
||||
@@ -399,12 +503,32 @@ export class WorkerService {
|
||||
|
||||
/**
|
||||
* Shutdown the worker service
|
||||
*
|
||||
* IMPORTANT: On Windows, we must kill all child processes before exiting
|
||||
* to prevent zombie ports. The socket handle can be inherited by children,
|
||||
* and if not properly closed, the port stays bound after process death.
|
||||
*/
|
||||
async shutdown(): Promise<void> {
|
||||
// Shutdown all active sessions
|
||||
logger.info('SYSTEM', 'Shutdown initiated');
|
||||
|
||||
// STEP 1: Enumerate all child processes BEFORE we start closing things
|
||||
const childPids = await this.getChildProcesses(process.pid);
|
||||
logger.info('SYSTEM', 'Found child processes', { count: childPids.length, pids: childPids });
|
||||
|
||||
// STEP 2: Close HTTP server first
|
||||
if (this.server) {
|
||||
this.server.closeAllConnections();
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
this.server!.close(err => err ? reject(err) : resolve());
|
||||
});
|
||||
this.server = null;
|
||||
logger.info('SYSTEM', 'HTTP server closed');
|
||||
}
|
||||
|
||||
// STEP 3: Shutdown active sessions
|
||||
await this.sessionManager.shutdownAll();
|
||||
|
||||
// Close MCP client connection (terminates MCP server process)
|
||||
// STEP 4: Close MCP client connection (signals child to exit gracefully)
|
||||
if (this.mcpClient) {
|
||||
try {
|
||||
await this.mcpClient.close();
|
||||
@@ -414,19 +538,102 @@ export class WorkerService {
|
||||
}
|
||||
}
|
||||
|
||||
// Close HTTP server
|
||||
if (this.server) {
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
this.server!.close(err => err ? reject(err) : resolve());
|
||||
});
|
||||
}
|
||||
|
||||
// Close database connection (includes ChromaSync cleanup)
|
||||
// STEP 5: Close database connection (includes ChromaSync cleanup)
|
||||
await this.dbManager.close();
|
||||
|
||||
// STEP 6: Force kill any remaining child processes (Windows zombie port fix)
|
||||
if (childPids.length > 0) {
|
||||
logger.info('SYSTEM', 'Force killing remaining children');
|
||||
for (const pid of childPids) {
|
||||
await this.forceKillProcess(pid);
|
||||
}
|
||||
// Wait for children to fully exit
|
||||
await this.waitForProcessesExit(childPids, 5000);
|
||||
}
|
||||
|
||||
logger.info('SYSTEM', 'Worker shutdown complete');
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all child process PIDs (Windows-specific)
|
||||
*/
|
||||
private async getChildProcesses(parentPid: number): Promise<number[]> {
|
||||
if (process.platform !== 'win32') {
|
||||
return [];
|
||||
}
|
||||
|
||||
// SECURITY: Validate PID is a positive integer to prevent command injection
|
||||
if (!Number.isInteger(parentPid) || parentPid <= 0) {
|
||||
logger.warn('SYSTEM', 'Invalid parent PID for child process enumeration', { parentPid });
|
||||
return [];
|
||||
}
|
||||
|
||||
try {
|
||||
const cmd = `powershell -Command "Get-CimInstance Win32_Process | Where-Object { $_.ParentProcessId -eq ${parentPid} } | Select-Object -ExpandProperty ProcessId"`;
|
||||
const { stdout } = await execAsync(cmd, { timeout: 5000 });
|
||||
return stdout
|
||||
.trim()
|
||||
.split('\n')
|
||||
.map(s => parseInt(s.trim(), 10))
|
||||
.filter(n => !isNaN(n) && Number.isInteger(n) && n > 0); // SECURITY: Validate each PID
|
||||
} catch (error) {
|
||||
logger.warn('SYSTEM', 'Failed to enumerate child processes', {}, error as Error);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Force kill a process by PID (Windows: uses taskkill /F /T)
|
||||
*/
|
||||
private async forceKillProcess(pid: number): Promise<void> {
|
||||
// SECURITY: Validate PID is a positive integer to prevent command injection
|
||||
if (!Number.isInteger(pid) || pid <= 0) {
|
||||
logger.warn('SYSTEM', 'Invalid PID for force kill', { pid });
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
if (process.platform === 'win32') {
|
||||
// /T kills entire process tree, /F forces termination
|
||||
await execAsync(`taskkill /PID ${pid} /T /F`, { timeout: 5000 });
|
||||
logger.info('SYSTEM', 'Killed process', { pid });
|
||||
} else {
|
||||
process.kill(pid, 'SIGKILL');
|
||||
}
|
||||
} catch (error) {
|
||||
// Process may already be dead, which is fine
|
||||
logger.debug('SYSTEM', 'Process already dead or kill failed', { pid });
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Wait for processes to fully exit
|
||||
*/
|
||||
private async waitForProcessesExit(pids: number[], timeoutMs: number): Promise<void> {
|
||||
const start = Date.now();
|
||||
|
||||
while (Date.now() - start < timeoutMs) {
|
||||
const stillAlive = pids.filter(pid => {
|
||||
try {
|
||||
process.kill(pid, 0); // Signal 0 checks if process exists
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
});
|
||||
|
||||
if (stillAlive.length === 0) {
|
||||
logger.info('SYSTEM', 'All child processes exited');
|
||||
return;
|
||||
}
|
||||
|
||||
logger.debug('SYSTEM', 'Waiting for processes to exit', { stillAlive });
|
||||
await new Promise(r => setTimeout(r, 100));
|
||||
}
|
||||
|
||||
logger.warn('SYSTEM', 'Timeout waiting for child processes to exit');
|
||||
}
|
||||
|
||||
/**
|
||||
* Summarize request body for logging
|
||||
* Used to avoid logging sensitive data or large payloads
|
||||
|
||||
@@ -0,0 +1,152 @@
|
||||
/**
|
||||
* Worker Wrapper - Manages worker process lifecycle
|
||||
*
|
||||
* This wrapper exists to solve the Windows zombie port problem.
|
||||
* The wrapper spawns the actual worker as a child process.
|
||||
* When restart/shutdown is requested, the wrapper kills the child
|
||||
* and respawns it (or exits), ensuring clean socket cleanup.
|
||||
*
|
||||
* The wrapper itself has no sockets, so Bun's socket cleanup bug
|
||||
* doesn't affect it.
|
||||
*/
|
||||
|
||||
import { spawn, ChildProcess, execSync } from 'child_process';
|
||||
import path from 'path';
|
||||
|
||||
const isWindows = process.platform === 'win32';
|
||||
|
||||
const SCRIPT_DIR = __dirname;
|
||||
const INNER_SCRIPT = path.join(SCRIPT_DIR, 'worker-service.cjs');
|
||||
|
||||
let inner: ChildProcess | null = null;
|
||||
let isShuttingDown = false;
|
||||
|
||||
function log(msg: string) {
|
||||
const timestamp = new Date().toISOString();
|
||||
console.log(`[${timestamp}] [wrapper] ${msg}`);
|
||||
}
|
||||
|
||||
function spawnInner() {
|
||||
log(`Spawning inner worker: ${INNER_SCRIPT}`);
|
||||
|
||||
inner = spawn(process.execPath, [INNER_SCRIPT], {
|
||||
stdio: ['inherit', 'inherit', 'inherit', 'ipc'],
|
||||
env: { ...process.env, CLAUDE_MEM_MANAGED: 'true' },
|
||||
cwd: path.dirname(INNER_SCRIPT),
|
||||
});
|
||||
|
||||
inner.on('message', async (msg: { type: string }) => {
|
||||
if (msg.type === 'restart' || msg.type === 'shutdown') {
|
||||
// Both restart and shutdown: kill inner and exit wrapper
|
||||
// The hooks will start a fresh wrapper+inner if needed
|
||||
log(`${msg.type} requested by inner`);
|
||||
isShuttingDown = true;
|
||||
await killInner();
|
||||
log('Exiting wrapper');
|
||||
process.exit(0);
|
||||
}
|
||||
});
|
||||
|
||||
inner.on('exit', (code, signal) => {
|
||||
log(`Inner exited with code=${code}, signal=${signal}`);
|
||||
inner = null;
|
||||
|
||||
// If inner crashed unexpectedly (not during shutdown), respawn it
|
||||
if (!isShuttingDown && code !== 0) {
|
||||
log('Inner crashed, respawning in 1 second...');
|
||||
setTimeout(() => spawnInner(), 1000);
|
||||
}
|
||||
});
|
||||
|
||||
inner.on('error', (err) => {
|
||||
log(`Inner error: ${err.message}`);
|
||||
});
|
||||
}
|
||||
|
||||
async function killInner(): Promise<void> {
|
||||
if (!inner || !inner.pid) {
|
||||
log('No inner process to kill');
|
||||
return;
|
||||
}
|
||||
|
||||
const pid = inner.pid;
|
||||
log(`Killing inner process tree (pid=${pid})`);
|
||||
|
||||
if (isWindows) {
|
||||
// On Windows, use taskkill /T /F to kill entire process tree
|
||||
// This ensures all children (MCP server, ChromaSync, etc.) are killed
|
||||
// which is necessary to properly release the socket
|
||||
try {
|
||||
execSync(`taskkill /PID ${pid} /T /F`, { timeout: 10000, stdio: 'ignore' });
|
||||
log(`taskkill completed for pid=${pid}`);
|
||||
} catch (error) {
|
||||
// Process may already be dead
|
||||
log(`taskkill failed (process may be dead): ${error}`);
|
||||
}
|
||||
} else {
|
||||
// On Unix, SIGTERM then SIGKILL
|
||||
inner.kill('SIGTERM');
|
||||
|
||||
// Wait for exit with timeout
|
||||
const exitPromise = new Promise<void>(resolve => {
|
||||
if (!inner) {
|
||||
resolve();
|
||||
return;
|
||||
}
|
||||
inner.on('exit', () => resolve());
|
||||
});
|
||||
|
||||
const timeoutPromise = new Promise<void>(resolve =>
|
||||
setTimeout(() => resolve(), 5000)
|
||||
);
|
||||
|
||||
await Promise.race([exitPromise, timeoutPromise]);
|
||||
|
||||
// Force kill if still alive
|
||||
if (inner && !inner.killed) {
|
||||
log('Inner did not exit gracefully, force killing');
|
||||
inner.kill('SIGKILL');
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for the process to fully exit
|
||||
await waitForProcessExit(pid, 5000);
|
||||
|
||||
inner = null;
|
||||
log('Inner process terminated');
|
||||
}
|
||||
|
||||
async function waitForProcessExit(pid: number, timeoutMs: number): Promise<void> {
|
||||
const start = Date.now();
|
||||
|
||||
while (Date.now() - start < timeoutMs) {
|
||||
try {
|
||||
process.kill(pid, 0); // Check if process exists
|
||||
await new Promise(r => setTimeout(r, 100));
|
||||
} catch {
|
||||
// Process is dead
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
log(`Timeout waiting for process ${pid} to exit`);
|
||||
}
|
||||
|
||||
// Handle wrapper signals
|
||||
process.on('SIGTERM', async () => {
|
||||
log('Wrapper received SIGTERM');
|
||||
isShuttingDown = true;
|
||||
await killInner();
|
||||
process.exit(0);
|
||||
});
|
||||
|
||||
process.on('SIGINT', async () => {
|
||||
log('Wrapper received SIGINT');
|
||||
isShuttingDown = true;
|
||||
await killInner();
|
||||
process.exit(0);
|
||||
});
|
||||
|
||||
// Start the inner worker
|
||||
log('Wrapper starting');
|
||||
spawnInner();
|
||||
@@ -113,7 +113,7 @@ export class SDKAgent {
|
||||
// Calculate discovery tokens (delta for this response only)
|
||||
const discoveryTokens = (session.cumulativeInputTokens + session.cumulativeOutputTokens) - tokensBeforeResponse;
|
||||
|
||||
// Only log non-empty responses (filter out noise)
|
||||
// Process response (empty or not) and mark messages as processed
|
||||
if (responseSize > 0) {
|
||||
const truncatedResponse = responseSize > 100
|
||||
? textContent.substring(0, 100) + '...'
|
||||
@@ -125,6 +125,9 @@ export class SDKAgent {
|
||||
|
||||
// Parse and process response with discovery token delta
|
||||
await this.processSDKResponse(session, textContent, worker, discoveryTokens);
|
||||
} else {
|
||||
// Empty response - still need to mark pending messages as processed
|
||||
await this.markMessagesProcessed(session, worker);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -396,8 +399,15 @@ export class SDKAgent {
|
||||
}
|
||||
}
|
||||
|
||||
// CRITICAL: Mark ALL pending messages as successfully processed
|
||||
// This prevents message loss if worker crashes before SDK finishes
|
||||
// Mark messages as processed after successful observation/summary storage
|
||||
await this.markMessagesProcessed(session, worker);
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark all pending messages as successfully processed
|
||||
* CRITICAL: Prevents message loss and duplicate processing
|
||||
*/
|
||||
private async markMessagesProcessed(session: ActiveSession, worker: any | undefined): Promise<void> {
|
||||
const pendingMessageStore = this.sessionManager.getPendingMessageStore();
|
||||
if (session.pendingProcessingIds.size > 0) {
|
||||
for (const messageId of session.pendingProcessingIds) {
|
||||
|
||||
@@ -58,17 +58,18 @@ export function getWorkerHost(): string {
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if worker is responsive by trying the health endpoint
|
||||
* Check if worker is responsive and fully initialized by trying the readiness endpoint
|
||||
* Changed from /health to /api/readiness to ensure MCP initialization is complete
|
||||
*/
|
||||
async function isWorkerHealthy(): Promise<boolean> {
|
||||
try {
|
||||
const port = getWorkerPort();
|
||||
const response = await fetch(`http://127.0.0.1:${port}/health`, {
|
||||
const response = await fetch(`http://127.0.0.1:${port}/api/readiness`, {
|
||||
signal: AbortSignal.timeout(HEALTH_CHECK_TIMEOUT_MS)
|
||||
});
|
||||
return response.ok;
|
||||
} catch (error) {
|
||||
logger.debug('SYSTEM', 'Worker health check failed', {
|
||||
logger.debug('SYSTEM', 'Worker readiness check failed', {
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
errorType: error?.constructor?.name
|
||||
});
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
import path from 'path';
|
||||
import { logger } from './logger.js';
|
||||
|
||||
/**
|
||||
* Extract project name from working directory path
|
||||
* Handles edge cases: null/undefined cwd, drive roots, trailing slashes
|
||||
*
|
||||
* @param cwd - Current working directory (absolute path)
|
||||
* @returns Project name or "unknown-project" if extraction fails
|
||||
*/
|
||||
export function getProjectName(cwd: string | null | undefined): string {
|
||||
if (!cwd || cwd.trim() === '') {
|
||||
logger.warn('PROJECT_NAME', 'Empty cwd provided, using fallback', { cwd });
|
||||
return 'unknown-project';
|
||||
}
|
||||
|
||||
// Extract basename (handles trailing slashes automatically)
|
||||
const basename = path.basename(cwd);
|
||||
|
||||
// Edge case: Drive roots on Windows (C:\, J:\) or Unix root (/)
|
||||
// path.basename('C:\') returns '' (empty string)
|
||||
if (basename === '') {
|
||||
// Extract drive letter on Windows, or use 'root' on Unix
|
||||
const isWindows = process.platform === 'win32';
|
||||
if (isWindows && cwd.match(/^[A-Z]:\\/i)) {
|
||||
const driveLetter = cwd[0].toUpperCase();
|
||||
const projectName = `drive-${driveLetter}`;
|
||||
logger.info('PROJECT_NAME', 'Drive root detected', { cwd, projectName });
|
||||
return projectName;
|
||||
} else {
|
||||
logger.warn('PROJECT_NAME', 'Root directory detected, using fallback', { cwd });
|
||||
return 'unknown-project';
|
||||
}
|
||||
}
|
||||
|
||||
return basename;
|
||||
}
|
||||
Reference in New Issue
Block a user