fix: Move ensureWorkerRunning to start of hooks to prevent race condition

All hooks now call ensureWorkerRunning() BEFORE querying the database. This
ensures the worker's orphaned session cleanup runs before hooks check for
active sessions, preventing 404 errors when hooks try to use sessions that
don't exist in worker memory after a restart.

Hook order now:
1. ensureWorkerRunning() - starts worker, runs cleanup
2. Query DB - cleanup already marked orphaned sessions as failed
3. Use session - only valid sessions are processed

Fixed in:
- new-hook: Line 26, before DB queries
- save-hook: Line 37, before DB queries
- summary-hook: Line 24, before DB queries
- cleanup-hook: Line 50, before DB queries

This prevents the race condition where hooks would read session status before
cleanup ran, then get 404 from worker after cleanup marked sessions failed.
This commit is contained in:
Alex Newman
2025-10-19 02:01:11 -04:00
parent daf368e343
commit f849a69506
8 changed files with 54 additions and 52 deletions
+1 -1
View File
@@ -239,4 +239,4 @@ ${e.stack}`:e.message;if(Array.isArray(e))return`[${e.length} items]`;let s=Obje
UPDATE sdk_sessions
SET status = 'failed', completed_at = ?, completed_at_epoch = ?
WHERE status = 'active'
`).run(e.toISOString(),s).changes}close(){this.db.close()}};function W(n,e,s){return n==="PreCompact"?e?{continue:!0,suppressOutput:!0}:{continue:!1,stopReason:s.reason||"Pre-compact operation failed",suppressOutput:!0}:n==="SessionStart"?e&&s.context?{continue:!0,suppressOutput:!0,hookSpecificOutput:{hookEventName:"SessionStart",additionalContext:s.context}}:{continue:!0,suppressOutput:!0}:n==="UserPromptSubmit"||n==="PostToolUse"?{continue:!0,suppressOutput:!0}:n==="Stop"?{continue:!0,suppressOutput:!0}:{continue:e,suppressOutput:!0,...s.reason&&!e?{stopReason:s.reason}:{}}}function S(n,e,s={}){let t=W(n,e,s);return JSON.stringify(t)}import b from"path";import{existsSync as N}from"fs";import{spawn as $}from"child_process";var j=parseInt(process.env.CLAUDE_MEM_WORKER_PORT||"37777",10),B=`http://127.0.0.1:${j}/health`;async function k(){try{return(await fetch(B,{signal:AbortSignal.timeout(500)})).ok}catch{return!1}}async function C(){try{if(await k())return!0;console.error("[claude-mem] Worker not responding, starting...");let n=A(),e=b.join(n,"plugin","scripts","worker-service.cjs");if(!N(e))return console.error(`[claude-mem] Worker service not found at ${e}`),!1;let s=b.join(n,"ecosystem.config.cjs"),t=b.join(n,"node_modules",".bin","pm2");if(!N(t))throw new Error(`PM2 binary not found at ${t}. This is a bundled dependency - try running: npm install`);if(!N(s))throw new Error(`PM2 ecosystem config not found at ${s}. Plugin installation may be corrupted.`);let r=$(t,["start",s],{detached:!0,stdio:"ignore",cwd:n});r.on("error",o=>{throw new Error(`Failed to spawn PM2: ${o.message}`)}),r.unref(),console.error("[claude-mem] Worker started with PM2");for(let o=0;o<3;o++)if(await new Promise(i=>setTimeout(i,500)),await k())return console.error("[claude-mem] Worker is healthy"),!0;return console.error("[claude-mem] Worker failed to become healthy after startup"),!1}catch(n){return console.error(`[claude-mem] Failed to start worker: ${n.message}`),!1}}async function D(n){if(!n)throw new Error("summaryHook requires input");let{session_id:e}=n,s=new m,t=s.findActiveSDKSession(e);if(!t){s.close(),console.log(S("Stop",!0));return}if(!t.worker_port)throw s.close(),u.error("HOOK","No worker port for session",{sessionId:t.id}),new Error("No worker port for session - session may not be properly initialized");let r=s.getPromptCounter(t.id);if(s.close(),!await C())throw new Error("Worker service failed to start or become healthy");u.dataIn("HOOK","Stop: Requesting summary",{sessionId:t.id,workerPort:t.worker_port,promptNumber:r});let i=await fetch(`http://127.0.0.1:${t.worker_port}/sessions/${t.id}/summarize`,{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({prompt_number:r}),signal:AbortSignal.timeout(2e3)});if(!i.ok){let d=await i.text();throw u.failure("HOOK","Failed to generate summary",{sessionId:t.id,status:i.status},d),new Error(`Failed to request summary from worker: ${i.status} ${d}`)}u.debug("HOOK","Summary request sent successfully",{sessionId:t.id}),console.log(S("Stop",!0))}import{stdin as y}from"process";var f="";y.on("data",n=>f+=n);y.on("end",async()=>{let n=f.trim()?JSON.parse(f):void 0;await D(n),process.exit(0)});
`).run(e.toISOString(),s).changes}close(){this.db.close()}};function W(n,e,s){return n==="PreCompact"?e?{continue:!0,suppressOutput:!0}:{continue:!1,stopReason:s.reason||"Pre-compact operation failed",suppressOutput:!0}:n==="SessionStart"?e&&s.context?{continue:!0,suppressOutput:!0,hookSpecificOutput:{hookEventName:"SessionStart",additionalContext:s.context}}:{continue:!0,suppressOutput:!0}:n==="UserPromptSubmit"||n==="PostToolUse"?{continue:!0,suppressOutput:!0}:n==="Stop"?{continue:!0,suppressOutput:!0}:{continue:e,suppressOutput:!0,...s.reason&&!e?{stopReason:s.reason}:{}}}function S(n,e,s={}){let t=W(n,e,s);return JSON.stringify(t)}import b from"path";import{existsSync as N}from"fs";import{spawn as $}from"child_process";var j=parseInt(process.env.CLAUDE_MEM_WORKER_PORT||"37777",10),B=`http://127.0.0.1:${j}/health`;async function k(){try{return(await fetch(B,{signal:AbortSignal.timeout(500)})).ok}catch{return!1}}async function C(){try{if(await k())return!0;console.error("[claude-mem] Worker not responding, starting...");let n=A(),e=b.join(n,"plugin","scripts","worker-service.cjs");if(!N(e))return console.error(`[claude-mem] Worker service not found at ${e}`),!1;let s=b.join(n,"ecosystem.config.cjs"),t=b.join(n,"node_modules",".bin","pm2");if(!N(t))throw new Error(`PM2 binary not found at ${t}. This is a bundled dependency - try running: npm install`);if(!N(s))throw new Error(`PM2 ecosystem config not found at ${s}. Plugin installation may be corrupted.`);let r=$(t,["start",s],{detached:!0,stdio:"ignore",cwd:n});r.on("error",o=>{throw new Error(`Failed to spawn PM2: ${o.message}`)}),r.unref(),console.error("[claude-mem] Worker started with PM2");for(let o=0;o<3;o++)if(await new Promise(i=>setTimeout(i,500)),await k())return console.error("[claude-mem] Worker is healthy"),!0;return console.error("[claude-mem] Worker failed to become healthy after startup"),!1}catch(n){return console.error(`[claude-mem] Failed to start worker: ${n.message}`),!1}}async function D(n){if(!n)throw new Error("summaryHook requires input");let{session_id:e}=n;if(!await C())throw new Error("Worker service failed to start or become healthy");let t=new m,r=t.findActiveSDKSession(e);if(!r){t.close(),console.log(S("Stop",!0));return}if(!r.worker_port)throw t.close(),u.error("HOOK","No worker port for session",{sessionId:r.id}),new Error("No worker port for session - session may not be properly initialized");let o=t.getPromptCounter(r.id);t.close(),u.dataIn("HOOK","Stop: Requesting summary",{sessionId:r.id,workerPort:r.worker_port,promptNumber:o});let i=await fetch(`http://127.0.0.1:${r.worker_port}/sessions/${r.id}/summarize`,{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({prompt_number:o}),signal:AbortSignal.timeout(2e3)});if(!i.ok){let d=await i.text();throw u.failure("HOOK","Failed to generate summary",{sessionId:r.id,status:i.status},d),new Error(`Failed to request summary from worker: ${i.status} ${d}`)}u.debug("HOOK","Summary request sent successfully",{sessionId:r.id}),console.log(S("Stop",!0))}import{stdin as y}from"process";var f="";y.on("data",n=>f+=n);y.on("end",async()=>{let n=f.trim()?JSON.parse(f):void 0;await D(n),process.exit(0)});