fix: self-healing claimNextMessage prevents stuck processing messages (#1159)

* fix: self-healing claimNextMessage prevents stuck processing messages

claimAndDelete → claimNextMessage with atomic self-healing: resets stale
processing messages (>60s) back to pending before claiming. Eliminates
stuck messages from generator crashes without external timers. Removes
redundant idle-timeout reset in worker-service.ts. Adds QUEUE to logger
Component type.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: update stale comments in SessionQueueProcessor to reflect claim-confirm pattern

Comments still referenced the old claim-and-delete pattern after the
claimNextMessage rename. Updated to accurately describe the current
lifecycle where messages are marked as processing and stay in DB until
confirmProcessed() is called.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: move Date.now() inside transaction and extract stale threshold constant

- Move Date.now() inside claimNextMessage transaction closure so timestamp
  is fresh if WAL contention causes retry
- Extract STALE_PROCESSING_THRESHOLD_MS to module-level constant
- Add comment clarifying strict < boundary semantics

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Alex Newman
2026-02-17 23:15:46 -05:00
committed by GitHub
parent b2e3a7e668
commit b88251bc8b
9 changed files with 298 additions and 93 deletions
+6 -6
View File
@@ -6,7 +6,7 @@ ${o.stack}`:` ${o.message}`:this.getLevel()===0&&typeof o=="object"?m=`
`,"utf8")}catch(E){process.stderr.write(`[LOGGER] Failed to write to log file: ${E}
`)}else process.stderr.write(T+`
`)}debug(e,t,s,n){this.log(0,e,t,s,n)}info(e,t,s,n){this.log(1,e,t,s,n)}warn(e,t,s,n){this.log(2,e,t,s,n)}error(e,t,s,n){this.log(3,e,t,s,n)}dataIn(e,t,s,n){this.info(e,`\u2192 ${t}`,s,n)}dataOut(e,t,s,n){this.info(e,`\u2190 ${t}`,s,n)}success(e,t,s,n){this.info(e,`\u2713 ${t}`,s,n)}failure(e,t,s,n){this.error(e,`\u2717 ${t}`,s,n)}timing(e,t,s,n){this.info(e,`\u23F1 ${t}`,n,{duration:`${s}ms`})}happyPathError(e,t,s,n,o=""){let c=((new Error().stack||"").split(`
`)[2]||"").match(/at\s+(?:.*\s+)?\(?([^:]+):(\d+):(\d+)\)?/),m=c?`${c[1].split("/").pop()}:${c[2]}`:"unknown",l={...s,location:m};return this.warn(e,`[HAPPY-PATH] ${t}`,l,n),o}},_=new W;var Ot={};function bt(){return typeof __dirname<"u"?__dirname:(0,b.dirname)((0,ce.fileURLToPath)(Ot.url))}var ht=bt(),N=I.get("CLAUDE_MEM_DATA_DIR"),y=process.env.CLAUDE_CONFIG_DIR||(0,b.join)((0,de.homedir)(),".claude"),Bt=(0,b.join)(y,"plugins","marketplaces","thedotmack"),Ht=(0,b.join)(N,"archives"),Wt=(0,b.join)(N,"logs"),Yt=(0,b.join)(N,"trash"),Vt=(0,b.join)(N,"backups"),qt=(0,b.join)(N,"modes"),Kt=(0,b.join)(N,"settings.json"),_e=(0,b.join)(N,"claude-mem.db"),Jt=(0,b.join)(N,"vector-db"),zt=(0,b.join)(N,"observer-sessions"),Qt=(0,b.join)(y,"settings.json"),Zt=(0,b.join)(y,"commands"),es=(0,b.join)(y,"CLAUDE.md");function me(r){(0,pe.mkdirSync)(r,{recursive:!0})}function le(){return(0,b.join)(ht,"..")}var $=class{db;constructor(e=_e){e!==":memory:"&&me(N),this.db=new ue.Database(e),this.db.run("PRAGMA journal_mode = WAL"),this.db.run("PRAGMA synchronous = NORMAL"),this.db.run("PRAGMA foreign_keys = ON"),this.initializeSchema(),this.ensureWorkerPortColumn(),this.ensurePromptTrackingColumns(),this.removeSessionSummariesUniqueConstraint(),this.addObservationHierarchicalFields(),this.makeObservationsTextNullable(),this.createUserPromptsTable(),this.ensureDiscoveryTokensColumn(),this.createPendingMessagesTable(),this.renameSessionIdColumns(),this.repairSessionIdColumnRename(),this.addFailedAtEpochColumn(),this.addOnUpdateCascadeToForeignKeys()}initializeSchema(){this.db.run(`
`)[2]||"").match(/at\s+(?:.*\s+)?\(?([^:]+):(\d+):(\d+)\)?/),m=c?`${c[1].split("/").pop()}:${c[2]}`:"unknown",l={...s,location:m};return this.warn(e,`[HAPPY-PATH] ${t}`,l,n),o}},_=new W;var Ot={};function bt(){return typeof __dirname<"u"?__dirname:(0,b.dirname)((0,ce.fileURLToPath)(Ot.url))}var ht=bt(),N=I.get("CLAUDE_MEM_DATA_DIR"),y=process.env.CLAUDE_CONFIG_DIR||(0,b.join)((0,de.homedir)(),".claude"),Bt=(0,b.join)(y,"plugins","marketplaces","thedotmack"),Ht=(0,b.join)(N,"archives"),Wt=(0,b.join)(N,"logs"),Yt=(0,b.join)(N,"trash"),Vt=(0,b.join)(N,"backups"),qt=(0,b.join)(N,"modes"),Kt=(0,b.join)(N,"settings.json"),_e=(0,b.join)(N,"claude-mem.db"),Jt=(0,b.join)(N,"vector-db"),Qt=(0,b.join)(N,"observer-sessions"),zt=(0,b.join)(y,"settings.json"),Zt=(0,b.join)(y,"commands"),es=(0,b.join)(y,"CLAUDE.md");function me(r){(0,pe.mkdirSync)(r,{recursive:!0})}function le(){return(0,b.join)(ht,"..")}var $=class{db;constructor(e=_e){e!==":memory:"&&me(N),this.db=new ue.Database(e),this.db.run("PRAGMA journal_mode = WAL"),this.db.run("PRAGMA synchronous = NORMAL"),this.db.run("PRAGMA foreign_keys = ON"),this.initializeSchema(),this.ensureWorkerPortColumn(),this.ensurePromptTrackingColumns(),this.removeSessionSummariesUniqueConstraint(),this.addObservationHierarchicalFields(),this.makeObservationsTextNullable(),this.createUserPromptsTable(),this.ensureDiscoveryTokensColumn(),this.createPendingMessagesTable(),this.renameSessionIdColumns(),this.repairSessionIdColumnRename(),this.addFailedAtEpochColumn(),this.addOnUpdateCascadeToForeignKeys()}initializeSchema(){this.db.run(`
CREATE TABLE IF NOT EXISTS schema_versions (
id INTEGER PRIMARY KEY,
version INTEGER UNIQUE NOT NULL,
@@ -611,7 +611,7 @@ ${o.stack}`:` ${o.message}`:this.getLevel()===0&&typeof o=="object"?m=`
)
ORDER BY created_at_epoch DESC
LIMIT ?
`).all(e,...s,...o,t.totalObservationCount)}function z(r,e,t){return r.db.prepare(`
`).all(e,...s,...o,t.totalObservationCount)}function Q(r,e,t){return r.db.prepare(`
SELECT id, memory_session_id, request, investigated, learned, completed, next_steps, created_at, created_at_epoch
FROM session_summaries
WHERE project = ?
@@ -638,13 +638,13 @@ ${o.stack}`:` ${o.message}`:this.getLevel()===0&&typeof o=="object"?m=`
ORDER BY created_at_epoch DESC
LIMIT ?
`).all(...e,t.sessionCount+V)}function Nt(r){return r.replace(/\//g,"-")}function Ct(r){try{if(!(0,X.existsSync)(r))return{userMessage:"",assistantMessage:""};let e=(0,X.readFileSync)(r,"utf-8").trim();if(!e)return{userMessage:"",assistantMessage:""};let t=e.split(`
`).filter(n=>n.trim()),s="";for(let n=t.length-1;n>=0;n--)try{let o=t[n];if(!o.includes('"type":"assistant"'))continue;let i=JSON.parse(o);if(i.type==="assistant"&&i.message?.content&&Array.isArray(i.message.content)){let a="";for(let d of i.message.content)d.type==="text"&&(a+=d.text);if(a=a.replace(/<system-reminder>[\s\S]*?<\/system-reminder>/g,"").trim(),a){s=a;break}}}catch(o){_.debug("PARSER","Skipping malformed transcript line",{lineIndex:n},o);continue}return{userMessage:"",assistantMessage:s}}catch(e){return _.failure("WORKER","Failed to extract prior messages from transcript",{transcriptPath:r},e),{userMessage:"",assistantMessage:""}}}function Q(r,e,t,s){if(!e.showLastMessage||r.length===0)return{userMessage:"",assistantMessage:""};let n=r.find(d=>d.memory_session_id!==t);if(!n)return{userMessage:"",assistantMessage:""};let o=n.memory_session_id,i=Nt(s),a=be.default.join(y,"projects",i,`${o}.jsonl`);return Ct(a)}function Re(r,e){let t=e[0]?.id;return r.map((s,n)=>{let o=n===0?null:e[n+1];return{...s,displayEpoch:o?o.created_at_epoch:s.created_at_epoch,displayTime:o?o.created_at:s.created_at,shouldShowLink:s.id!==t}})}function Z(r,e){let t=[...r.map(s=>({type:"observation",data:s})),...e.map(s=>({type:"summary",data:s}))];return t.sort((s,n)=>{let o=s.type==="observation"?s.data.created_at_epoch:s.data.displayEpoch,i=n.type==="observation"?n.data.created_at_epoch:n.data.displayEpoch;return o-i}),t}function Ne(r,e){return new Set(r.slice(0,e).map(t=>t.id))}function Ce(){let r=new Date,e=r.toLocaleDateString("en-CA"),t=r.toLocaleTimeString("en-US",{hour:"numeric",minute:"2-digit",hour12:!0}).toLowerCase().replace(" ",""),s=r.toLocaleTimeString("en-US",{timeZoneName:"short"}).split(" ").pop();return`${e} ${t} ${s}`}function Ae(r){return[`# [${r}] recent context, ${Ce()}`,""]}function Ie(){return[`**Legend:** session-request | ${O.getInstance().getActiveMode().observation_types.map(t=>`${t.emoji} ${t.id}`).join(" | ")}`,""]}function ye(){return["**Column 
Key**:","- **Read**: Tokens to read this observation (cost to learn it now)","- **Work**: Tokens spent on work that produced this record ( research, building, deciding)",""]}function Me(){return["**Context Index:** This semantic index (titles, types, files, tokens) is usually sufficient to understand past work.","","When you need implementation details, rationale, or debugging context:","- Use MCP tools (search, get_observations) to fetch full observations on-demand","- Critical types ( bugfix, decision) often need detailed fetching","- Trust this index over re-reading code for past decisions and learnings",""]}function Le(r,e){let t=[];if(t.push("**Context Economics**:"),t.push(`- Loading: ${r.totalObservations} observations (${r.totalReadTokens.toLocaleString()} tokens to read)`),t.push(`- Work investment: ${r.totalDiscoveryTokens.toLocaleString()} tokens spent on research, building, and decisions`),r.totalDiscoveryTokens>0&&(e.showSavingsAmount||e.showSavingsPercent)){let s="- Your savings: ";e.showSavingsAmount&&e.showSavingsPercent?s+=`${r.savings.toLocaleString()} tokens (${r.savingsPercent}% reduction from reuse)`:e.showSavingsAmount?s+=`${r.savings.toLocaleString()} tokens`:s+=`${r.savingsPercent}% reduction from reuse`,t.push(s)}return t.push(""),t}function ve(r){return[`### ${r}`,""]}function De(r){return[`**${r}**`,"| ID | Time | T | Title | Read | Work |","|----|------|---|-------|------|------|"]}function xe(r,e,t){let s=r.title||"Untitled",n=O.getInstance().getTypeIcon(r.type),{readTokens:o,discoveryDisplay:i}=M(r,t),a=t.showReadTokens?`~${o}`:"",d=t.showWorkTokens?i:"";return`| #${r.id} | ${e||'"'} | ${n} | ${s} | ${a} | ${d} |`}function Ue(r,e,t,s){let n=[],o=r.title||"Untitled",i=O.getInstance().getTypeIcon(r.type),{readTokens:a,discoveryDisplay:d}=M(r,s);n.push(`**#${r.id}** ${e||'"'} ${i} **${o}**`),t&&(n.push(""),n.push(t),n.push(""));let c=[];return s.showReadTokens&&c.push(`Read: ~${a}`),s.showWorkTokens&&c.push(`Work: 
${d}`),c.length>0&&n.push(c.join(", ")),n.push(""),n}function ke(r,e){let t=`${r.request||"Session started"} (${e})`;return[`**#S${r.id}** ${t}`,""]}function x(r,e){return e?[`**${r}**: ${e}`,""]:[]}function we(r){return r.assistantMessage?["","---","","**Previously**","",`A: ${r.assistantMessage}`,""]:[]}function $e(r,e){return["",`Access ${Math.round(r/1e3)}k tokens of past research & decisions for just ${e.toLocaleString()}t. Use MCP search tools to access memories by ID.`]}function Fe(r){return`# [${r}] recent context, ${Ce()}
`).filter(n=>n.trim()),s="";for(let n=t.length-1;n>=0;n--)try{let o=t[n];if(!o.includes('"type":"assistant"'))continue;let i=JSON.parse(o);if(i.type==="assistant"&&i.message?.content&&Array.isArray(i.message.content)){let a="";for(let d of i.message.content)d.type==="text"&&(a+=d.text);if(a=a.replace(/<system-reminder>[\s\S]*?<\/system-reminder>/g,"").trim(),a){s=a;break}}}catch(o){_.debug("PARSER","Skipping malformed transcript line",{lineIndex:n},o);continue}return{userMessage:"",assistantMessage:s}}catch(e){return _.failure("WORKER","Failed to extract prior messages from transcript",{transcriptPath:r},e),{userMessage:"",assistantMessage:""}}}function z(r,e,t,s){if(!e.showLastMessage||r.length===0)return{userMessage:"",assistantMessage:""};let n=r.find(d=>d.memory_session_id!==t);if(!n)return{userMessage:"",assistantMessage:""};let o=n.memory_session_id,i=Nt(s),a=be.default.join(y,"projects",i,`${o}.jsonl`);return Ct(a)}function Re(r,e){let t=e[0]?.id;return r.map((s,n)=>{let o=n===0?null:e[n+1];return{...s,displayEpoch:o?o.created_at_epoch:s.created_at_epoch,displayTime:o?o.created_at:s.created_at,shouldShowLink:s.id!==t}})}function Z(r,e){let t=[...r.map(s=>({type:"observation",data:s})),...e.map(s=>({type:"summary",data:s}))];return t.sort((s,n)=>{let o=s.type==="observation"?s.data.created_at_epoch:s.data.displayEpoch,i=n.type==="observation"?n.data.created_at_epoch:n.data.displayEpoch;return o-i}),t}function Ne(r,e){return new Set(r.slice(0,e).map(t=>t.id))}function Ce(){let r=new Date,e=r.toLocaleDateString("en-CA"),t=r.toLocaleTimeString("en-US",{hour:"numeric",minute:"2-digit",hour12:!0}).toLowerCase().replace(" ",""),s=r.toLocaleTimeString("en-US",{timeZoneName:"short"}).split(" ").pop();return`${e} ${t} ${s}`}function Ae(r){return[`# [${r}] recent context, ${Ce()}`,""]}function Ie(){return[`**Legend:** session-request | ${O.getInstance().getActiveMode().observation_types.map(t=>`${t.emoji} ${t.id}`).join(" | ")}`,""]}function ye(){return["**Column 
Key**:","- **Read**: Tokens to read this observation (cost to learn it now)","- **Work**: Tokens spent on work that produced this record ( research, building, deciding)",""]}function Me(){return["**Context Index:** This semantic index (titles, types, files, tokens) is usually sufficient to understand past work.","","When you need implementation details, rationale, or debugging context:","- Use MCP tools (search, get_observations) to fetch full observations on-demand","- Critical types ( bugfix, decision) often need detailed fetching","- Trust this index over re-reading code for past decisions and learnings",""]}function Le(r,e){let t=[];if(t.push("**Context Economics**:"),t.push(`- Loading: ${r.totalObservations} observations (${r.totalReadTokens.toLocaleString()} tokens to read)`),t.push(`- Work investment: ${r.totalDiscoveryTokens.toLocaleString()} tokens spent on research, building, and decisions`),r.totalDiscoveryTokens>0&&(e.showSavingsAmount||e.showSavingsPercent)){let s="- Your savings: ";e.showSavingsAmount&&e.showSavingsPercent?s+=`${r.savings.toLocaleString()} tokens (${r.savingsPercent}% reduction from reuse)`:e.showSavingsAmount?s+=`${r.savings.toLocaleString()} tokens`:s+=`${r.savingsPercent}% reduction from reuse`,t.push(s)}return t.push(""),t}function ve(r){return[`### ${r}`,""]}function De(r){return[`**${r}**`,"| ID | Time | T | Title | Read | Work |","|----|------|---|-------|------|------|"]}function xe(r,e,t){let s=r.title||"Untitled",n=O.getInstance().getTypeIcon(r.type),{readTokens:o,discoveryDisplay:i}=M(r,t),a=t.showReadTokens?`~${o}`:"",d=t.showWorkTokens?i:"";return`| #${r.id} | ${e||'"'} | ${n} | ${s} | ${a} | ${d} |`}function Ue(r,e,t,s){let n=[],o=r.title||"Untitled",i=O.getInstance().getTypeIcon(r.type),{readTokens:a,discoveryDisplay:d}=M(r,s);n.push(`**#${r.id}** ${e||'"'} ${i} **${o}**`),t&&(n.push(""),n.push(t),n.push(""));let c=[];return s.showReadTokens&&c.push(`Read: ~${a}`),s.showWorkTokens&&c.push(`Work: 
${d}`),c.length>0&&n.push(c.join(", ")),n.push(""),n}function ke(r,e){let t=`${r.request||"Session started"} (${e})`;return[`**#S${r.id}** ${t}`,""]}function x(r,e){return e?[`**${r}**: ${e}`,""]:[]}function we(r){return r.assistantMessage?["","---","","**Previously**","",`A: ${r.assistantMessage}`,""]:[]}function $e(r,e){return["",`Access ${Math.round(r/1e3)}k tokens of past research & decisions for just ${e.toLocaleString()}t. Use MCP search tools to access memories by ID.`]}function Fe(r){return`# [${r}] recent context, ${Ce()}
No previous sessions found for this project yet.`}function Pe(){let r=new Date,e=r.toLocaleDateString("en-CA"),t=r.toLocaleTimeString("en-US",{hour:"numeric",minute:"2-digit",hour12:!0}).toLowerCase().replace(" ",""),s=r.toLocaleTimeString("en-US",{timeZoneName:"short"}).split(" ").pop();return`${e} ${t} ${s}`}function Xe(r){return["",`${p.bright}${p.cyan}[${r}] recent context, ${Pe()}${p.reset}`,`${p.gray}${"\u2500".repeat(60)}${p.reset}`,""]}function je(){let e=O.getInstance().getActiveMode().observation_types.map(t=>`${t.emoji} ${t.id}`).join(" | ");return[`${p.dim}Legend: session-request | ${e}${p.reset}`,""]}function Ge(){return[`${p.bright}Column Key${p.reset}`,`${p.dim} Read: Tokens to read this observation (cost to learn it now)${p.reset}`,`${p.dim} Work: Tokens spent on work that produced this record ( research, building, deciding)${p.reset}`,""]}function Be(){return[`${p.dim}Context Index: This semantic index (titles, types, files, tokens) is usually sufficient to understand past work.${p.reset}`,"",`${p.dim}When you need implementation details, rationale, or debugging context:${p.reset}`,`${p.dim} - Use MCP tools (search, get_observations) to fetch full observations on-demand${p.reset}`,`${p.dim} - Critical types ( bugfix, decision) often need detailed fetching${p.reset}`,`${p.dim} - Trust this index over re-reading code for past decisions and learnings${p.reset}`,""]}function He(r,e){let t=[];if(t.push(`${p.bright}${p.cyan}Context Economics${p.reset}`),t.push(`${p.dim} Loading: ${r.totalObservations} observations (${r.totalReadTokens.toLocaleString()} tokens to read)${p.reset}`),t.push(`${p.dim} Work investment: ${r.totalDiscoveryTokens.toLocaleString()} tokens spent on research, building, and decisions${p.reset}`),r.totalDiscoveryTokens>0&&(e.showSavingsAmount||e.showSavingsPercent)){let s=" Your savings: ";e.showSavingsAmount&&e.showSavingsPercent?s+=`${r.savings.toLocaleString()} tokens (${r.savingsPercent}% reduction from 
reuse)`:e.showSavingsAmount?s+=`${r.savings.toLocaleString()} tokens`:s+=`${r.savingsPercent}% reduction from reuse`,t.push(`${p.green}${s}${p.reset}`)}return t.push(""),t}function We(r){return[`${p.bright}${p.cyan}${r}${p.reset}`,""]}function Ye(r){return[`${p.dim}${r}${p.reset}`]}function Ve(r,e,t,s){let n=r.title||"Untitled",o=O.getInstance().getTypeIcon(r.type),{readTokens:i,discoveryTokens:a,workEmoji:d}=M(r,s),c=t?`${p.dim}${e}${p.reset}`:" ".repeat(e.length),m=s.showReadTokens&&i>0?`${p.dim}(~${i}t)${p.reset}`:"",l=s.showWorkTokens&&a>0?`${p.dim}(${d} ${a.toLocaleString()}t)${p.reset}`:"";return` ${p.dim}#${r.id}${p.reset} ${c} ${o} ${n} ${m} ${l}`}function qe(r,e,t,s,n){let o=[],i=r.title||"Untitled",a=O.getInstance().getTypeIcon(r.type),{readTokens:d,discoveryTokens:c,workEmoji:m}=M(r,n),l=t?`${p.dim}${e}${p.reset}`:" ".repeat(e.length),T=n.showReadTokens&&d>0?`${p.dim}(~${d}t)${p.reset}`:"",E=n.showWorkTokens&&c>0?`${p.dim}(${m} ${c.toLocaleString()}t)${p.reset}`:"";return o.push(` ${p.dim}#${r.id}${p.reset} ${l} ${a} ${p.bright}${i}${p.reset}`),s&&o.push(` ${p.dim}${s}${p.reset}`),(T||E)&&o.push(` ${T} ${E}`),o.push(""),o}function Ke(r,e){let t=`${r.request||"Session started"} (${e})`;return[`${p.yellow}#S${r.id}${p.reset} ${t}`,""]}function U(r,e,t){return e?[`${t}${r}:${p.reset} ${e}`,""]:[]}function Je(r){return r.assistantMessage?["","---","",`${p.bright}${p.magenta}Previously${p.reset}`,"",`${p.dim}A: ${r.assistantMessage}${p.reset}`,""]:[]}function ze(r,e){let t=Math.round(r/1e3);return["",`${p.dim}Access ${t}k tokens of past research & decisions for just ${e.toLocaleString()}t. Use MCP search tools to access memories by ID.${p.reset}`]}function Qe(r){return`
No previous sessions found for this project yet.`}function Pe(){let r=new Date,e=r.toLocaleDateString("en-CA"),t=r.toLocaleTimeString("en-US",{hour:"numeric",minute:"2-digit",hour12:!0}).toLowerCase().replace(" ",""),s=r.toLocaleTimeString("en-US",{timeZoneName:"short"}).split(" ").pop();return`${e} ${t} ${s}`}function Xe(r){return["",`${p.bright}${p.cyan}[${r}] recent context, ${Pe()}${p.reset}`,`${p.gray}${"\u2500".repeat(60)}${p.reset}`,""]}function je(){let e=O.getInstance().getActiveMode().observation_types.map(t=>`${t.emoji} ${t.id}`).join(" | ");return[`${p.dim}Legend: session-request | ${e}${p.reset}`,""]}function Ge(){return[`${p.bright}Column Key${p.reset}`,`${p.dim} Read: Tokens to read this observation (cost to learn it now)${p.reset}`,`${p.dim} Work: Tokens spent on work that produced this record ( research, building, deciding)${p.reset}`,""]}function Be(){return[`${p.dim}Context Index: This semantic index (titles, types, files, tokens) is usually sufficient to understand past work.${p.reset}`,"",`${p.dim}When you need implementation details, rationale, or debugging context:${p.reset}`,`${p.dim} - Use MCP tools (search, get_observations) to fetch full observations on-demand${p.reset}`,`${p.dim} - Critical types ( bugfix, decision) often need detailed fetching${p.reset}`,`${p.dim} - Trust this index over re-reading code for past decisions and learnings${p.reset}`,""]}function He(r,e){let t=[];if(t.push(`${p.bright}${p.cyan}Context Economics${p.reset}`),t.push(`${p.dim} Loading: ${r.totalObservations} observations (${r.totalReadTokens.toLocaleString()} tokens to read)${p.reset}`),t.push(`${p.dim} Work investment: ${r.totalDiscoveryTokens.toLocaleString()} tokens spent on research, building, and decisions${p.reset}`),r.totalDiscoveryTokens>0&&(e.showSavingsAmount||e.showSavingsPercent)){let s=" Your savings: ";e.showSavingsAmount&&e.showSavingsPercent?s+=`${r.savings.toLocaleString()} tokens (${r.savingsPercent}% reduction from 
reuse)`:e.showSavingsAmount?s+=`${r.savings.toLocaleString()} tokens`:s+=`${r.savingsPercent}% reduction from reuse`,t.push(`${p.green}${s}${p.reset}`)}return t.push(""),t}function We(r){return[`${p.bright}${p.cyan}${r}${p.reset}`,""]}function Ye(r){return[`${p.dim}${r}${p.reset}`]}function Ve(r,e,t,s){let n=r.title||"Untitled",o=O.getInstance().getTypeIcon(r.type),{readTokens:i,discoveryTokens:a,workEmoji:d}=M(r,s),c=t?`${p.dim}${e}${p.reset}`:" ".repeat(e.length),m=s.showReadTokens&&i>0?`${p.dim}(~${i}t)${p.reset}`:"",l=s.showWorkTokens&&a>0?`${p.dim}(${d} ${a.toLocaleString()}t)${p.reset}`:"";return` ${p.dim}#${r.id}${p.reset} ${c} ${o} ${n} ${m} ${l}`}function qe(r,e,t,s,n){let o=[],i=r.title||"Untitled",a=O.getInstance().getTypeIcon(r.type),{readTokens:d,discoveryTokens:c,workEmoji:m}=M(r,n),l=t?`${p.dim}${e}${p.reset}`:" ".repeat(e.length),T=n.showReadTokens&&d>0?`${p.dim}(~${d}t)${p.reset}`:"",E=n.showWorkTokens&&c>0?`${p.dim}(${m} ${c.toLocaleString()}t)${p.reset}`:"";return o.push(` ${p.dim}#${r.id}${p.reset} ${l} ${a} ${p.bright}${i}${p.reset}`),s&&o.push(` ${p.dim}${s}${p.reset}`),(T||E)&&o.push(` ${T} ${E}`),o.push(""),o}function Ke(r,e){let t=`${r.request||"Session started"} (${e})`;return[`${p.yellow}#S${r.id}${p.reset} ${t}`,""]}function U(r,e,t){return e?[`${t}${r}:${p.reset} ${e}`,""]:[]}function Je(r){return r.assistantMessage?["","---","",`${p.bright}${p.magenta}Previously${p.reset}`,"",`${p.dim}A: ${r.assistantMessage}${p.reset}`,""]:[]}function Qe(r,e){let t=Math.round(r/1e3);return["",`${p.dim}Access ${t}k tokens of past research & decisions for just ${e.toLocaleString()}t. Use MCP search tools to access memories by ID.${p.reset}`]}function ze(r){return`
${p.bright}${p.cyan}[${r}] recent context, ${Pe()}${p.reset}
${p.gray}${"\u2500".repeat(60)}${p.reset}
${p.dim}No previous sessions found for this project yet.${p.reset}
`}function Ze(r,e,t,s){let n=[];return s?n.push(...Xe(r)):n.push(...Ae(r)),s?n.push(...je()):n.push(...Ie()),s?n.push(...Ge()):n.push(...ye()),s?n.push(...Be()):n.push(...Me()),P(t)&&(s?n.push(...He(e,t)):n.push(...Le(e,t))),n}var ee=L(require("path"),1);function B(r){if(!r)return[];try{let e=JSON.parse(r);return Array.isArray(e)?e:[]}catch(e){return _.debug("PARSER","Failed to parse JSON array, using empty fallback",{preview:r?.substring(0,50)},e),[]}}function tt(r){return new Date(r).toLocaleString("en-US",{month:"short",day:"numeric",hour:"numeric",minute:"2-digit",hour12:!0})}function st(r){return new Date(r).toLocaleString("en-US",{hour:"numeric",minute:"2-digit",hour12:!0})}function rt(r){return new Date(r).toLocaleString("en-US",{month:"short",day:"numeric",year:"numeric"})}function et(r,e){return ee.default.isAbsolute(r)?ee.default.relative(e,r):r}function nt(r,e,t){let s=B(r);if(s.length>0)return et(s[0],e);if(t){let n=B(t);if(n.length>0)return et(n[0],e)}return"General"}function At(r){let e=new Map;for(let s of r){let n=s.type==="observation"?s.data.created_at:s.data.displayTime,o=rt(n);e.has(o)||e.set(o,[]),e.get(o).push(s)}let t=Array.from(e.entries()).sort((s,n)=>{let o=new Date(s[0]).getTime(),i=new Date(n[0]).getTime();return o-i});return new Map(t)}function It(r,e){return e.fullObservationField==="narrative"?r.narrative:r.facts?B(r.facts).join(`
`):null}function yt(r,e,t,s,n,o){let i=[];o?i.push(...We(r)):i.push(...ve(r));let a=null,d="",c=!1;for(let m of e)if(m.type==="summary"){c&&(i.push(""),c=!1,a=null,d="");let l=m.data,T=tt(l.displayTime);o?i.push(...Ke(l,T)):i.push(...ke(l,T))}else{let l=m.data,T=nt(l.files_modified,n,l.files_read),E=st(l.created_at),g=E!==d,h=g?E:"";d=E;let u=t.has(l.id);if(T!==a&&(c&&i.push(""),o?i.push(...Ye(T)):i.push(...De(T)),a=T,c=!0),u){let S=It(l,s);o?i.push(...qe(l,E,g,S,s)):(c&&!o&&(i.push(""),c=!1),i.push(...Ue(l,h,S,s)),a=null)}else o?i.push(Ve(l,E,g,s)):i.push(xe(l,h,s))}return c&&i.push(""),i}function ot(r,e,t,s,n){let o=[],i=At(r);for(let[a,d]of i)o.push(...yt(a,d,e,t,s,n));return o}function it(r,e,t){return!(!r.showLastSummary||!e||!!!(e.investigated||e.learned||e.completed||e.next_steps)||t&&e.created_at_epoch<=t.created_at_epoch)}function at(r,e){let t=[];return e?(t.push(...U("Investigated",r.investigated,p.blue)),t.push(...U("Learned",r.learned,p.yellow)),t.push(...U("Completed",r.completed,p.green)),t.push(...U("Next Steps",r.next_steps,p.magenta))):(t.push(...x("Investigated",r.investigated)),t.push(...x("Learned",r.learned)),t.push(...x("Completed",r.completed)),t.push(...x("Next Steps",r.next_steps))),t}function dt(r,e){return e?Je(r):we(r)}function pt(r,e,t){return!P(e)||r.totalDiscoveryTokens<=0||r.savings<=0?[]:t?ze(r.totalDiscoveryTokens,r.totalReadTokens):$e(r.totalDiscoveryTokens,r.totalReadTokens)}var Mt=ct.default.join((0,_t.homedir)(),".claude","plugins","marketplaces","thedotmack","plugin",".install-version");function Lt(){try{return new $}catch(r){if(r.code==="ERR_DLOPEN_FAILED"){try{(0,mt.unlinkSync)(Mt)}catch(e){_.debug("SYSTEM","Marker file cleanup failed (may not exist)",{},e)}return _.error("SYSTEM","Native module rebuild needed - restart Claude Code to auto-fix"),null}throw r}}function vt(r,e){return e?Qe(r):Fe(r)}function Dt(r,e,t,s,n,o,i){let a=[],d=K(e);a.push(...Ze(r,d,s,i));let 
c=t.slice(0,s.sessionCount),m=Re(c,t),l=Z(e,m),T=Ne(e,s.fullObservationCount);a.push(...ot(l,T,s,n,i));let E=t[0],g=e[0];it(s,E,g)&&a.push(...at(E,i));let h=Q(e,s,o,n);return a.push(...dt(h,i)),a.push(...pt(d,s,i)),a.join(`
`).trimEnd()}async function te(r,e=!1){let t=Y(),s=r?.cwd??process.cwd(),n=Te(s),o=r?.projects||[n],i=Lt();if(!i)return"";try{let a=o.length>1?he(i,o,t):J(i,n,t),d=o.length>1?Oe(i,o,t):z(i,n,t);return a.length===0&&d.length===0?vt(n,e):Dt(n,a,d,t,s,r?.session_id,e)}finally{i.close()}}0&&(module.exports={generateContext});
`):null}function yt(r,e,t,s,n,o){let i=[];o?i.push(...We(r)):i.push(...ve(r));let a=null,d="",c=!1;for(let m of e)if(m.type==="summary"){c&&(i.push(""),c=!1,a=null,d="");let l=m.data,T=tt(l.displayTime);o?i.push(...Ke(l,T)):i.push(...ke(l,T))}else{let l=m.data,T=nt(l.files_modified,n,l.files_read),E=st(l.created_at),g=E!==d,h=g?E:"";d=E;let u=t.has(l.id);if(T!==a&&(c&&i.push(""),o?i.push(...Ye(T)):i.push(...De(T)),a=T,c=!0),u){let S=It(l,s);o?i.push(...qe(l,E,g,S,s)):(c&&!o&&(i.push(""),c=!1),i.push(...Ue(l,h,S,s)),a=null)}else o?i.push(Ve(l,E,g,s)):i.push(xe(l,h,s))}return c&&i.push(""),i}function ot(r,e,t,s,n){let o=[],i=At(r);for(let[a,d]of i)o.push(...yt(a,d,e,t,s,n));return o}function it(r,e,t){return!(!r.showLastSummary||!e||!!!(e.investigated||e.learned||e.completed||e.next_steps)||t&&e.created_at_epoch<=t.created_at_epoch)}function at(r,e){let t=[];return e?(t.push(...U("Investigated",r.investigated,p.blue)),t.push(...U("Learned",r.learned,p.yellow)),t.push(...U("Completed",r.completed,p.green)),t.push(...U("Next Steps",r.next_steps,p.magenta))):(t.push(...x("Investigated",r.investigated)),t.push(...x("Learned",r.learned)),t.push(...x("Completed",r.completed)),t.push(...x("Next Steps",r.next_steps))),t}function dt(r,e){return e?Je(r):we(r)}function pt(r,e,t){return!P(e)||r.totalDiscoveryTokens<=0||r.savings<=0?[]:t?Qe(r.totalDiscoveryTokens,r.totalReadTokens):$e(r.totalDiscoveryTokens,r.totalReadTokens)}var Mt=ct.default.join((0,_t.homedir)(),".claude","plugins","marketplaces","thedotmack","plugin",".install-version");function Lt(){try{return new $}catch(r){if(r.code==="ERR_DLOPEN_FAILED"){try{(0,mt.unlinkSync)(Mt)}catch(e){_.debug("SYSTEM","Marker file cleanup failed (may not exist)",{},e)}return _.error("SYSTEM","Native module rebuild needed - restart Claude Code to auto-fix"),null}throw r}}function vt(r,e){return e?ze(r):Fe(r)}function Dt(r,e,t,s,n,o,i){let a=[],d=K(e);a.push(...Ze(r,d,s,i));let 
c=t.slice(0,s.sessionCount),m=Re(c,t),l=Z(e,m),T=Ne(e,s.fullObservationCount);a.push(...ot(l,T,s,n,i));let E=t[0],g=e[0];it(s,E,g)&&a.push(...at(E,i));let h=z(e,s,o,n);return a.push(...dt(h,i)),a.push(...pt(d,s,i)),a.join(`
`).trimEnd()}async function te(r,e=!1){let t=Y(),s=r?.cwd??process.cwd(),n=Te(s),o=r?.projects||[n],i=Lt();if(!i)return"";try{let a=o.length>1?he(i,o,t):J(i,n,t),d=o.length>1?Oe(i,o,t):Q(i,n,t);return a.length===0&&d.length===0?vt(n,e):Dt(n,a,d,t,s,r?.session_id,e)}finally{i.close()}}0&&(module.exports={generateContext});
File diff suppressed because one or more lines are too long
+7 -6
View File
@@ -20,8 +20,9 @@ export class SessionQueueProcessor {
/**
* Create an async iterator that yields messages as they become available.
* Uses atomic claim-and-delete to prevent duplicates.
* The queue is a pure buffer: claim it, delete it, process in memory.
* Uses atomic claim-confirm to prevent duplicates.
* Messages are claimed (marked processing) and stay in DB until confirmProcessed().
* Self-heals stale processing messages before each claim.
* Waits for 'message' event when queue is empty.
*
* CRITICAL: Calls onIdleTimeout callback after 3 minutes of inactivity.
@@ -34,14 +35,14 @@ export class SessionQueueProcessor {
while (!signal.aborted) {
try {
// Atomically claim AND DELETE next message from DB
// Message is now in memory only - no "processing" state tracking needed
const persistentMessage = this.store.claimAndDelete(sessionDbId);
// Atomically claim next pending message (marks as 'processing')
// Self-heals any stale processing messages before claiming
const persistentMessage = this.store.claimNextMessage(sessionDbId);
if (persistentMessage) {
// Reset activity time when we successfully yield a message
lastActivityTime = Date.now();
// Yield the message for processing (it's already deleted from queue)
// Yield the message for processing (it's marked as 'processing' in DB)
yield this.toPendingMessageWithId(persistentMessage);
} else {
// Queue empty - wait for wake-up event or timeout
+30 -6
View File
@@ -2,6 +2,9 @@ import { Database } from './sqlite-compat.js';
import type { PendingMessage } from '../worker-types.js';
import { logger } from '../../utils/logger.js';
/** Messages processing longer than this are considered stale and reset to pending by self-healing */
const STALE_PROCESSING_THRESHOLD_MS = 60_000;
/**
* Persistent pending message record from database
*/
@@ -26,12 +29,17 @@ export interface PersistentPendingMessage {
/**
* PendingMessageStore - Persistent work queue for SDK messages
*
* Messages are persisted before processing using a claim-and-delete pattern.
* Messages are persisted before processing using a claim-confirm pattern.
* This simplifies the lifecycle and eliminates duplicate processing bugs.
*
* Lifecycle:
* 1. enqueue() - Message persisted with status 'pending'
* 2. claimAndDelete() - Atomically claims and deletes message (process in memory)
* 2. claimNextMessage() - Atomically claims next pending message (marks as 'processing')
* 3. confirmProcessed() - Deletes message after successful processing
*
* Self-healing:
* - claimNextMessage() resets stale 'processing' messages (>60s) back to 'pending' before claiming
* - This eliminates stuck messages from generator crashes without external timers
*
* Recovery:
* - getSessionsWithPendingMessages() - Find sessions that need recovery on startup
@@ -78,13 +86,29 @@ export class PendingMessageStore {
/**
* Atomically claim the next pending message by marking it as 'processing'.
* CRITICAL FIX: Does NOT delete - message stays in DB until confirmProcessed() is called.
* This prevents message loss if the generator crashes mid-processing.
* Self-healing: resets any stale 'processing' messages (>60s) back to 'pending' first.
* Message stays in DB until confirmProcessed() is called.
* Uses a transaction to prevent race conditions.
*/
claimAndDelete(sessionDbId: number): PersistentPendingMessage | null {
const now = Date.now();
claimNextMessage(sessionDbId: number): PersistentPendingMessage | null {
const claimTx = this.db.transaction((sessionId: number) => {
// Capture time inside transaction so it's fresh if WAL contention causes retry
const now = Date.now();
// Self-healing: reset stale 'processing' messages back to 'pending'
// This recovers from generator crashes without external timers
// Note: strict < means messages must be OLDER than threshold to be reset
const staleCutoff = now - STALE_PROCESSING_THRESHOLD_MS;
const resetStmt = this.db.prepare(`
UPDATE pending_messages
SET status = 'pending', started_processing_at_epoch = NULL
WHERE session_db_id = ? AND status = 'processing'
AND started_processing_at_epoch < ?
`);
const resetResult = resetStmt.run(sessionId, staleCutoff);
if (resetResult.changes > 0) {
logger.info('QUEUE', `SELF_HEAL | sessionDbId=${sessionId} | recovered ${resetResult.changes} stale processing message(s)`);
}
const peekStmt = this.db.prepare(`
SELECT * FROM pending_messages
WHERE session_db_id = ? AND status = 'pending'
+2 -3
View File
@@ -614,17 +614,16 @@ export class WorkerService {
return;
}
// Shared store for idle-reset and pending-count checks below
// Store for pending-count check below
const { PendingMessageStore } = require('./sqlite/PendingMessageStore.js');
const pendingStore = new PendingMessageStore(this.dbManager.getSessionStore().db, 3);
// Idle timeout means no new work arrived for 3 minutes - don't restart
// No need to reset stale processing messages here — claimNextMessage() self-heals
if (session.idleTimedOut) {
logger.info('SYSTEM', 'Generator exited due to idle timeout, not restarting', {
sessionId: session.sessionDbId
});
// Reset stale processing messages so they can be picked up later
pendingStore.resetStaleProcessingMessages(0, session.sessionDbId); // Reset this session's messages only
session.idleTimedOut = false; // Reset flag
this.broadcastProcessingStatus();
return;
+1 -1
View File
@@ -15,7 +15,7 @@ export enum LogLevel {
SILENT = 4
}
export type Component = 'HOOK' | 'WORKER' | 'SDK' | 'PARSER' | 'DB' | 'SYSTEM' | 'HTTP' | 'SESSION' | 'CHROMA' | 'CHROMA_SYNC' | 'FOLDER_INDEX' | 'CLAUDE_MD';
export type Component = 'HOOK' | 'WORKER' | 'SDK' | 'PARSER' | 'DB' | 'SYSTEM' | 'HTTP' | 'SESSION' | 'CHROMA' | 'CHROMA_SYNC' | 'FOLDER_INDEX' | 'CLAUDE_MD' | 'QUEUE';
interface LogContext {
sessionId?: number;
@@ -5,11 +5,11 @@ import type { PendingMessageStore, PersistentPendingMessage } from '../../../src
/**
* Mock PendingMessageStore that returns null (empty queue) by default.
* Individual tests can override claimAndDelete behavior.
* Individual tests can override claimNextMessage behavior.
*/
function createMockStore(): PendingMessageStore {
return {
claimAndDelete: mock(() => null),
claimNextMessage: mock(() => null),
toPendingMessage: mock((msg: PersistentPendingMessage) => ({
type: msg.message_type,
tool_name: msg.tool_name || undefined,
@@ -140,7 +140,7 @@ describe('SessionQueueProcessor', () => {
let callCount = 0;
// Return a message on first call, then null
(store.claimAndDelete as any) = mock(() => {
(store.claimNextMessage as any) = mock(() => {
callCount++;
if (callCount === 1) {
return createMockMessage({ id: 1 });
@@ -170,7 +170,7 @@ describe('SessionQueueProcessor', () => {
expect(results).toHaveLength(1);
expect(results[0]._persistentId).toBe(1);
// Store's claimAndDelete should have been called at least twice
// Store's claimNextMessage should have been called at least twice
// (once returning message, once returning null)
expect(callCount).toBeGreaterThanOrEqual(1);
});
@@ -206,7 +206,7 @@ describe('SessionQueueProcessor', () => {
const onIdleTimeout = mock(() => {});
// Return null to trigger wait
(store.claimAndDelete as any) = mock(() => null);
(store.claimNextMessage as any) = mock(() => null);
const options: CreateIteratorOptions = {
sessionDbId: 123,
@@ -242,7 +242,7 @@ describe('SessionQueueProcessor', () => {
// First call: return null (queue empty)
// After message event: return message
// Then return null again
(store.claimAndDelete as any) = mock(() => {
(store.claimNextMessage as any) = mock(() => {
callCount++;
if (callCount === 1) {
// First check - queue empty, will wait
@@ -312,7 +312,7 @@ describe('SessionQueueProcessor', () => {
it('should clean up event listeners when message received', async () => {
// Return a message immediately
(store.claimAndDelete as any) = mock(() => createMockMessage({ id: 1 }));
(store.claimNextMessage as any) = mock(() => createMockMessage({ id: 1 }));
const options: CreateIteratorOptions = {
sessionDbId: 123,
@@ -344,7 +344,7 @@ describe('SessionQueueProcessor', () => {
it('should continue after store error with backoff', async () => {
let callCount = 0;
(store.claimAndDelete as any) = mock(() => {
(store.claimNextMessage as any) = mock(() => {
callCount++;
if (callCount === 1) {
throw new Error('Database error');
@@ -377,7 +377,7 @@ describe('SessionQueueProcessor', () => {
});
it('should exit cleanly if aborted during error backoff', async () => {
(store.claimAndDelete as any) = mock(() => {
(store.claimNextMessage as any) = mock(() => {
throw new Error('Database error');
});
@@ -413,7 +413,7 @@ describe('SessionQueueProcessor', () => {
created_at_epoch: 1704067200000
});
(store.claimAndDelete as any) = mock(() => mockPersistentMessage);
(store.claimNextMessage as any) = mock(() => mockPersistentMessage);
const options: CreateIteratorOptions = {
sessionDbId: 123,
@@ -0,0 +1,146 @@
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import { ClaudeMemDatabase } from '../../../src/services/sqlite/Database.js';
import { PendingMessageStore } from '../../../src/services/sqlite/PendingMessageStore.js';
import { createSDKSession } from '../../../src/services/sqlite/Sessions.js';
import type { PendingMessage } from '../../../src/services/worker-types.js';
import type { Database } from 'bun:sqlite';
describe('PendingMessageStore - Self-Healing claimNextMessage', () => {
  const SESSION_CONTENT_ID = 'test-self-heal';

  let db: Database;
  let store: PendingMessageStore;
  let sessionDbId: number;

  beforeEach(() => {
    db = new ClaudeMemDatabase(':memory:').db;
    store = new PendingMessageStore(db, 3);
    sessionDbId = createSDKSession(db, SESSION_CONTENT_ID, 'test-project', 'Test prompt');
  });

  afterEach(() => {
    db.close();
  });

  /** Persist a test message for the primary session and return its row id. */
  const enqueueMessage = (overrides: Partial<PendingMessage> = {}): number => {
    const message: PendingMessage = {
      type: 'observation',
      tool_name: 'TestTool',
      tool_input: { test: 'input' },
      tool_response: { test: 'response' },
      prompt_number: 1,
      ...overrides,
    };
    return store.enqueue(sessionDbId, SESSION_CONTENT_ID, message);
  };

  /**
   * Simulate a stuck message: force status to 'processing' and backdate
   * started_processing_at_epoch well beyond the 60s staleness threshold.
   */
  const makeMessageStaleProcessing = (messageId: number): void => {
    const staleTimestamp = Date.now() - 120_000; // 2 minutes ago (well past 60s threshold)
    db.run(
      `UPDATE pending_messages SET status = 'processing', started_processing_at_epoch = ? WHERE id = ?`,
      [staleTimestamp, messageId]
    );
  };

  /** Read back the status column for a single message row. */
  const statusOf = (id: number): string => {
    const row = db.query('SELECT status FROM pending_messages WHERE id = ?').get(id) as { status: string };
    return row.status;
  };

  test('stuck processing messages are recovered on next claim', () => {
    const stuckId = enqueueMessage();
    makeMessageStaleProcessing(stuckId);

    // Precondition: the row really is stuck in 'processing'
    expect(statusOf(stuckId)).toBe('processing');

    // Claiming should heal the stale row and then hand it back to us
    const claimed = store.claimNextMessage(sessionDbId);
    expect(claimed).not.toBeNull();
    expect(claimed!.id).toBe(stuckId);

    // Freshly claimed, so it is in 'processing' status once again
    expect(statusOf(stuckId)).toBe('processing');
  });

  test('actively processing messages are NOT recovered', () => {
    const activeId = enqueueMessage();
    const pendingId = enqueueMessage();

    // Mark the first message as actively in flight with a recent timestamp
    const recentTimestamp = Date.now() - 5_000; // 5 seconds ago (well within 60s threshold)
    db.run(
      `UPDATE pending_messages SET status = 'processing', started_processing_at_epoch = ? WHERE id = ?`,
      [recentTimestamp, activeId]
    );

    // The in-flight row must be left alone; the pending one is claimed instead
    const claimed = store.claimNextMessage(sessionDbId);
    expect(claimed).not.toBeNull();
    expect(claimed!.id).toBe(pendingId);

    // The active message keeps its 'processing' status untouched
    expect(statusOf(activeId)).toBe('processing');
  });

  test('recovery and claim is atomic within single call', () => {
    const stuckId = enqueueMessage();
    const pendingId1 = enqueueMessage();
    const pendingId2 = enqueueMessage();
    makeMessageStaleProcessing(stuckId);

    // One call both heals and claims; the healed row is oldest, so it wins
    const claimed = store.claimNextMessage(sessionDbId);
    expect(claimed).not.toBeNull();
    expect(claimed!.id).toBe(stuckId);

    // The two younger messages remain queued, untouched by the healing pass
    expect(statusOf(pendingId1)).toBe('pending');
    expect(statusOf(pendingId2)).toBe('pending');
  });

  test('no messages returns null without error', () => {
    expect(store.claimNextMessage(sessionDbId)).toBeNull();
  });

  test('self-healing only affects the specified session', () => {
    const session2Id = createSDKSession(db, 'other-session', 'test-project', 'Test');

    // A stale processing row in the primary session...
    const stuckInSession1 = enqueueMessage();
    makeMessageStaleProcessing(stuckInSession1);

    // ...and another stale processing row in the second session
    const msg: PendingMessage = {
      type: 'observation',
      tool_name: 'TestTool',
      tool_input: { test: 'input' },
      tool_response: { test: 'response' },
      prompt_number: 1,
    };
    const session2MsgId = store.enqueue(session2Id, 'other-session', msg);
    makeMessageStaleProcessing(session2MsgId);

    // Claiming for session 2 heals and returns only session 2's message
    const claimed = store.claimNextMessage(session2Id);
    expect(claimed).not.toBeNull();
    expect(claimed!.id).toBe(session2MsgId);

    // Session 1's stuck row is not healed by another session's claim
    expect(statusOf(stuckInSession1)).toBe('processing');
  });
});
+34 -4
View File
@@ -192,8 +192,8 @@ describe('Zombie Agent Prevention', () => {
// hasAnyPendingWork should return true
expect(pendingStore.hasAnyPendingWork()).toBe(true);
// CLAIM-CONFIRM pattern: claimAndDelete marks as 'processing' (not deleted)
const claimed = pendingStore.claimAndDelete(sessionId);
// CLAIM-CONFIRM pattern: claimNextMessage marks as 'processing' (not deleted)
const claimed = pendingStore.claimNextMessage(sessionId);
expect(claimed).not.toBeNull();
expect(claimed?.id).toBe(msgId1);
@@ -206,11 +206,11 @@ describe('Zombie Agent Prevention', () => {
expect(pendingStore.getPendingCount(sessionId)).toBe(2);
// Claim and confirm remaining messages
const msg2 = pendingStore.claimAndDelete(sessionId);
const msg2 = pendingStore.claimNextMessage(sessionId);
pendingStore.confirmProcessed(msg2!.id);
expect(pendingStore.getPendingCount(sessionId)).toBe(1);
const msg3 = pendingStore.claimAndDelete(sessionId);
const msg3 = pendingStore.claimNextMessage(sessionId);
pendingStore.confirmProcessed(msg3!.id);
// Should be empty now
@@ -266,6 +266,36 @@ describe('Zombie Agent Prevention', () => {
expect(session.abortController.signal.aborted).toBe(false);
});
// Test: Stuck processing messages are recovered by claimNextMessage self-healing
test('should recover stuck processing messages via claimNextMessage self-healing', async () => {
  const sessionDbId = createDbSession('content-stuck-recovery');

  // Enqueue then claim so the message transitions into the 'processing' state
  const messageId = enqueueTestMessage(sessionDbId, 'content-stuck-recovery');
  const firstClaim = pendingStore.claimNextMessage(sessionDbId);
  expect(firstClaim).not.toBeNull();
  expect(firstClaim!.id).toBe(messageId);

  // Simulate a generator crash: backdate the processing timestamp past the 60s threshold
  const staleEpoch = Date.now() - 120_000; // 2 minutes ago
  db.run(
    `UPDATE pending_messages SET started_processing_at_epoch = ? WHERE id = ?`,
    [staleEpoch, messageId]
  );

  // A 'processing' row still counts as outstanding work for this session
  expect(pendingStore.getPendingCount(sessionDbId)).toBe(1);

  // The next claim self-heals: the stale row is reset to pending and immediately re-claimed
  const reclaimed = pendingStore.claimNextMessage(sessionDbId);
  expect(reclaimed).not.toBeNull();
  expect(reclaimed!.id).toBe(messageId);

  // The recovered message then completes the normal confirm lifecycle
  pendingStore.confirmProcessed(messageId);
  expect(pendingStore.getPendingCount(sessionDbId)).toBe(0);
});
// Test: Generator cleanup on session delete
test('should properly cleanup generator promise on session delete', async () => {
const session = createMockSession(1);