chore: merge upstream v13.3.0 + keep local fixes

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-24 15:15:06 +09:00
parent b4278e67f9 c3d2af7c14
commit 59f30a5f3e
32 changed files with 1735 additions and 586 deletions
@@ -10,7 +10,7 @@
  "plugins": [
    {
      "name": "claude-mem",
-      "version": "13.2.0",
+      "version": "13.3.0",
      "source": "./plugin",
      "description": "Persistent memory system for Claude Code - context compression across sessions"
    }
@@ -1,6 +1,6 @@
 {
  "name": "claude-mem",
-  "version": "13.2.0",
+  "version": "13.3.0",
  "description": "Memory compression system for Claude Code - persist context across sessions",
  "author": {
    "name": "Alex Newman"
@@ -1,6 +1,6 @@
 {
  "name": "claude-mem",
-  "version": "13.2.0",
+  "version": "13.3.0",
  "description": "Memory compression system for Claude Code - persist context across sessions",
  "author": {
    "name": "Alex Newman",
@@ -23,7 +23,7 @@
    "nodejs"
  ],
  "skills": "./plugin/skills/",
-  "mcpServers": "./.mcp.json",
+  "mcpServers": "./plugin/.mcp.json",
  "hooks": "./plugin/hooks/codex-hooks.json",
  "interface": {
    "displayName": "claude-mem",
@@ -1,12 +0,0 @@
-{
-  "mcpServers": {
-    "mcp-search": {
-      "type": "stdio",
-      "command": "sh",
-      "args": [
-        "-c",
-        "_C=\"${CLAUDE_CONFIG_DIR:-$HOME/.claude}\"; _E=\"${CLAUDE_PLUGIN_ROOT:-${PLUGIN_ROOT:-}}\"; _P=$({ [ -n \"$_E\" ] && printf '%s\\n' \"$_E\"; printf '%s\\n' \"$PWD/plugin\" \"$PWD\"; ls -dt \"$HOME/.codex/plugins/cache/claude-mem-local/claude-mem\"/[0-9]*/ \"$HOME/.codex/plugins/cache/thedotmack/claude-mem\"/[0-9]*/ \"$_C/plugins/cache/thedotmack/claude-mem\"/[0-9]*/ 2>/dev/null; printf '%s\\n' \"$_C/plugins/marketplaces/thedotmack/plugin\"; } | while IFS= read -r _R; do [ -d \"$_R/plugin/scripts\" ] && _Q=\"$_R/plugin\" || _Q=\"$_R\"; [ -f \"$_Q/scripts/mcp-server.cjs\" ] && { printf '%s\\n' \"$_Q\"; break; }; done); [ -n \"$_P\" ] || { echo \"claude-mem: mcp server not found\" >&2; exit 1; }; exec node \"$_P/scripts/mcp-server.cjs\""
-      ]
-    }
-  }
-}
@@ -4,6 +4,23 @@ All notable changes to this project will be documented in this file.

 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

+## [13.3.0] - 2026-05-21
+
+## What's New
+
+### New skills
+
+- **design-is** (#2483) — audits a design against Dieter Rams' ten "Good design is..." principles. Produces per-principle 0–3 scores with file:line evidence and a NEW / REFINE / REDESIGN verdict, then hands off a ready-to-run `/make-plan` prompt.
+- **weekly-digests** (#2399) — produces a chapter-per-ISO-week serial digest of a project's full claude-mem timeline. Sequential subagent pipeline keeps the narrative coherent across 30+ chapters.
+- **oh-my-issues** (#2409) — root-cause issue clustering. Codifies the consolidation method that turned ~100 open issues into 6 plan-masters during the v13.0.1 cycle. Three modes: cluster pass, triage, bundle.
+
+### Fixes
+
+- **fix(mcp): drop duplicate root `.mcp.json`** (#2411) — Claude Code's `/doctor` was warning "MCP server mcp-search skipped — same command/URL as already-configured mcp-search" for every plugin user. The root copy was vestigial; the plugin's namespaced registration now wins.
+- **fix: stop Codex transcript replay after hooks migration** (#2365) — disables the default `~/.codex/sessions/**/*.jsonl` watch (native Codex hooks are now authoritative). Repairs `~/.codex/config.toml` to set `[features] hooks = true` and `[plugins."claude-mem@claude-mem-local"] enabled = true` directly. Fixes transcript replay where files discovered after startup ignored `startAtEnd` and re-injected history.
+
+Opt back into legacy Codex transcript ingestion with `CLAUDE_MEM_CODEX_TRANSCRIPT_INGESTION=true` if you depend on the JSONL watcher.
+
 ## [13.2.0] - 2026-05-12

 ## What's new
@@ -3,7 +3,7 @@
  "name": "Claude-Mem (Persistent Memory)",
  "description": "OpenClaw plugin for Claude-Mem. Records observations from embedded runner sessions and streams them to messaging channels.",
  "kind": "memory",
-  "version": "13.2.0",
+  "version": "13.3.0",
  "license": "Apache-2.0",
  "author": "thedotmack",
  "homepage": "https://claude-mem.ai",
@@ -1,6 +1,6 @@
 {
  "name": "claude-mem",
-  "version": "13.2.0",
+  "version": "13.3.0",
  "description": "Memory compression system for Claude Code - persist context across sessions",
  "keywords": [
    "claude",
@@ -44,7 +44,6 @@
    "dist",
    ".agents/plugins/marketplace.json",
    ".codex-plugin",
-    ".mcp.json",
    "plugin/.claude-plugin",
    "plugin/.codex-plugin",
    "plugin/.mcp.json",
@@ -1,6 +1,6 @@
 {
  "name": "claude-mem",
-  "version": "13.2.0",
+  "version": "13.3.0",
  "description": "Memory compression system for Claude Code - persist context across sessions",
  "author": {
    "name": "Alex Newman"
@@ -1,6 +1,6 @@
 {
  "name": "claude-mem",
-  "version": "13.2.0",
+  "version": "13.3.0",
  "description": "Memory compression system for Claude Code - persist context across sessions",
  "author": {
    "name": "Alex Newman",
@@ -1,6 +1,6 @@
 {
  "name": "claude-mem-plugin",
-  "version": "13.2.0",
+  "version": "13.3.0",
  "private": true,
  "description": "Runtime dependencies for claude-mem bundled hooks",
  "type": "module",
@@ -0,0 +1,312 @@
+---
+name: design-is
+description: Audit a design against Dieter Rams' ten "Good design is..." principles, then hand off a /make-plan prompt for one of three outcomes — new design, refine design, or redesign. Use when the user says "audit this design", "design review", "check this UI against Rams", "is this UI good", "critique this design", "design audit", or asks for a critique that should lead to a plan.
+---
+
+# Design Is
+
+## Do not use for
+
+- Routine UI code reviews → use `/review`
+- Pure copy edits → use a separate copy pass
+- Pre-design ideation with no artifact yet → start with `/make-plan` directly
+
+You are an ORCHESTRATOR. Audit a design against Dieter Rams' ten principles, score each principle with evidence, decide the outcome verdict (NEW / REFINE / REDESIGN), and hand off to `/make-plan` with a ready-to-run prompt.
+
+You do not write implementation code. You produce: evidence-cited scores, a verdict, and a `/make-plan` handoff prompt.
+
+## The Ten Principles (Dieter Rams)
+
+Audit each principle in this exact order. Each gets a score 0–3 and ≥1 piece of evidence (`file:line`, screenshot region, copy excerpt, or measured value).
+
+1. **Good design is innovative** — Does it advance the form, or imitate? Innovation rides on technology; never an end in itself.
+2. **Good design makes a product useful** — Does it serve the primary task? Emphasizes usefulness; disregards anything that detracts.
+3. **Good design is aesthetic** — Is it beautiful? Only well-executed objects can be beautiful; aesthetic quality affects well-being.
+4. **Good design makes a product understandable** — Does the structure clarify function? At its best, is it self-explanatory?
+5. **Good design is unobtrusive** — Does it stay out of the way? Neither decorative objects nor works of art — leave room for self-expression.
+6. **Good design is honest** — Does it claim only what it is? No false promises, no manipulation, no inflated value.
+7. **Good design is long-lasting** — Will it age well? Avoids being fashionable; never appears antiquated.
+8. **Good design is thorough down to the last detail** — Are edges, empty states, errors, focus rings, motion curves all considered? Care and accuracy express respect for the user.
+9. **Good design is environmentally friendly** — Does it conserve resources? Minimizes pollution — in software: bundle weight, energy, attention, cognitive load.
+10. **Good design is as little design as possible** — Less, but better. Concentrates on essentials; back to purity, back to simplicity.
+
+> If the user attributes the principles to "Dieter Braun", they mean Dieter Rams (who designed for Braun). Don't correct them inline; just use the right principles.
+
+## Delegation Model
+
+Use subagents for *evidence gathering* (reading components, measuring contrast, counting elements, inspecting tokens, screenshotting via agent-browser). Keep *scoring and verdict synthesis* with the orchestrator. Reject any subagent report that assigns scores or makes claims without citing evidence, and redeploy that subagent.
+
+### Subagent Reporting Contract (MANDATORY)
+
+Each evidence subagent response must include:
+1. Sources consulted — exact file paths and line ranges, or screenshot regions
+2. Concrete findings — what is present, what is missing, with quotes/values
+3. Per-principle facts (not opinions) — leave scoring to the orchestrator
+4. Known gaps — what could not be inspected and why
+
+## Output Artifacts
+
+All artifacts go in `DESIGN-IS-<YYYY-MM-DD>/` at repo root (or the project the user points at):
+
+- `00-scope.md` — what was audited (URL, component paths, screens), input materials
+- `01-evidence.md` — per-principle evidence collected by subagents
+- `02-scorecard.md` — per-principle 0–3 score with one-line justification + total
+- `03-verdict.md` — NEW / REFINE / REDESIGN with reasoning
+- `04-handoff-prompt.md` — copy-pasteable `/make-plan` prompt for the chosen outcome
+
+## Phases
+
+### Phase 0: Scope Lock (ALWAYS FIRST)
+
+Ask the user (or infer from the request) and write `00-scope.md`:
+- What is being audited? (live URL, repo path, Figma frame, component name)
+- Who is the primary user, and what is the primary task?
+- Constraints (brand, stack, deadline)
+- Reference designs or competitors, if any
+
+If the user is asking about a design that doesn't exist yet, skip Phases 1–2 and go straight to Phase 3 with verdict = **NEW**.
+
+### Phase 1: Evidence Gathering (FAN OUT)
+
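+Deploy subagents in parallel. Each must return ONLY the required fields listed for it below, reported in the Subagent Reporting Contract format (sources, findings, facts, known gaps) — no prose paragraphs, no scoring.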
+
+**1. Structural Evidence** subagent (always deploy)
+Required fields returned:
+- Total interactive-element count on audited surface
+- Max nesting depth of the primary component tree
+- Repeated-pattern count (same affordance appearing >1 place with the same purpose)
+- Dead-prop / unused-import count
+- File:line citations for every count
+
+**2. Visual Evidence** subagent (always deploy)
+Mode: if target is a reachable URL or running dev server → use the `agent-browser` skill for screenshots and computed-style inspection. If target is a static repo with no running instance → read source CSS / tokens / component files and report inferred facts only (mark these "INFERRED").
+Required fields returned:
+- Spacing scale observed (px array)
+- Type scale observed (px array)
+- Distinct color count (count of unique hex/oklch tokens actually rendered or referenced)
+- Lowest contrast ratio observed across primary text
+- States present checklist: empty / loading / error / success / focus / disabled — present or missing for each
+
+**3. Copy & Honesty** subagent (always deploy)
+Required fields returned:
+- List of every user-facing string with file:line
+- Flagged inflations (marketing superlatives without backing)
+- Flagged dark patterns (forced continuity, hidden cost, fake scarcity, confirmshaming)
+- Flagged jargon / unclear labels with proposed plain replacement
+- Label→behavior mismatches with file:line of both
+
+**4. Weight & Friction** subagent (always deploy)
+Required fields returned:
+- Initial JS bytes (number)
+- Network request count for primary view (number)
+- Time-to-interactive ms (number, measured or estimated with method noted)
+- Animation count on idle screen (number)
+- Notification / badge / modal count on initial load (number)
+
+**5. Accessibility Evidence** subagent (OPTIONAL — deploy only if target has a meaningful interactive UI surface; skip for static landing pages without interaction)
+Required fields returned:
+- WCAG contrast pass/fail per text token
+- Focus order list across primary controls
+- Keyboard reachability of every primary action (yes/no per action)
+- ARIA landmark count
+- Skip-link present (yes/no)
+
+**Principle → subagent mapping** (orchestrator uses this when scoring):
+
+| Principle | Fed by |
+|-----------|--------|
+| #1 innovative | orchestrator-only (judgment using all evidence) |
+| #2 useful | Structural, Accessibility |
+| #3 aesthetic | Visual |
+| #4 understandable | Structural, Copy & Honesty, Accessibility |
+| #5 unobtrusive | Structural, Visual |
+| #6 honest | Copy & Honesty |
+| #7 long-lasting | orchestrator-only (judgment using all evidence) |
+| #8 thorough | Visual |
+| #9 environmentally friendly | Weight & Friction |
+| #10 as little design as possible | Structural |
+
+The orchestrator writes `01-evidence.md` consolidating all subagent reports. Reject any finding without a source citation. Subagents are explicitly forbidden from scoring — only the orchestrator scores, using the rubric in Phase 2.
+
+### Phase 2: Scorecard (ORCHESTRATOR)
+
+The orchestrator scores each of the ten principles itself — do NOT delegate scoring.
+
+For each principle, write to `02-scorecard.md`:
+
+```
+N. Good design is <principle> — Score: X/3
+   Evidence: <one-line summary citing 01-evidence.md anchors>
+   Justification: <one sentence on why this score, not the one above or below>
+```
+
+Per-principle scoring anchors (apply verbatim — pick the level whose signal best matches the audited surface):
+
+#1 innovative — 3: introduces a pattern not seen in 5+ peer products and ships it with restraint. 2: refreshes an existing pattern with a clear improvement. 1: imitates competitors with minor variation. 0: copies a competitor's flow wholesale.
+#2 useful — 3: primary task completes in fewest possible steps; no decoy actions. 2: primary task completes but adjacent surface adds steps. 1: primary task requires unnecessary detours. 0: primary task is not directly supported on the screen audited.
+#3 aesthetic — 3: spacing/type/color obey a single visible system; no orphan styles. 2: ≤2 minor inconsistencies across audited surface. 1: 3–5 inconsistencies OR one jarring violation. 0: no visible system OR active visual noise.
+#4 understandable — 3: a first-time user names every primary control correctly. 2: 1 control needs a tooltip. 1: 2–3 controls unclear; jargon present. 0: primary action is not identifiable without help.
+#5 unobtrusive — 3: chrome recedes; content is the figure, UI the ground. 2: chrome visible but quiet. 1: decoration competes with content. 0: chrome dominates content.
+#6 honest — 3: every claim, badge, and label maps 1:1 to actual behavior. 2: ≤1 minor inflation (e.g. "powerful" once). 1: 2+ inflations OR one dark pattern. 0: any deceptive flow (forced continuity, hidden cost, fake scarcity).
+#7 long-lasting — 3: visual language has no dated trend markers; would read as current 3 years from now. 2: 1 dated marker. 1: 2–3 dated markers (skeuomorph residue, fad gradients, trend typography). 0: design reads as a specific year's trend.
+#8 thorough — 3: empty / loading / error / success / focus / disabled all present and considered. 2: 1 state missing or rough. 1: 2–3 states missing. 0: 4+ states missing or left at browser-default styling.
+#9 environmentally friendly — 3: initial JS <100KB, no idle animation, dark mode honored, prefers-reduced-motion respected. 2: <500KB, motion gated. 1: 500KB–2MB, motion always on. 0: >2MB OR autoplay video OR dark mode ignored.
+#10 as little design as possible — 3: every element earns its place; removing any one breaks the task. 2: ≤2 removable elements. 1: 3–5 removable elements. 0: page is dominated by decoration or duplicated affordances.
+
+Scoring rules:
+- **Tie-breaker rule**: When uncertain between two scores, pick the lower one. Convergence > generosity.
+- **Score worst, not mean**: When a principle has multiple representative instances on the audited surface, score the worst instance — not the average.
+- **No bonuses, no weights**: Scores stay 0–3 integer. Principles are equally weighted. Total is sum of ten scores, max 30.
+
+### Phase 3: Verdict (ORCHESTRATOR)
+
+Write `03-verdict.md` with one of three verdicts, chosen by these rules:
+
+- **NEW DESIGN** — No design exists yet, OR the existing artifact is a stub/wireframe with no real decisions to preserve.
+- **REFINE** — Total score ≥ 20 AND no individual principle scored 0. The bones are good; iterate.
+- **REDESIGN** — Total score < 20, OR any principle scored 0 (most critically on a load-bearing dimension: #2 useful, #4 understandable, or #6 honest). Start over from purpose.
+
+State the verdict in one sentence. Then list the 3–5 highest-leverage moves — each tied to a specific principle and evidence anchor. These become the spine of the next phase's plan.
+
+**Anti-patterns to reject in your own verdict:**
+- Recommending REFINE because the codebase is large (sunk cost is not a design principle)
+- Recommending REDESIGN because a single screen is ugly (scope it)
+- Recommending NEW when an honest REDESIGN is warranted (don't dodge the critique)
+
+### Phase 4: /make-plan Handoff
+
+Write `04-handoff-prompt.md` containing exactly ONE fenced `/make-plan` prompt matching the verdict. The prompt must be self-contained — the next session won't see this audit unless it's quoted in.
+
+Use the matching template below. Fill every `<bracket>`. Include the top 3–5 moves from Phase 3 verbatim, each with its evidence anchor.
+
+**Quote-in step (mandatory, applies to all three templates below):** Before emitting the handoff, replace EVERY `<bracket>` placeholder with concrete content from the audit. Inline the verdict paragraph from `03-verdict.md` and the top 3–5 moves verbatim into the template. Do NOT leave bare references like "see DESIGN-IS-.../03-verdict.md" — the next session won't have file access to the audit. The emitted handoff must be readable and actionable with zero external lookups.
+
+#### Template: NEW DESIGN
+
+````
+/make-plan Design <product/screen/component name> from scratch.
+
+Primary user: <who>
+Primary task: <one sentence>
+Constraints: <brand, stack, deadline, accessibility floor>
+
+Non-goals (do not design these now):
+- <explicit out-of-scope item 1>
+- <explicit out-of-scope item 2>
+- <explicit out-of-scope item 3>
+
+Reference principles to optimize for, in order:
+1. Useful (#2) — <what useful looks like here>
+2. Understandable (#4) — <what clarity looks like here>
+3. As little design as possible (#10) — <what restraint looks like here>
+
+Deliverables for the plan:
+- Information architecture (one screen map or component tree)
+- Primary flow wireframe (low-fi, labeled)
+- Token decisions (type scale, spacing scale, color count cap)
+- States checklist (empty, loading, error, success, focus, disabled)
+- Honesty audit on every user-facing string before ship
+
+Anti-patterns to guard against (specific to NEW):
+- Decoration without function
+- Novel interactions without precedent
+- Copy that overpromises
+- Designing for screens the Non-goals list excluded
+````
+
+#### Template: REFINE DESIGN
+
+````
+/make-plan Refine <product/screen/component name> based on a Dieter Rams audit (total <X>/30).
+
+Verdict paragraph (quoted from 03-verdict.md):
+> <paste the one-sentence verdict here>
+
+Keep (already strong, do NOT touch in this pass):
+- Principle #<N> (<name>) scored 3 — Evidence: <file:line or anchor>. Regression check: <what to grep / re-test to confirm it still scores 3 after the refine>.
+- <repeat for every principle that scored 3>
+
+Fix in priority order (top 3–5 moves from the audit, verbatim):
+1. <Principle # — short name>: <specific move>. Evidence: <file:line or anchor>.
+2. <Principle # — short name>: <specific move>. Evidence: <file:line or anchor>.
+3. <Principle # — short name>: <specific move>. Evidence: <file:line or anchor>.
+4. <optional 4th>
+5. <optional 5th>
+
+Out of scope for this refine pass: <explicit list — what NOT to touch>
+
+Deliverables for the plan:
+- Per-fix: target files, exact change, verification step
+- Token/spec changes consolidated in one place
+- Regression checklist for every "Keep" item above
+
+Anti-patterns to guard against (specific to REFINE):
+- Adding new abstractions where a direct change suffices
+- Restyling areas that already scored 3
+- Scope creep into structural redesign (if structure must change, this should be REDESIGN, not REFINE)
+- Letting fixes mutate principles outside the priority list
+````
+
+#### Template: REDESIGN
+
+````
+/make-plan Redesign <product/screen/component name>. Current design failed audit at <X>/30 with critical gaps in principles <comma-separated list of 0-scored or 1-scored load-bearing principles>.
+
+Verdict paragraph (quoted from 03-verdict.md):
+> <paste the one-sentence verdict here>
+
+Why redesign and not refine: <one sentence — usually a load-bearing principle (#2, #4, or #6) scored 0, or total is below threshold>
+
+Preserve from current design (MUST be non-empty — at minimum, name the brand tokens):
+- <specific element 1, with file:line>
+- <specific element 2, with file:line>
+- (if structurally nothing survives, write: "Brand tokens only — color palette and logo. Discard everything else.")
+
+Discard (MUST be non-empty — name the structural patterns causing the failures):
+- <pattern 1>. Evidence: <file:line>. Caused failure on principle #<N>.
+- <pattern 2>. Evidence: <file:line>. Caused failure on principle #<N>.
+
+Top 3–5 moves from the audit (verbatim):
+1. <Principle # — short name>: <specific move>. Evidence: <file:line>.
+2. <Principle # — short name>: <specific move>. Evidence: <file:line>.
+3. <Principle # — short name>: <specific move>. Evidence: <file:line>.
+
+Redesign principles in priority order:
+1. <Principle # — name> — <what success looks like>
+2. <Principle # — name> — <what success looks like>
+3. <Principle # — name> — <what success looks like>
+
+Deliverables for the plan:
+- New information architecture (not derived from old)
+- New primary flow (low-fi, labeled, compared side-by-side to current)
+- States checklist (empty, loading, error, success, focus, disabled)
+- Migration path for users currently on the old design
+- Cutover criteria (when is the old design retired)
+
+Anti-patterns to guard against (specific to REDESIGN):
+- Porting old structure under new styling
+- Keeping both designs behind a flag indefinitely
+- Redesigning to follow a trend rather than the principles above
+- Treating the Preserve list as optional — it must be filled before this handoff is valid
+````
+
+## Key Principles (for the auditor)
+
+- **Evidence over taste** — every score cites a source; "feels wrong" is not a finding
+- **Score what is, not what was intended** — design is what ships, not what was drawn
+- **Honesty applies to the audit too** — if total is 28/30, say REFINE even if the user wanted a redesign; if it's 12/30, say REDESIGN even if the user wanted a refine
+- **One verdict, not three** — pick NEW or REFINE or REDESIGN; do not hedge
+- **Handoff, don't implement** — `design-is` ends at the `/make-plan` prompt; `/make-plan` and `/do` take it from there
+- **Verdict commitment** — Once `02-scorecard.md` is written, the verdict follows the Phase 3 rule mechanically. Never re-score to back into a preferred verdict; if the scorecard says REDESIGN, the handoff is REDESIGN.
+
+## Failure Modes to Prevent
+
+- Scoring from screenshots alone without reading the code — redeploy with structural subagent
+- Scoring the codebase instead of the design — re-anchor on user-facing evidence
+- Awarding 3s generously to soften the verdict — recalibrate against the per-principle anchors in Phase 2
+- Producing a handoff prompt that doesn't quote the verdict and top moves — the next session is blind without them
+- Skipping Phase 0 scope lock — auditing the wrong surface wastes Phase 1
+- **Sunk-cost reasoning** — recommending REFINE because the codebase is large; sunk cost is not a design principle
+- **Hedging across verdicts** — "could be REFINE or REDESIGN depending on..." — pick one
+- **Score inflation to match a desired verdict** — score the evidence, then read the verdict off the rule
+- **Letting Phase 0 user preference override Phase 3 evidence** — the user can disagree with the verdict, but the audit reports what the evidence says
@@ -61,3 +61,7 @@ The orchestrator consolidates findings into a single Phase 0 output.
 - Adding parameters not in documentation
 - Skipping verification steps
 - Assuming structure without checking examples
+
+## See Also
+
+- `oh-my-issues` — the issue-side sibling. When the plan you're being asked to make is rooted in a bug or feature backlog rather than a fresh idea, route through `oh-my-issues` first to cluster issues by root cause into plan-master issues and `plans/0X-*.md` design docs. `make-plan` then operates on the design doc for one plan slice.
@@ -0,0 +1,226 @@
+---
+name: oh-my-issues
+description: Cluster a GitHub issue backlog by root cause into a small set of plan-master issues, redirect children with a standardized comment, and bundle architectural-fix PRs that close clusters atomically. Use when an issue tracker has accumulated dozens of reports that share underlying defects, when asked to triage / consolidate / cluster / dedupe issues, when asked to build a plan series or roadmap from open issues, or when routing a new incoming bug into an existing plan.
+---
+
+# oh-my-issues
+
+Turn an issue backlog into a roadmap. Issues are symptom data, not units of work — the unit of work is the architectural defect that produces them. The end state is `open issues == open plans`, 1:1.
+
+## Core principle
+
+Stop closing issues one at a time. Group symptoms that share a single architectural fix into a cluster, give the cluster one canonical home (a plan-master issue + a `plans/0X-*.md` design doc), close every child with a standardized redirect, and ship one PR per cluster that closes all children atomically. New incoming bugs get appended to the matching master as a "Round N" comment, not opened as new tracked issues.
+
+This compounds three ways: architectural fixes retire whole symptom families, the plan's test matrix institutionalizes prevention in CI, and standardized triage makes residual inflow cheap.
+
+## When to use
+
+- The repo has 20+ open issues and many feel like duplicates or platform-specific symptoms of the same defect.
+- The user asks to "triage", "consolidate", "cluster", "dedupe", "group", or "make a plan from" the issue list.
+- A new bug is filed and the user wants to know whether it belongs to existing work.
+- The user wants to ship a focused PR that resolves a cluster of related issues.
+
+## When NOT to use
+
+- Fewer than ~15 open issues: just close them.
+- Issues are genuinely independent (no shared root causes): one fix per issue is correct.
+- The repo lacks `plans/` discipline and the user does not want to introduce one — propose first, do not impose.
+
+## Three modes
+
+### Mode 1: Cluster pass (initial reduction)
+
+Use when the backlog has never been consolidated. Goal: go from N issues to N_plans masters in one operation.
+
+1. **Read everything in full.** Fetch every open issue's body *and* its comment thread — not just titles. Surface-level grouping fails without full text, and reproduction steps, linked duplicates, and diagnostic output often live in comments rather than the original body. See "GitHub CLI primitives" below for the correct paginated listing + per-issue comment fetch (a single `gh issue list` call does **not** return comment bodies).
+2. **Cluster by root cause, not by surface.** The clustering question is *would one architectural change retire all of these?* — not *do these mention the same word?*. "Windows" is a surface; "spawn contract violated by host shells" is a root cause. Two issues with different surfaces can share a cluster (e.g. an env-var leak in two different code paths sharing one missing env-isolation boundary).
+3. **Name each cluster as an architectural problem.** Title format: `[plan-XX] <Architectural Defect> — <one-line scope>`. Example: `[plan-02] Spawn-Contract Templating — canonical ${CLAUDE_PLUGIN_ROOT} resolution across all hosts`. The title must imply a fix, not a topic.
+4. **Open one master issue per cluster** with a body that lists: the architectural defect, the children (by issue number), the fix sequence, and a required test matrix (host × IDE × shell, etc.) that prevents regression.
+5. **Mirror each master as `plans/0X-<slug>.md`** in the repo. The issue is the public tracker; the doc is the design. They reference each other.
+6. **Close every child** with the standardized redirect comment (see below) and state `not planned`.
+7. **Verify end state:** `gh issue list --state open` returns exactly the masters and nothing else.
+
+Target shape for ~100 issues: 4–8 masters. More than 10 means you're clustering by surface; fewer than 3 means clusters are too broad to ship as one PR each.
+
+### Mode 2: Triage (new incoming bug, steady state)
+
+Use when a new issue is filed after consolidation is in place. Goal: never let the issue list re-accumulate.
+
+1. **Read the new issue's body in full.**
+2. **Pattern-match the symptom against existing plan masters.** For each open master, ask: *would the fix described here also fix this new bug?* If yes → it belongs to that plan.
+3. **If a match exists**, post a "Round N" comment on the master that:
+   - Names the new child by number
+   - Describes the symptom in one line
+   - Sketches the concrete fix (1–3 lines, e.g. "guard with `case "$_SH" in /*.exe|"") _SH=bash ;; esac`")
+   - Adds any new test-matrix cell the bug exposes
+4. **Close the child** with the standardized redirect comment, `not planned`.
+5. **If no match exists** and the bug is genuinely novel: open a new plan master + `plans/0X-*.md`. Resist this. Most bugs are children of existing plans.
+
+### Mode 3: Bundle (ship the cluster)
+
+Use when a plan slice is ready to ship. Goal: one PR closes N children atomically.
+
+1. **List the master's children.** From the master body and consolidation comments, collect every child issue number routed to this plan.
+2. **Verify each child's symptom is covered** by the architectural fix in the PR. If a child is not covered, the PR is not ready or that child belongs in a different plan.
+3. **Generate the PR description**: title is the plan slice (e.g. "fix(spawn): canonical ${CLAUDE_PLUGIN_ROOT} resolution"); body lists every child with `Closes #N` so GitHub auto-closes them on merge.
+4. **Add the test matrix from the plan** to CI in the same PR. Without the matrix, the cluster will re-emerge.
+5. **After merge**, the master issue can be closed only if every child was covered. If the plan has remaining scope, leave the master open and link the PR as a partial-shipping checkpoint.
+
+## Naming a plan master
+
+A plan-master title must imply its fix.
+
+| Bad (surface) | Good (architectural) |
+|---|---|
+| Windows bugs | Spawn-Contract Templating across hosts |
+| Worker crashes | Worker / Daemon Lifecycle Hardening — supervision, health, retry |
+| Auth issues | Worker Env Isolation — strip host CLI env from the SDK subprocess |
+| Install failures | Installer Failure Transparency — cross-IDE error taxonomy + 12×4 test matrix |
+
+If you cannot write a one-line architectural scope, the cluster is wrong.
+
+## The standardized redirect comment
+
+Use this exact phrasing on every child closure. Consistency lets contributors recognize the pattern at a glance and keeps the audit trail searchable.
+
+```text
+Consolidating into #<MASTER> (plan-XX). The root cause and fix sequencing are tracked there alongside the rest of the cluster — please follow that issue for progress.
+```
+
+Close as `not planned` (not `completed`) — the child was a symptom, not a unit of work.
+
+## GitHub CLI primitives
+
+Resolve repo:
+
+```bash
+repo_json=$(gh repo view --json owner,name)
+owner=$(jq -r '.owner.login // .owner.name' <<<"$repo_json")
+repo=$(jq -r '.name' <<<"$repo_json")
+```
+
+List all open issues (the read-everything pass). Two gotchas:
+- `gh issue list --json comments` returns only a count placeholder, not the comment bodies. You must fetch comments per issue with `gh issue view <N> --json comments`.
+- Any explicit `--limit` silently truncates if the backlog is larger. Always check the total open count first.
+
+```bash
+# 1. Confirm total — never trust an arbitrary --limit.
+# Note: GitHub's REST API treats PRs as issues, so .open_issues_count
+# from /repos/{owner}/{repo} is actually issues + PRs. Use the search
+# API to get the issue-only count.
+total=$(gh api "search/issues?q=repo:$owner/$repo+is:issue+is:open" --jq '.total_count')
+echo "Open issues: $total"
+
+# 2. List bodies (set --limit at or above the true total)
+gh issue list --state open --limit "$total" \
+  --json number,title,body,labels,author,createdAt
+
+# 3. For each issue, fetch its full comment thread
+for n in $(gh issue list --state open --limit "$total" --json number --jq '.[].number'); do
+  echo "=== Issue #$n ==="
+  gh issue view "$n" --json comments \
+    --jq '.comments[] | "\(.author.login) (\(.createdAt)): \(.body)"'
+done
+```
+
+If `total > 1000`, paginate via the REST API: `gh api "repos/$owner/$repo/issues?state=open&per_page=100&page=N"` looped until the result array is empty (note this includes PRs, so filter `select(.pull_request|not)`).
+
+Open a plan master:
+
+```bash
+gh issue create \
+  --title "[plan-02] Spawn-Contract Templating — canonical \${CLAUDE_PLUGIN_ROOT} resolution across all hosts" \
+  --body-file plans/02-spawn-contract-templating.md \
+  --label plan,plan-02
+```
+
+Post the consolidation comment + close the child:
+
+```bash
+gh issue comment <CHILD> --body "Consolidating into #<MASTER> (plan-XX). The root cause and fix sequencing are tracked there alongside the rest of the cluster — please follow that issue for progress."
+gh issue close <CHILD> --reason "not planned"
+```
+
+Append a "Round N" triage comment to a master:
+
+```bash
+gh issue comment <MASTER> --body "$(cat <<'EOF'
+**Round N consolidation**
+
+- #<CHILD> (<one-line symptom>) folded into this plan as <classification>.
+
+Proposed fix: <1–3 line sketch>.
+
+Adds matrix cell: <host/IDE/shell combination>.
+EOF
+)"
+```
+
+Verify final state:
+
+```bash
+gh issue list --state open --json number,title \
+  | jq -r '.[] | "\(.number)\t\(.title)"'
+```
+
+Output should be exactly the plan masters.
+
+## Plan master body template
+
+Save as `plans/0X-<slug>.md` and use as `--body-file` for the master issue.
+
+```markdown
+# [plan-XX] <Architectural Defect> — <one-line scope>
+
+## Defect
+
+<One paragraph: what is structurally broken, why it produces the observed family of symptoms.>
+
+## Children
+
+- #N — <symptom one-liner>
+- #N — <symptom one-liner>
+- ...
+
+## Fix sequence
+
+1. <First architectural change — bounded, reviewable>
+2. <Second>
+3. ...
+
+## Test matrix
+
+| Axis A | Axis B | Required behavior |
+|---|---|---|
+| ... | ... | ... |
+
+The matrix lives in CI. A future regression must fail CI before a user can file.
+
+## Out of scope
+
+<What this plan deliberately does not cover, with pointers to other plan masters.>
+```
+
+## Health checks
+
+Run periodically against the plan masters to catch the failure modes.
+
+- **Graveyard master:** master issue has accumulated 5+ "Round N" comments without a shipping PR. The plan needs a forcing PR or it must be split.
+- **Over-broad master:** the children's fixes cannot fit one PR. Split into two plans with narrower scope.
+- **Surface-clustered master:** the children share a topic but not a fix. Re-cluster by root cause; some children belong to different plans.
+- **Drift between issue and doc:** the plan master body and `plans/0X-*.md` disagree. Pick one as canonical (the doc) and regenerate the issue body from it.
+
+## Stop conditions
+
+For a cluster pass: stop when `gh issue list --state open` returns exactly the masters.
+
+For a triage: stop when the new child is closed and the master has a Round-N entry.
+
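+For a bundle: stop when the PR is merged and every listed child is linked to it via `Closes #N` — children already closed by the redirect comment stay closed (the link preserves the audit trail), and any child still open is auto-closed on merge.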
+
+## Failure modes worth refusing
+
+- **Premature clustering** before reading every issue body in full. Don't.
+- **Closing children before the master is open.** Children must always have a redirect target.
+- **Using the redirect comment for issues that aren't symptoms** (e.g. genuine feature requests with no shared root cause). Those stay open or get their own track.
+- **Closing a master before every listed child is shipped.** The master is the contract; closing it early breaks the audit trail.
@@ -0,0 +1,262 @@
+---
+name: weekly-digests
+description: Generate a serial week-by-week narrative digest of a project's full claude-mem timeline. Splits the timeline into per-ISO-week files, then runs one consecutive subagent per week — each receiving the prior week's carry-forward block — to produce one chapter per ISO week of data. Use when asked for "weekly digests", "week-by-week story", "serial timeline", or "narrative chapters" of a project's history.
+---
+
+# Weekly Digests
+
+Produce a serial, multi-chapter narrative digest of a project's complete claude-mem history. Differs from `timeline-report` (one long report) — this generates one digest *per ISO week*, with each subagent reading the prior week's carry-forward block so the story stays coherent.
+
+**The chapter count equals the number of ISO weeks the timeline covers.** A project with 2 weeks of data produces 2 chapters; one with 30 weeks produces 30. There is no fixed length — count the weeks first, then drive the pipeline off that count.
+
+## When to Use
+
+Trigger when the user asks for:
+
+- "Weekly digests"
+- "Week-by-week story"
+- "Serial timeline"
+- "Story chapters of [project]"
+- "Run a digest for each week"
+- "Continue the story week by week"
+
+If the user wants a single sweeping report, use `timeline-report` instead. This skill is for serial chapter format.
+
+## Prerequisites
+
+- claude-mem worker running
+- Project has at least one ISO week of observations (the pipeline degenerates gracefully — even N=1 works)
+- A clean output directory the user is comfortable writing into
+
+**Resolve the worker port** (do this once, reuse `$WORKER_PORT`):
+
+```bash
+WORKER_PORT="${CLAUDE_MEM_WORKER_PORT:-$(node -e "const fs=require('fs'),p=require('path'),os=require('os');const uid=(typeof process.getuid==='function'?process.getuid():77);const fallback=String(37700+(uid%100));try{const s=JSON.parse(fs.readFileSync(p.join(os.homedir(),'.claude-mem','settings.json'),'utf-8'));process.stdout.write(String(s.CLAUDE_MEM_WORKER_PORT||fallback));}catch{process.stdout.write(fallback);}" 2>/dev/null)}"
+```
+
+## Workflow
+
+### Step 1: Determine the Project Name
+
+Same worktree-detection pattern as `timeline-report`. In a worktree, the data source is the **parent project**:
+
+```bash
+git_dir=$(git rev-parse --git-dir 2>/dev/null)
+git_common_dir=$(git rev-parse --git-common-dir 2>/dev/null)
+if [ "$git_dir" != "$git_common_dir" ]; then
+  parent_project=$(basename "$(dirname "$git_common_dir")")
+else
+  parent_project=$(basename "$PWD")
+fi
+echo "$parent_project"
+```
+
+### Step 2: Fetch the Full Timeline and Save It
+
+```bash
+mkdir -p .scratch
+curl -s "http://localhost:${WORKER_PORT}/api/context/inject?project=PROJECT_NAME&full=true" \
+  > .scratch/cm-timeline.md
+wc -l .scratch/cm-timeline.md
+```
+
+Sanity-check: confirm the file is non-empty and has the expected structure (preamble, then date headers like `### Mon DD, YYYY`, then numeric observation lines `<id> <time> <emoji> <title>` and session boundary lines `S<n> <prompt> (Mon DD at HH:MMpm)`).
+
+### Step 3: Split the Timeline Into Per-ISO-Week Files
+
+Write a Python script to `.scratch/split-timeline.py` that:
+
+1. Parses date headers (`### Mon DD, YYYY`).
+2. Groups days into ISO weeks via `date.isocalendar()` (Monday-start).
+3. Emits one file per week to `docs/timeline-weeks/<YYYY>-W<NN>-<MonDD>-to-<MonDD>.md`, preserving each day's section verbatim.
+4. Runs a dual-pass sanity check: total observations distributed must equal the count in the source file.
+
+Output structure (filenames illustrative):
+
+```
+docs/timeline-weeks/
+  README.md                       # weekly index table
+  YYYY-W<NN>-MonDD-to-MonDD.md    # one per ISO week the timeline covers
+  ...
+```
+
+Each weekly file should preserve the original daily sections verbatim. Do not paraphrase at this stage — the digest agents need raw fidelity.
+
+**Count the resulting files** before launching the pipeline. That count is `TOTAL` and drives every subsequent step. Empty weeks (zero observations between active weeks) should be skipped — the pipeline only operates on weeks that have content.
+
+### Step 4: Build the Weekly Index README
+
+Write `docs/timeline-weeks/README.md` with a markdown table: Week | Dates | Observations | Sessions | File. This becomes the operator's roadmap and helps the agents understand pacing (peak weeks vs trough weeks).
+
+### Step 5: Run the Consecutive Subagent Pipeline
+
+**Critical: subagents run sequentially, NOT in parallel.** Each agent receives the prior agent's carry-forward block. This is the entire point of the skill — without it you have N disjoint summaries; with it you have an N-chapter serial narrative.
+
+Create the output directory:
+
+```bash
+mkdir -p docs/timeline-weeks/digests
+```
+
+For each week, in chronological order, dispatch a Task subagent (general-purpose) with this prompt template. **Wait for each agent to complete before launching the next.** Capture the carry-forward block from the result and inject it as `STORY_SO_FAR` into the next prompt.
+
+#### Subagent Prompt Template
+
+```
+You are writing chapter {N} of {TOTAL} in a serial week-by-week digest of the {PROJECT} project's development history. Chapters 1 through {N-1} are written. {SPECIAL_NOTE: e.g. "This is the LARGEST week", "This is the TROUGH", "This is the FINAL chapter", "This is the ONLY chapter — both first AND final week"}.
+
+**Source file (read in full):**
+{ABSOLUTE_PATH_TO_WEEK_FILE}
+
+**Output digest file (write):**
+{ABSOLUTE_PATH_TO_DIGEST_FILE}
+
+**Format key for the source file:**
+- Numeric lines like `1 7:59p 🔵 Save hook file is empty` are observations (ID, time, type-emoji, title)
+- `S##` lines are session boundaries (the user prompt that started the session)
+- Emoji legend: 🎯session 🔴bugfix 🟣feature 🔄refactor ✅change 🔵discovery ⚖️decision 🚨security_alert 🔐security_note
+
+**Story so far (carry-forward from Week {N-1}):**
+
+{STORY_SO_FAR_BLOCK_OR_EMPTY_FOR_WEEK_1}
+
+**Your digest must include:**
+1. **Title line** — `# Week {N} ({WEEK_LABEL}): {DATE_RANGE} — [your chosen subtitle]`
+2. **One-line tagline** — what this week is about, in plain English
+3. **Narrative section** ({BUDGET}) — tell the story. Resolve threads from prior weeks where the data shows resolution. Introduce new arcs. Use specific observation details.
+4. **Threads continued / opened / resolved** sections
+5. **Cliffhanger / What's next**
+6. **Carry-forward block** at the very bottom, fenced as ```carry-forward ... ``` — structured handoff for the next week's agent.
+
+**CARRY-FORWARD DISCIPLINE:**
+- Cap at ~350 words.
+- AGGRESSIVELY PRUNE: drop arcs that didn't surface this week unless they're actively unresolved cliffhangers.
+- Drop cast members absent 2+ weeks unless load-bearing for the long arc.
+- Quality over completeness. The next agent inherits what you mention; mention judiciously.
+
+Required carry-forward sub-sections:
+- **Active arcs** — ongoing themes/projects the next agent should watch for
+- **Cast** — notable named systems/people/tools (continuing + new)
+- **Unresolved** — open questions or unfinished work
+- **Tone notes** — how the story is being told (voice, perspective, register evolution)
+
+**Tone rules:**
+- Third-person narrator, sharp, observational. Not twee.
+- AI is "Claude"; human is "{USER_FIRST_NAME}".
+- Treat codebase components as characters — whatever the project's recurring named systems are (e.g. a worker, a queue, a process manager, a recurring bug, a flaky migration). Don't import names from another project; use what shows up in this project's observations.
+- Don't manufacture drama. Name what's there.
+- Track the user's prompt-register evolution week by week (frustration markers, escalation language, shifts in tone).
+- Note meta-recursion if the project is reflexive about its own behavior (e.g. a tool that documents its own work, an AI agent debugging itself, a system that catches its own regressions).
+- Watch for new villains or co-stars and name them.
+- For trough/silent weeks: silence IS the story. Don't pad. Name what didn't happen.
+- For surge weeks (>2,000 obs): pick 4-7 spine arcs and tell them well. Don't catalog.
+
+**Important:** Do NOT speculate beyond what's in the source file.
+
+After writing the file, return:
+1. Path of the file you wrote
+2. The carry-forward block verbatim
+3. One-sentence summary of the week
+```
+
+#### Narrative Budget by Observation Count
+
+Scale narrative length proportionally to the week's volume:
+
+| Obs count | Narrative section budget |
+| --- | --- |
+| < 100 | 200–400 words |
+| 100–500 | 300–600 words |
+| 500–1,500 | 500–900 words |
+| 1,500–3,000 | 700–1,100 words |
+| 3,000+ | 800–1,300 words |
+
+Substitute the matching budget into the `{BUDGET}` slot of each week's prompt.
+
+#### The First Week
+
+For Week 1, pass an empty `STORY_SO_FAR_BLOCK` and an instruction noting it's the origin chapter — the agent should establish initial cast, tone, and arcs for everyone after.
+
+#### The Final Week
+
+The final week gets a different ending: **no carry-forward block**. Instead, instruct the agent to write a `## Where We Are` section (~250 words) naming what's still open at the moment of writing. Tell the agent the project is ongoing — the digest stops; the story doesn't. Don't give the story a false ending.
+
+#### When N = 1 (single-week project)
+
+Apply BOTH treatments to the same chapter: empty `STORY_SO_FAR_BLOCK` AND `## Where We Are` instead of a carry-forward block. The agent is writing both the origin and the close in one pass. Don't reference prior or future chapters that don't exist.
+
+### Step 6: Rename Files for Sortable Order
+
+The agents write digests with names like `YYYY-W<NN>-digest.md`. These already sort chronologically by ISO week (until a project crosses a year boundary inside one project name), but **add a zero-padded numeric prefix** so the order is unambiguous to humans browsing or scripting against the directory:
+
+```bash
+cd docs/timeline-weeks/digests
+total=$(ls *.md | wc -l | tr -d ' ')
+width=${#total}                  # 1 for N<10, 2 for N<100, 3 for N<1000
+[ "$width" -lt 2 ] && width=2    # always pad to at least 2 for readability
+i=0
+for f in *.md; do
+  printf -v prefix "%0${width}d" $i
+  mv "$f" "${prefix}-$f"
+  i=$((i+1))
+done
+```
+
+Result for N=30: `00-...md` through `29-...md`. For N=4: `00-...md` through `03-...md`. For N=120: `000-...md` through `119-...md`. **Always zero-pad** — `1-...md` and `10-...md` sort wrong without it.
+
+Do NOT also prepend the order number to the digest title line inside each file. The filename prefix is for sorting; the title stays clean: `# Week N (W##): Date — Subtitle`.
+
+### Step 7: Report Completion
+
+Tell the user:
+- Total weeks digested (N)
+- Output directory path
+- Date range covered
+- Any silent/trough weeks worth flagging
+- A one-sentence capstone summarizing the arc — written by the final-chapter agent, or composed by the operator from the final agent's `## Where We Are` section.
+
+## Pipeline Discipline
+
+These rules emerged from running the pipeline end-to-end. Encode them every time:
+
+1. **Sequential, not parallel.** The whole point is the carry-forward chain. Parallelism breaks it.
+2. **Carry-forward is bounded.** It will bloat without active pruning. Tell every agent: cap ~350 words, drop dormant arcs, drop absent cast.
+3. **Track register evolution explicitly.** The user's prompt-style across weeks is a story arc. Frustration markers shift over time (whatever they happen to be in this project's data). Name the shifts.
+4. **Treat components as characters.** Whatever recurring named systems show up in the observations are this project's villains and co-stars. Stable cast across weeks builds narrative coherence.
+5. **Honor silence.** Trough weeks (10–100 obs) are real chapters. Name what didn't happen. Don't pad.
+6. **Don't manufacture drama.** Just observe the data. If the project is reflexive, the recursion is the drama; you don't need to add more.
+7. **Final week: no false ending.** The digest stops; the project doesn't. Write `## Where We Are`, not "the end."
+
+## Error Handling
+
+- **Empty timeline**: the project name is wrong, or the worker is not running. Run `curl -s "http://localhost:${WORKER_PORT}/api/search?query=*&limit=1"` to verify that the worker responds.
+- **Worker not running**: start it via your usual method or check `ps aux | grep worker-service`.
+- **Subagent returns malformed carry-forward**: extract the carry-forward block by regex (` ```carry-forward ... ``` `) and pass forward verbatim. If missing, ask the agent to retry with the explicit instruction "your reply MUST include the carry-forward block fenced as ```carry-forward ... ``` at the very end."
+- **One agent fails mid-pipeline**: retry that week with the same carry-forward. Don't skip — the chain breaks.
+- **Carry-forward growing past ~500 words**: tighten the discipline instruction in subsequent prompts. Force pruning explicitly.
+
+## Examples
+
+### Long-running project (~30 weeks)
+
+User: "Make weekly digests for [project] from beginning to end"
+
+1. Resolve worker port, detect project name.
+2. Fetch full timeline → `.scratch/cm-timeline.md`.
+3. Run `.scratch/split-timeline.py` → N weekly files in `docs/timeline-weeks/` (e.g. 30).
+4. Generate `docs/timeline-weeks/README.md` index.
+5. Launch N subagents consecutively, one per week. Each gets the prior week's carry-forward. The first chapter starts with empty carry-forward; the final chapter writes `## Where We Are` instead of a carry-forward block.
+6. Rename digests with zero-padded order prefix (`00-...md` through `29-...md`).
+7. Report total chapters, date range, any troughs/peaks, and the one-line capstone the final agent produced.
+
+### Short-lived project (~3 weeks)
+
+Same flow, just smaller. N=3, so:
+- Chapter 1: empty carry-forward, establish cast/tone/arcs.
+- Chapter 2: receives chapter 1's carry-forward, builds on it.
+- Chapter 3: receives chapter 2's carry-forward, BUT gets the final-chapter treatment (`## Where We Are` instead of carry-forward block).
+- Filenames: `00-...md`, `01-...md`, `02-...md`.
+
+### Single-week project (N=1)
+
+Apply both the first-chapter and the final-chapter treatments to the only chapter: empty carry-forward, `## Where We Are` close, no inter-chapter references. Filename: `00-...md`.
@@ -414,7 +414,6 @@ async function buildHooks() {
      'plugin/.codex-plugin/plugin.json',
      'plugin/.mcp.json',
      '.codex-plugin/plugin.json',
-      '.mcp.json',
      '.agents/plugins/marketplace.json',
    ];
    for (const filePath of requiredDistributionFiles) {
@@ -433,11 +432,7 @@ async function buildHooks() {
    if (claudeMemMarketplaceEntry?.source?.path !== './plugin') {
      throw new Error('.agents/plugins/marketplace.json must point claude-mem source.path at ./plugin so Codex loads the bundled plugin root');
    }
-    const rootMcp = JSON.parse(fs.readFileSync('.mcp.json', 'utf-8'));
    const bundledMcp = JSON.parse(fs.readFileSync('plugin/.mcp.json', 'utf-8'));
-    if (JSON.stringify(rootMcp.mcpServers?.['mcp-search']) !== JSON.stringify(bundledMcp.mcpServers?.['mcp-search'])) {
-      throw new Error('.mcp.json and plugin/.mcp.json mcp-search launchers must stay in sync');
-    }
    const mcpSearchCommand = bundledMcp.mcpServers?.['mcp-search']?.args?.join(' ') ?? '';
    if (!mcpSearchCommand.includes('.codex/plugins/cache/claude-mem-local/claude-mem')) {
      throw new Error('plugin/.mcp.json mcp-search launcher must include Codex cache fallback for hosts that do not inject PLUGIN_ROOT');
@@ -34,9 +34,7 @@ function getGitignoreExcludes(basePath) {
  const gitignorePath = path.join(basePath, '.gitignore');
  if (!existsSync(gitignorePath)) return '';

-  const syncManagedFiles = new Set([
-    '.mcp.json',
-  ]);
+  const syncManagedFiles = new Set();

  const lines = readFileSync(gitignorePath, 'utf-8').split('\n');
  return lines
@@ -527,7 +527,6 @@ function copyPluginToMarketplace(): void {
  const allowedTopLevelEntries = [
    '.agents',
    '.codex-plugin',
-    '.mcp.json',
    'plugin',
    'package.json',
    'package-lock.json',
@@ -1,7 +1,7 @@
 import path from 'path';
 import { homedir } from 'os';
 import { execFileSync, spawnSync } from 'child_process';
-import { existsSync, readFileSync, writeFileSync } from 'fs';
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
 import { fileURLToPath } from 'url';
 import { logger } from '../../utils/logger.js';
 import { paths } from '../../shared/paths.js';
@@ -9,7 +9,10 @@ import { paths } from '../../shared/paths.js';
 const CODEX_DIR = path.join(homedir(), '.codex');
 const CODEX_AGENTS_MD_PATH = path.join(CODEX_DIR, 'AGENTS.md');
 const CODEX_TRANSCRIPT_WATCH_CONFIG_PATH = paths.transcriptsConfig();
+const CODEX_CONFIG_PATH = path.join(CODEX_DIR, 'config.toml');
 const MARKETPLACE_NAME = 'claude-mem-local';
+const CODEX_PLUGIN_ID = `claude-mem@${MARKETPLACE_NAME}`;
+const LEGACY_CODEX_PLUGIN_IDS = ['claude-mem@thedotmack'];
 const MIN_CODEX_MARKETPLACE_VERSION = '0.128.0';
 const REQUIRED_MARKETPLACE_FILES = [
  path.join('.agents', 'plugins', 'marketplace.json'),
@@ -131,6 +134,74 @@ function registerCodexMarketplace(marketplaceRoot: string): void {
  runCodex(['plugin', 'marketplace', 'add', marketplaceRoot]);
 }

+/**
+ * Sets `key = true|false` inside the TOML table introduced by `header`,
+ * editing the text line by line rather than parsing and re-serialising it.
+ *
+ * - If no line carries `header` (optionally followed by a `#` comment), the
+ *   table and the key are appended at the end of the document.
+ * - If the table exists but the key does not, the key is inserted directly
+ *   below the header line.
+ * - If the key exists inside the table, its line is rewritten; a trailing
+ *   `# comment` on an unquoted value is kept.
+ *
+ * The document's line-ending style is preserved: when `content` contains
+ * CRLF, every line of the result is joined with CRLF.
+ *
+ * Limitation: the table is considered to end at the next line that starts
+ * with `[`, so a multi-line array whose continuation line begins with `[`
+ * is mistaken for a new table header.
+ *
+ * @param content - Current TOML document text (may be empty).
+ * @param header - Exact table header to look for, e.g. `[features]`.
+ * @param key - Bare key to set inside that table.
+ * @param enabled - Boolean value to write.
+ * @returns The updated document text.
+ */
+export function setTomlBooleanInTable(content: string, header: string, key: string, enabled: boolean): string {
+  const booleanLine = `${key} = ${enabled ? 'true' : 'false'}`;
+  // Keep CRLF documents CRLF; joining with a bare '\n' would mix line endings.
+  const eol = content.includes('\r\n') ? '\r\n' : '\n';
+  const lines = content.split(/\r?\n/);
+
+  // Accept a trailing comment after the header. Missing such a header would
+  // append a second identical table, which TOML rejects as a duplicate.
+  const isHeaderLine = (line: string): boolean => {
+    const trimmed = line.trim();
+    if (!trimmed.startsWith(header)) return false;
+    const rest = trimmed.slice(header.length).trimStart();
+    return rest === '' || rest.startsWith('#');
+  };
+  const headerIndex = lines.findIndex(isHeaderLine);
+
+  if (headerIndex === -1) {
+    const trimmed = content.trimEnd();
+    return `${trimmed}${trimmed ? `${eol}${eol}` : ''}${header}${eol}${booleanLine}${eol}`;
+  }
+
+  // The table body runs until the next line that opens with '['.
+  let sectionEnd = headerIndex + 1;
+  while (sectionEnd < lines.length && !/^\s*\[/.test(lines[sectionEnd])) {
+    sectionEnd += 1;
+  }
+
+  const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+  const keyPattern = new RegExp(`^\\s*${escapedKey}\\s*=`);
+  const keyIndex = lines.findIndex(
+    (line, index) => index > headerIndex && index < sectionEnd && keyPattern.test(line),
+  );
+
+  if (keyIndex === -1) {
+    lines.splice(headerIndex + 1, 0, booleanLine);
+  } else {
+    // Only carry a comment over when the value has no quotes, so a '#' that
+    // lives inside a string value is never mistaken for a comment.
+    const trailingComment = /^[^#"']*?(\s*#.*)$/.exec(lines[keyIndex])?.[1] ?? '';
+    lines[keyIndex] = `${booleanLine}${trailingComment}`;
+  }
+
+  return lines.join(eol);
+}
+
+/**
+ * Sets `enabled = true|false` in the `[plugins."<pluginId>"]` table of a TOML
+ * document and returns the updated text.
+ *
+ * @param content - Current TOML document text.
+ * @param pluginId - Plugin identifier; backslashes and double quotes are
+ *   escaped before it is placed inside the quoted table key.
+ * @param enabled - Value to write for the `enabled` key.
+ * @returns The updated document text.
+ */
+export function setTomlPluginEnabled(content: string, pluginId: string, enabled: boolean): string {
+  // Backslashes first, so the backslashes added for quotes are not doubled again.
+  const quotedId = pluginId.split('\\').join('\\\\').split('"').join('\\"');
+  const tableHeader = `[plugins."${quotedId}"]`;
+  return setTomlBooleanInTable(content, tableHeader, 'enabled', enabled);
+}
+
+/**
+ * Sets `<featureName> = true|false` in the `[features]` table of a TOML
+ * document and returns the updated text.
+ *
+ * @param content - Current TOML document text.
+ * @param featureName - Key to set inside `[features]`.
+ * @param enabled - Value to write for that key.
+ * @returns The updated document text.
+ */
+export function setTomlFeatureEnabled(content: string, featureName: string, enabled: boolean): string {
+  const featuresTable = '[features]';
+  return setTomlBooleanInTable(content, featuresTable, featureName, enabled);
+}
+
+/**
+ * Rewrites the Codex config file so the claude-mem plugin table carries the
+ * requested `enabled` value.
+ *
+ * When enabling, the `hooks` key in `[features]` is also set to true. Every
+ * id in LEGACY_CODEX_PLUGIN_IDS is always set to `enabled = false`. When
+ * disabling and no config file exists, nothing is created.
+ *
+ * @param enabled - Desired state for the current plugin id.
+ * @returns true if the file content changed and was written, false otherwise.
+ */
+function writeCodexPluginConfig(enabled: boolean): boolean {
+  // Disabling must never create a config file that was not there before.
+  if (!enabled && !existsSync(CODEX_CONFIG_PATH)) {
+    return false;
+  }
+  mkdirSync(CODEX_DIR, { recursive: true });
+
+  const original = existsSync(CODEX_CONFIG_PATH) ? readFileSync(CODEX_CONFIG_PATH, 'utf-8') : '';
+
+  const withFeature = enabled ? setTomlFeatureEnabled(original, 'hooks', true) : original;
+  const withLegacyDisabled = LEGACY_CODEX_PLUGIN_IDS.reduce(
+    (toml, legacyId) => setTomlPluginEnabled(toml, legacyId, false),
+    withFeature,
+  );
+  const updated = setTomlPluginEnabled(withLegacyDisabled, CODEX_PLUGIN_ID, enabled);
+
+  // Skip the write when nothing changed so callers can report "already ...".
+  if (updated !== original) {
+    writeFileSync(CODEX_CONFIG_PATH, updated);
+    return true;
+  }
+  return false;
+}
+
+/**
+ * Enables the claude-mem plugin in the Codex config and logs the outcome,
+ * noting when the config already had the requested state.
+ */
+function enableCodexPluginConfig(): void {
+  const suffix = writeCodexPluginConfig(true) ? '' : ' (already enabled)';
+  console.log(`  Enabled Codex plugin: ${CODEX_PLUGIN_ID}${suffix}`);
+}
+
+/**
+ * Disables the claude-mem plugin in the Codex config and logs the outcome,
+ * noting when no change was written.
+ */
+function disableCodexPluginConfig(): void {
+  const suffix = writeCodexPluginConfig(false) ? '' : ' (already disabled)';
+  console.log(`  Disabled Codex plugin: ${CODEX_PLUGIN_ID}${suffix}`);
+}
+
 function parseSemver(value: string): [number, number, number] | null {
  const match = value.match(/(\d+)\.(\d+)\.(\d+)/);
  if (!match) return null;
@@ -284,16 +355,12 @@ export async function installCodexCli(marketplaceRootOverride?: string): Promise

    console.log(`  Registering Codex plugin marketplace: ${marketplaceRoot}`);
    registerCodexMarketplace(marketplaceRoot);
+    enableCodexPluginConfig();
    runCodexBestEffort(
      ['plugin', 'marketplace', 'upgrade', MARKETPLACE_NAME],
      'Refreshed Codex marketplace and installed plugin cache.',
      'Could not refresh Codex marketplace cache; reinstall or upgrade claude-mem from /plugins if Codex still uses old MCP config',
    );
-    runCodexBestEffort(
-      ['features', 'enable', 'plugin_hooks'],
-      'Enabled Codex plugin_hooks so claude-mem hooks can run.',
-      'Could not enable Codex plugin_hooks; run `codex features enable plugin_hooks` if context hooks do not appear',
-    );
    if (!cleanupLegacyCodexAgentsMdContext()) {
      console.warn(`  Native Codex hooks registered, but failed to remove legacy AGENTS.md context from ${CODEX_AGENTS_MD_PATH}.`);
    }
@@ -309,9 +376,7 @@ Plugin source:     ${marketplaceRoot}

 Next steps:
  1. Open Codex CLI in your project
-  2. Run /plugins
-  3. Install claude-mem from the claude-mem (local) marketplace
-  4. Restart Codex CLI after install so MCP tools and plugin hooks reload
+  2. Restart any running Codex sessions so native hooks are loaded

 For a fresh setup, the supported entry point is:
  npx claude-mem@latest install
@@ -329,6 +394,14 @@ export function uninstallCodexCli(): number {

  let failed = false;

+  try {
+    disableCodexPluginConfig();
+  } catch (error) {
+    const message = error instanceof Error ? error.message : String(error);
+    console.error(`\nCodex plugin config update failed: ${message}`);
+    failed = true;
+  }
+
  try {
    if (commandExists('codex')) {
      runCodex(['plugin', 'marketplace', 'remove', MARKETPLACE_NAME]);
@@ -7,10 +7,10 @@ import type { TranscriptSchema, TranscriptWatchConfig } from './types.js';
 export const DEFAULT_CONFIG_PATH = paths.transcriptsConfig();
 export const DEFAULT_STATE_PATH = paths.transcriptsState();

-const CODEX_SAMPLE_SCHEMA: TranscriptSchema = {
+export const CODEX_SAMPLE_SCHEMA: TranscriptSchema = {
  name: 'codex',
  version: '0.3',
-  description: 'Schema for Codex session JSONL files under ~/.codex/sessions.',
+  description: 'Legacy schema for Codex session JSONL files. Codex native hooks are preferred.',
  events: [
    {
      name: 'session-meta',
@@ -109,20 +109,39 @@ const CODEX_SAMPLE_SCHEMA: TranscriptSchema = {

 export const SAMPLE_CONFIG: TranscriptWatchConfig = {
  version: 1,
-  schemas: {
-    codex: CODEX_SAMPLE_SCHEMA
-  },
-  watches: [
-    {
-      name: 'codex',
-      path: '~/.codex/sessions/**/*.jsonl',
-      schema: 'codex',
-      startAtEnd: true
-    }
-  ],
+  schemas: {}, // No schema is registered by default; CODEX_SAMPLE_SCHEMA is exported separately.
+  watches: [], // Empty by default: the sample config no longer seeds a Codex session watch.
  stateFile: DEFAULT_STATE_PATH
 };

+export function isNativeHookBackedCodexWatch(watch: { name?: string; path?: string; schema?: string | TranscriptSchema }): boolean { // True when the watch is named 'codex' (or uses a schema named 'codex') AND its path is exactly the Codex sessions glob under the home directory.
+  const schemaName = typeof watch.schema === 'string' ? watch.schema : watch.schema?.name; // The schema may be given as a name string or as an inline schema object.
+  const nameOrSchemaIsCodex = watch.name === 'codex' || schemaName === 'codex';
+  if (!nameOrSchemaIsCodex || !watch.path) return false;
+
+  const normalizedPath = expandHomePath(watch.path).replace(/\\/g, '/'); // Expand the home shortcut and use forward slashes so both sides are compared in the same form.
+  const codexSessionsRoot = join(homedir(), '.codex', 'sessions').replace(/\\/g, '/');
+  return normalizedPath === `${codexSessionsRoot}/**/*.jsonl`; // Exact string comparison: any other glob under the same directory is not recognized.
+}
+
+export function filterNativeHookBackedCodexWatches( // Drops watches matched by isNativeHookBackedCodexWatch unless the caller opts in; reports how many were dropped.
+  config: TranscriptWatchConfig,
+  allowCodexTranscriptIngestion: boolean
+): { config: TranscriptWatchConfig; removed: number } {
+  if (allowCodexTranscriptIngestion) {
+    return { config, removed: 0 }; // Opted in: the same config object is returned untouched.
+  }
+
+  const watches = config.watches.filter(watch => !isNativeHookBackedCodexWatch(watch));
+  return {
+    config: { // A shallow copy is returned; the input config is not mutated.
+      ...config,
+      watches,
+    },
+    removed: config.watches.length - watches.length,
+  };
+}
+
 export function expandHomePath(inputPath: string): string {
  if (!inputPath) return inputPath;
  if (inputPath.startsWith('~')) {
@@ -122,7 +122,7 @@ export class TranscriptWatcher {
    const files = this.resolveWatchFiles(resolvedPath);

    for (const filePath of files) {
-      await this.addTailer(filePath, watch, schema, true);
+      await this.addTailer(filePath, watch, schema);
    }

    const watchRoot = this.deepestNonGlobAncestor(resolvedPath);
@@ -143,7 +143,7 @@ export class TranscriptWatcher {
        const matches = this.resolveWatchFiles(resolvedPath);
        for (const filePath of matches) {
          if (!this.tailers.has(filePath)) {
-            void this.addTailer(filePath, watch, schema, false);
+            void this.addTailer(filePath, watch, schema);
          }
        }
      });
@@ -223,15 +223,14 @@ export class TranscriptWatcher {
  private async addTailer(
    filePath: string,
    watch: WatchTarget,
-    schema: TranscriptSchema,
-    initialDiscovery: boolean
+    schema: TranscriptSchema
  ): Promise<void> {
    if (this.tailers.has(filePath)) return;

    const sessionIdOverride = this.extractSessionIdFromPath(filePath);

    let offset = this.state.offsets[filePath] ?? 0;
-    if (offset === 0 && watch.startAtEnd && initialDiscovery) {
+    if (offset === 0 && watch.startAtEnd) {
      try {
        offset = statSync(filePath).size;
      } catch (error: unknown) {
@@ -81,7 +81,7 @@ import { TimelineService } from './worker/TimelineService.js';
 import { SessionEventBroadcaster } from './worker/events/SessionEventBroadcaster.js';
 import { SessionCompletionHandler } from './worker/session/SessionCompletionHandler.js';
 import { setIngestContext, attachIngestGeneratorStarter } from './worker/http/shared.js';
-import { DEFAULT_CONFIG_PATH, DEFAULT_STATE_PATH, expandHomePath, loadTranscriptWatchConfig } from './transcripts/config.js';
+import { DEFAULT_CONFIG_PATH, DEFAULT_STATE_PATH, expandHomePath, filterNativeHookBackedCodexWatches, loadTranscriptWatchConfig } from './transcripts/config.js';
 import { TranscriptWatcher } from './transcripts/watcher.js';

 import { ViewerRoutes } from './worker/http/routes/ViewerRoutes.js';
@@ -471,9 +471,27 @@ export class WorkerService implements WorkerRef {
      return;
    }

-    const transcriptConfig = loadTranscriptWatchConfig(configPath);
+    const allowCodexTranscriptIngestion = settings.CLAUDE_MEM_CODEX_TRANSCRIPT_INGESTION === 'true';
+    const { config: transcriptConfig, removed } = filterNativeHookBackedCodexWatches(
+      loadTranscriptWatchConfig(configPath),
+      allowCodexTranscriptIngestion
+    );
    const statePath = expandHomePath(transcriptConfig.stateFile ?? DEFAULT_STATE_PATH);

+    if (removed > 0) {
+      logger.warn('TRANSCRIPT', 'Skipped Codex transcript watch because native Codex hooks are authoritative', {
+        removed,
+        optInSetting: 'CLAUDE_MEM_CODEX_TRANSCRIPT_INGESTION=true',
+      });
+    }
+
+    if (transcriptConfig.watches.length === 0) {
+      logger.info('TRANSCRIPT', 'Transcript watcher config has no active watches; skipping automatic transcript capture', {
+        configPath: resolvedConfigPath,
+      });
+      return;
+    }
+
    try {
      this.transcriptWatcher = new TranscriptWatcher(transcriptConfig, statePath);
      await this.transcriptWatcher.start();
@@ -42,6 +42,7 @@ export interface SettingsDefaults {
  CLAUDE_MEM_FOLDER_USE_LOCAL_MD: string;  
  CLAUDE_MEM_TRANSCRIPTS_ENABLED: string;  
  CLAUDE_MEM_TRANSCRIPTS_CONFIG_PATH: string;  
+  CLAUDE_MEM_CODEX_TRANSCRIPT_INGESTION: string;
  CLAUDE_MEM_MAX_CONCURRENT_AGENTS: string;  
  CLAUDE_MEM_HOOK_FAIL_LOUD_THRESHOLD: string;  
  CLAUDE_MEM_EXCLUDED_PROJECTS: string;  
@@ -117,6 +118,7 @@ export class SettingsDefaultsManager {
    CLAUDE_MEM_FOLDER_USE_LOCAL_MD: 'false',  // When true, writes to CLAUDE.local.md instead of CLAUDE.md
    CLAUDE_MEM_TRANSCRIPTS_ENABLED: 'true',
    CLAUDE_MEM_TRANSCRIPTS_CONFIG_PATH: join(homedir(), '.claude-mem', 'transcript-watch.json'),
+    CLAUDE_MEM_CODEX_TRANSCRIPT_INGESTION: 'false',
    CLAUDE_MEM_MAX_CONCURRENT_AGENTS: '2',  // Max concurrent Claude SDK agent subprocesses
    CLAUDE_MEM_HOOK_FAIL_LOUD_THRESHOLD: '3',  // Plan 05 Phase 8 — escalate to exit code 2 after N consecutive worker-unreachable hook invocations
    CLAUDE_MEM_EXCLUDED_PROJECTS: '',  // Comma-separated glob patterns for excluded project paths
@@ -90,13 +90,6 @@ describe('Plugin Distribution - Codex Marketplace', () => {
    expect(command).toContain('plugins/cache/thedotmack/claude-mem');
    expect(command).toContain('claude-mem: mcp server not found');
  });
-
-  it('keeps root and bundled MCP launchers in sync', () => {
-    const rootMcp = JSON.parse(readFileSync(path.join(projectRoot, '.mcp.json'), 'utf-8'));
-    const bundledMcp = JSON.parse(readFileSync(path.join(projectRoot, 'plugin/.mcp.json'), 'utf-8'));
-
-    expect(rootMcp.mcpServers['mcp-search']).toEqual(bundledMcp.mcpServers['mcp-search']);
-  });
 });

 describe('Plugin Distribution - hooks.json Integrity', () => {
@@ -134,7 +127,7 @@ describe('Plugin Distribution - hooks.json Integrity', () => {

 describe('Plugin Distribution - Startup Root Resolution', () => {
  it('MCP startup commands should have config-dir based non-empty fallbacks', () => {
-    for (const relativePath of ['.mcp.json', 'plugin/.mcp.json']) {
+    for (const relativePath of ['plugin/.mcp.json']) {
      const command = mcpStartupCommandFrom(relativePath);

      expect(command).toContain('${CLAUDE_CONFIG_DIR:-$HOME/.claude}');
@@ -151,13 +151,15 @@ describe('Install Non-TTY Support', () => {
      expect(registerRegion).toContain("['plugin', 'marketplace', 'add', marketplaceRoot]");
    });

-    it('enables Codex plugin hooks during install', () => {
+    it('enables Codex hooks and claude-mem plugin config during install', () => {
      const installRegion = codexInstallerSource.slice(
        codexInstallerSource.indexOf('export async function installCodexCli'),
        codexInstallerSource.indexOf('export function uninstallCodexCli'),
      );
-      expect(installRegion).toContain("['features', 'enable', 'plugin_hooks']");
-      expect(installRegion).toContain('codex features enable plugin_hooks');
+      expect(codexInstallerSource).toContain("setTomlFeatureEnabled(next, 'hooks', true)"); // Checked against the whole source: this call sits in writeCodexPluginConfig, not inside installCodexCli.
+      expect(codexInstallerSource).toContain("const CODEX_PLUGIN_ID = `claude-mem@${MARKETPLACE_NAME}`");
+      expect(installRegion).toContain('enableCodexPluginConfig()');
+      expect(installRegion).not.toContain('plugin_hooks'); // The old `plugin_hooks` feature command must no longer appear in the install path.
    });

    it('captures Codex CLI output for install failure reporting', () => {
@@ -211,13 +213,14 @@ describe('Install Non-TTY Support', () => {

    it('does not seed new Codex transcript watcher configs with AGENTS context injection', () => {
      expect(transcriptConfigSource).toContain("name: 'codex'");
-      const codexWatchRegion = transcriptConfigSource.slice(
-        transcriptConfigSource.indexOf("name: 'codex'"),
+      const sampleConfigRegion = transcriptConfigSource.slice( // Text of the SAMPLE_CONFIG literal, from its export line up to its stateFile entry.
+        transcriptConfigSource.indexOf('export const SAMPLE_CONFIG'),
        transcriptConfigSource.indexOf('stateFile: DEFAULT_STATE_PATH'),
      );
-      expect(codexWatchRegion).toContain("path: '~/.codex/sessions/**/*.jsonl'");
-      expect(codexWatchRegion).not.toContain("mode: 'agents'");
-      expect(codexWatchRegion).not.toContain('updateOn');
+      expect(sampleConfigRegion).toContain('watches: []'); // The sample config must ship with no watches at all.
+      expect(sampleConfigRegion).not.toContain("path: '~/.codex/sessions/**/*.jsonl'");
+      expect(sampleConfigRegion).not.toContain("mode: 'agents'");
+      expect(sampleConfigRegion).not.toContain('updateOn');
    });
  });

@@ -0,0 +1,61 @@
+import { describe, expect, it } from 'bun:test';
+import {
+  setTomlFeatureEnabled,
+  setTomlPluginEnabled,
+} from '../../src/services/integrations/CodexCliInstaller.js';
+
+describe('Codex CLI installer config repair', () => { // Exercises the pure TOML text helpers exported by CodexCliInstaller; no files are read or written.
+  it('adds claude-mem plugin enablement when missing', () => {
+    const result = setTomlPluginEnabled('model = "gpt-5.5"\n', 'claude-mem@claude-mem-local', true); // The input has no [plugins...] table at all.
+
+    expect(result).toContain('[plugins."claude-mem@claude-mem-local"]');
+    expect(result).toContain('enabled = true');
+  });
+
+  it('updates existing plugin enablement in place', () => {
+    const input = [
+      '[plugins."claude-mem@thedotmack"]',
+      'enabled = true',
+      '',
+      '[marketplaces.claude-mem-local]',
+      'source_type = "git"',
+      '',
+    ].join('\n');
+
+    const result = setTomlPluginEnabled(input, 'claude-mem@thedotmack', false);
+
+    expect(result).toContain('[plugins."claude-mem@thedotmack"]\nenabled = false'); // The existing line is rewritten; it still directly follows the header.
+    expect(result).toContain('[marketplaces.claude-mem-local]');
+  });
+
+  it('inserts enabled into an existing plugin section without touching the next section', () => {
+    const input = [
+      '[plugins."claude-mem@claude-mem-local"]',
+      '',
+      '[hooks.state]',
+      '',
+    ].join('\n');
+
+    const result = setTomlPluginEnabled(input, 'claude-mem@claude-mem-local', true);
+
+    expect(result).toContain('[plugins."claude-mem@claude-mem-local"]\nenabled = true\n'); // The new key lands directly under the header.
+    expect(result).toContain('[hooks.state]');
+  });
+
+  it('enables the current Codex hooks feature flag', () => {
+    const input = [
+      '[features]',
+      'shell_snapshot = true',
+      '',
+      '[plugins."claude-mem@claude-mem-local"]',
+      'enabled = true',
+      '',
+    ].join('\n');
+
+    const result = setTomlFeatureEnabled(input, 'hooks', true);
+
+    expect(result).toContain('[features]\nhooks = true\nshell_snapshot = true'); // A missing key is inserted as the first line of the table, ahead of existing keys.
+    expect(result).toContain('[plugins."claude-mem@claude-mem-local"]');
+    expect(result).not.toContain('codex_hooks');
+  });
+});
@@ -0,0 +1,75 @@
+import { describe, expect, it } from 'bun:test';
+import { homedir } from 'os';
+import { join } from 'path';
+import {
+  CODEX_SAMPLE_SCHEMA,
+  SAMPLE_CONFIG,
+  filterNativeHookBackedCodexWatches,
+  isNativeHookBackedCodexWatch,
+} from '../../src/services/transcripts/config.js';
+import type { TranscriptWatchConfig } from '../../src/services/transcripts/types.js';
+
+describe('transcript watcher config', () => { // Covers SAMPLE_CONFIG defaults plus isNativeHookBackedCodexWatch and filterNativeHookBackedCodexWatches.
+  it('does not auto-watch Codex transcripts in the sample config', () => {
+    expect(SAMPLE_CONFIG.watches).toEqual([]);
+  });
+
+  it('recognizes the legacy Codex session transcript watch', () => {
+    expect(isNativeHookBackedCodexWatch({
+      name: 'codex',
+      path: '~/.codex/sessions/**/*.jsonl',
+      schema: 'codex',
+    })).toBe(true);
+
+    expect(isNativeHookBackedCodexWatch({
+      name: 'codex',
+      path: join(homedir(), '.codex', 'sessions', '**', '*.jsonl'), // Absolute spelling of the same glob, built with the platform path separator.
+      schema: CODEX_SAMPLE_SCHEMA, // Inline schema object instead of a schema name.
+    })).toBe(true);
+  });
+
+  it('does not treat custom transcript watches as native Codex hooks', () => {
+    expect(isNativeHookBackedCodexWatch({
+      name: 'codex-archive',
+      path: '~/custom-codex-export/**/*.jsonl', // Codex schema, but the path is outside ~/.codex/sessions.
+      schema: 'codex',
+    })).toBe(false);
+
+    expect(isNativeHookBackedCodexWatch({
+      name: 'other',
+      path: '~/.codex/sessions/**/*.jsonl', // Codex sessions path, but neither the name nor the schema is 'codex'.
+      schema: 'other',
+    })).toBe(false);
+  });
+
+  it('strips legacy Codex watches unless explicitly opted in', () => {
+    const config: TranscriptWatchConfig = {
+      version: 1,
+      schemas: {
+        codex: CODEX_SAMPLE_SCHEMA,
+      },
+      watches: [
+        {
+          name: 'codex',
+          path: '~/.codex/sessions/**/*.jsonl',
+          schema: 'codex',
+          startAtEnd: true,
+        },
+        {
+          name: 'custom',
+          path: '~/custom/**/*.jsonl',
+          schema: 'codex',
+          startAtEnd: true,
+        },
+      ],
+    };
+
+    const filtered = filterNativeHookBackedCodexWatches(config, false);
+    expect(filtered.removed).toBe(1); // Only the watch on the Codex sessions glob is dropped.
+    expect(filtered.config.watches.map(watch => watch.name)).toEqual(['custom']);
+
+    const allowed = filterNativeHookBackedCodexWatches(config, true);
+    expect(allowed.removed).toBe(0);
+    expect(allowed.config.watches).toHaveLength(2);
+  });
+});
@@ -0,0 +1,111 @@
+import { afterEach, beforeEach, describe, expect, it, mock, spyOn } from 'bun:test';
+import { appendFileSync, mkdirSync, rmSync, writeFileSync } from 'fs';
+import { tmpdir } from 'os';
+import { join } from 'path';
+import type { NormalizedHookInput } from '../../src/cli/types.js';
+import type { TranscriptSchema, WatchTarget } from '../../src/services/transcripts/types.js';
+
+const sessionInitCalls: NormalizedHookInput[] = []; // Records every input passed to the stubbed session-init handler below.
+
+mock.module('../../src/cli/handlers/session-init.js', () => ({ // Replaces the session-init handler module with a recording stub.
+  sessionInitHandler: {
+    execute: async (input: NormalizedHookInput) => {
+      sessionInitCalls.push(input);
+      return { continue: true, suppressOutput: true };
+    },
+  },
+}));
+
+import { logger } from '../../src/utils/logger.js';
+import { TranscriptWatcher } from '../../src/services/transcripts/watcher.js';
+
+const waitForAsyncTail = () => new Promise(resolve => setTimeout(resolve, 50)); // Resolves after a fixed 50 ms timer; the test awaits it after each tailing step.
+
+describe('TranscriptWatcher startAtEnd', () => { // Checks that a watch with startAtEnd skips content already in a file and only dispatches lines appended afterwards.
+  let tmpRoot: string;
+  let loggerSpies: ReturnType<typeof spyOn>[] = [];
+
+  beforeEach(() => {
+    sessionInitCalls.length = 0; // Reset the recorded handler calls between tests.
+    tmpRoot = join(tmpdir(), `claude-mem-transcript-watch-${Date.now()}-${Math.random().toString(16).slice(2)}`);
+    mkdirSync(tmpRoot, { recursive: true });
+    loggerSpies = [ // Silence all four logger levels for the duration of each test.
+      spyOn(logger, 'info').mockImplementation(() => {}),
+      spyOn(logger, 'debug').mockImplementation(() => {}),
+      spyOn(logger, 'warn').mockImplementation(() => {}),
+      spyOn(logger, 'error').mockImplementation(() => {}),
+    ];
+  });
+
+  afterEach(() => {
+    loggerSpies.forEach(spy => spy.mockRestore());
+    rmSync(tmpRoot, { recursive: true, force: true });
+  });
+
+  it('does not replay history from transcript files discovered after startup', async () => {
+    const sessionId = '019e050e-7ae0-71b2-b19f-6cc428e5763a';
+    const filePath = join(tmpRoot, `${sessionId}.jsonl`);
+    const statePath = join(tmpRoot, 'state.json');
+
+    writeFileSync( // Seed the file with one "historical" line before any tailer exists.
+      filePath,
+      `${JSON.stringify({
+        type: 'event',
+        payload: {
+          type: 'user_message',
+          session_id: sessionId,
+          message: 'historical prompt that must not be replayed',
+        },
+      })}\n`,
+      'utf8',
+    );
+
+    const schema: TranscriptSchema = {
+      name: 'codex-test',
+      events: [
+        {
+          name: 'user-message',
+          match: { path: 'payload.type', equals: 'user_message' },
+          action: 'session_init',
+          fields: {
+            sessionId: 'payload.session_id',
+            prompt: 'payload.message',
+          },
+        },
+      ],
+    };
+    const watch: WatchTarget = {
+      name: 'codex',
+      path: join(tmpRoot, '*.jsonl'),
+      schema,
+      startAtEnd: true,
+    };
+    const watcher = new TranscriptWatcher({ version: 1, watches: [watch] }, statePath);
+
+    await (watcher as any).addTailer(filePath, watch, schema); // Calls the private method directly through an `any` cast; watcher.start() is never invoked.
+    await waitForAsyncTail();
+
+    expect(sessionInitCalls).toHaveLength(0); // Nothing that was already in the file may have been dispatched.
+
+    appendFileSync(
+      filePath,
+      `${JSON.stringify({
+        type: 'event',
+        payload: {
+          type: 'user_message',
+          session_id: sessionId,
+          message: 'live prompt',
+        },
+      })}\n`,
+      'utf8',
+    );
+
+    (watcher as any).tailers.get(filePath)?.poke(); // NOTE(review): presumably prompts the tailer to read the appended data — confirm against the tailer implementation.
+    await waitForAsyncTail();
+    watcher.stop();
+
+    const prompts = sessionInitCalls.map(call => call.prompt);
+    expect(prompts).toContain('live prompt');
+    expect(prompts).not.toContain('historical prompt that must not be replayed');
+  });
+});