From 2a2008bac210d763fa3f8721100290b358a40cef Mon Sep 17 00:00:00 2001
From: Tran Quang <107824159+quangtran88@users.noreply.github.com>
Date: Wed, 15 Apr 2026 14:57:57 +0700
Subject: [PATCH] fix(file-context): preserve targeted reads + invalidate on
 mtime (#1719) (#1729)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(file-context): preserve targeted reads + invalidate on mtime (#1719)

The PreToolUse:Read hook unconditionally rewrote tool input to
{file_path, limit:1}, which interacted with two failure modes:

1. Subagent edits a file → parent's next Read still gets truncated
   because the observation snapshot predates the change.
2. Claude requests a different section with offset/limit → the hook
   strips them, so the Claude Code harness's read-dedup cache returns
   "File unchanged" against the prior 1-line read. The file becomes
   unreadable for the rest of the conversation, even though the hook's
   own recovery hint says "Read again with offset/limit for the
   section you need."

Two complementary fixes:

- **mtime invalidation**: stat the file (we already stat for the size
  gate) and compare mtimeMs to the newest observation's created_at_epoch.
  If the file is newer, pass the read through unchanged so fresh content
  reaches Claude.

- **Targeted-read pass-through**: when toolInput already specifies
  offset and/or limit, preserve them in updatedInput instead of
  collapsing to {limit:1}. The harness's dedup cache then sees a
  distinct input and lets the read proceed.

The unconstrained-read path (no offset, no limit) is unchanged: still
truncated to 1 line plus the observation timeline, so token economics
are preserved for the common case.

Tests cover all three branches: existing truncation, targeted-read
pass-through (offset+limit, limit-only), and mtime-driven bypass.

Fixes #1719

* refactor(file-context): address review findings on #1719 fix

- Add offset-only test case for full targeted-read branch coverage
- Use >= for mtime comparison to handle same-millisecond edge case
- Add Number.isFinite() + bounds guards on offset/limit pass-through
- Trim over-verbose comments to concise single-line summaries
- Remove redundant `as number` casts after typeof narrowing
- Add comment explaining fileMtimeMs=0 sentinel invariant
---
 src/cli/handlers/file-context.ts |  61 ++++++--
 tests/hooks/file-context.test.ts | 239 +++++++++++++++++++++++++++++++
 2 files changed, 288 insertions(+), 12 deletions(-)
 create mode 100644 tests/hooks/file-context.test.ts

diff --git a/src/cli/handlers/file-context.ts b/src/cli/handlers/file-context.ts
index c53818fd..b0a81c27 100644
--- a/src/cli/handlers/file-context.ts
+++ b/src/cli/handlers/file-context.ts
@@ -106,7 +106,11 @@ function deduplicateObservations(
   return scored.slice(0, displayLimit).map(s => s.obs);
 }
 
-function formatFileTimeline(observations: ObservationRow[], filePath: string): string {
+function formatFileTimeline(
+  observations: ObservationRow[],
+  filePath: string,
+  truncated: boolean
+): string {
   // Escape filePath for safe interpolation into recovery hints (quotes, backslashes, newlines)
   const safePath = filePath.replace(/\\/g, '\\\\').replace(/"/g, '\\"').replace(/\n/g, '\\n');
   // Group observations by day
@@ -136,9 +140,13 @@ function formatFileTimeline(observations: ObservationRow[], filePath: string): s
   }).toLowerCase().replace(' ', '');
   const currentTimezone = now.toLocaleTimeString('en-US', { timeZoneName: 'short' }).split(' ').pop();
 
+  const headerLine = truncated
+    ? `This file has prior observations. Only line 1 was read to save tokens.`
+    : `This file has prior observations. The requested section was read normally.`;
+
   const lines: string[] = [
     `Current: ${currentDate} ${currentTime} ${currentTimezone}`,
-    `This file has prior observations. Only line 1 was read to save tokens.`,
+    headerLine,
     `- **Already know enough?** The timeline below may be all you need (semantic priming).`,
     `- **Need details?** get_observations([IDs]) — ~300 tokens each.`,
     `- **Need full file?** Read again with offset/limit for the section you need.`,
@@ -170,16 +178,27 @@ export const fileContextHandler: EventHandler = {
       return { continue: true, suppressOutput: true };
     }
 
-    // Skip gate for files below the token-economics threshold — timeline (~370 tokens)
-    // costs more than reading small files directly.
+    // Preserve user-supplied offset/limit to avoid read-dedup collisions (fixes #1719)
+    const userOffset = typeof toolInput?.offset === 'number' && Number.isFinite(toolInput.offset) && toolInput.offset >= 0
+      ? Math.floor(toolInput.offset) : undefined;
+    const userLimit = typeof toolInput?.limit === 'number' && Number.isFinite(toolInput.limit) && toolInput.limit > 0
+      ? Math.floor(toolInput.limit) : undefined;
+    const isTargetedRead = userOffset !== undefined || userLimit !== undefined;
+
+    // Stat the file once: size (gate) + mtime (cache invalidation).
+    // 0 = stat failed non-fatally (e.g. EPERM) — skip mtime check, fall through to truncation.
+    let fileMtimeMs = 0;
     try {
       const statPath = path.isAbsolute(filePath)
         ? filePath
         : path.resolve(input.cwd || process.cwd(), filePath);
       const stat = statSync(statPath);
+      // Skip gate for files below the token-economics threshold — timeline (~370 tokens)
+      // costs more than reading small files directly.
       if (stat.size < FILE_READ_GATE_MIN_BYTES) {
         return { continue: true, suppressOutput: true };
       }
+      fileMtimeMs = stat.mtimeMs;
     } catch (err: any) {
       if (err.code === 'ENOENT') return { continue: true, suppressOutput: true };
       // Other errors (symlink, permission denied) — fall through and let gate proceed
@@ -227,25 +246,43 @@ export const fileContextHandler: EventHandler = {
         return { continue: true, suppressOutput: true };
       }
 
+      // mtime invalidation: bypass truncation when the file is newer than the latest observation.
+      // Uses >= to handle same-millisecond edits (cost: one extra full read vs risk of stuck truncation).
+      if (fileMtimeMs > 0) {
+        const newestObservationMs = Math.max(...data.observations.map(o => o.created_at_epoch));
+        if (fileMtimeMs >= newestObservationMs) {
+          logger.debug('HOOK', 'File modified since last observation, skipping truncation', {
+            filePath: relativePath,
+            fileMtimeMs,
+            newestObservationMs,
+          });
+          return { continue: true, suppressOutput: true };
+        }
+      }
+
       // Deduplicate: one per session, ranked by specificity to this file
       const dedupedObservations = deduplicateObservations(data.observations, relativePath, DISPLAY_LIMIT);
       if (dedupedObservations.length === 0) {
         return { continue: true, suppressOutput: true };
       }
 
-      // Allow the read with limit: 1 line — just enough for Edit's "file must be read"
-      // check to pass, while keeping token cost near zero. The observation timeline
-      // gives Claude full context about prior work on this file.
-      const timeline = formatFileTimeline(dedupedObservations, filePath);
+      // Unconstrained → truncate to 1 line; targeted → preserve offset/limit.
+      const truncated = !isTargetedRead;
+      const timeline = formatFileTimeline(dedupedObservations, filePath, truncated);
+      const updatedInput: Record<string, unknown> = { file_path: filePath };
+      if (isTargetedRead) {
+        if (userOffset !== undefined) updatedInput.offset = userOffset;
+        if (userLimit !== undefined) updatedInput.limit = userLimit;
+      } else {
+        updatedInput.limit = 1;
+      }
+
       return {
         hookSpecificOutput: {
           hookEventName: 'PreToolUse',
           additionalContext: timeline,
           permissionDecision: 'allow',
-          updatedInput: {
-            file_path: filePath,
-            limit: 1,
-          },
+          updatedInput,
         },
       };
     } catch (error) {
diff --git a/tests/hooks/file-context.test.ts b/tests/hooks/file-context.test.ts
new file mode 100644
index 00000000..7c42cf5f
--- /dev/null
+++ b/tests/hooks/file-context.test.ts
@@ -0,0 +1,239 @@
+// Tests for file-context cache validation fix (#1719)
+import { describe, it, expect, beforeEach, afterEach, spyOn, mock } from 'bun:test';
+import { mkdtempSync, writeFileSync, utimesSync, rmSync } from 'fs';
+import { tmpdir, homedir } from 'os';
+import { join } from 'path';
+
+// Mock modules that cause import chain issues — MUST be before handler imports
+mock.module('../../src/shared/SettingsDefaultsManager.js', () => ({
+  SettingsDefaultsManager: {
+    get: (key: string) => {
+      if (key === 'CLAUDE_MEM_DATA_DIR') return join(homedir(), '.claude-mem');
+      return '';
+    },
+    getInt: () => 0,
+    loadFromFile: () => ({ CLAUDE_MEM_EXCLUDED_PROJECTS: [] }),
+  },
+}));
+
+mock.module('../../src/shared/worker-utils.js', () => ({
+  ensureWorkerRunning: () => Promise.resolve(true),
+  getWorkerPort: () => 37777,
+  workerHttpRequest: (apiPath: string, options?: any) => {
+    const url = `http://127.0.0.1:37777${apiPath}`;
+    return globalThis.fetch(url, {
+      method: options?.method ?? 'GET',
+      headers: options?.headers,
+      body: options?.body,
+    });
+  },
+}));
+
+mock.module('../../src/utils/project-name.js', () => ({
+  getProjectName: () => 'test-project',
+  getProjectContext: () => ({ allProjects: ['test-project'] }),
+}));
+
+mock.module('../../src/utils/project-filter.js', () => ({
+  isProjectExcluded: () => false,
+}));
+
+// Import after mocks
+import { fileContextHandler } from '../../src/cli/handlers/file-context.js';
+import { logger } from '../../src/utils/logger.js';
+
+const PADDING = 'x'.repeat(2_000); // ensures file > FILE_READ_GATE_MIN_BYTES (1500)
+
+let tmpDir: string;
+let testFile: string;
+let loggerSpies: ReturnType<typeof spyOn>[] = [];
+let fetchSpy: ReturnType<typeof spyOn> | null = null;
+
+function makeObservationsResponse(observations: Array<{ id: number; created_at_epoch: number; type?: string; title?: string }>) {
+  return new Response(
+    JSON.stringify({
+      observations: observations.map(o => ({
+        id: o.id,
+        memory_session_id: `session-${o.id}`,
+        title: o.title ?? `Observation ${o.id}`,
+        type: o.type ?? 'discovery',
+        created_at_epoch: o.created_at_epoch,
+        files_read: JSON.stringify([]),
+        files_modified: JSON.stringify(['test.md']),
+      })),
+      count: observations.length,
+    }),
+    { status: 200, headers: { 'Content-Type': 'application/json' } }
+  );
+}
+
+beforeEach(() => {
+  tmpDir = mkdtempSync(join(tmpdir(), 'file-context-test-'));
+  testFile = join(tmpDir, 'test.md');
+  writeFileSync(testFile, PADDING);
+
+  loggerSpies = [
+    spyOn(logger, 'info').mockImplementation(() => {}),
+    spyOn(logger, 'debug').mockImplementation(() => {}),
+    spyOn(logger, 'warn').mockImplementation(() => {}),
+    spyOn(logger, 'error').mockImplementation(() => {}),
+  ];
+});
+
+afterEach(() => {
+  loggerSpies.forEach(s => s.mockRestore());
+  if (fetchSpy) {
+    fetchSpy.mockRestore();
+    fetchSpy = null;
+  }
+  try { rmSync(tmpDir, { recursive: true, force: true }); } catch {}
+});
+
+describe('fileContextHandler — cache validation fix (#1719)', () => {
+  it('truncates to limit:1 for an unconstrained Read (existing behavior)', async () => {
+    // File mtime is "now" (just written). Make observations newer to avoid mtime bypass.
+    const future = Date.now() + 60_000;
+    fetchSpy = spyOn(globalThis, 'fetch').mockResolvedValue(
+      makeObservationsResponse([{ id: 1, created_at_epoch: future }])
+    );
+
+    const result = await fileContextHandler.execute({
+      sessionId: 'sess',
+      cwd: tmpDir,
+      toolName: 'Read',
+      toolInput: { file_path: testFile },
+    });
+
+    expect(result.hookSpecificOutput).toBeDefined();
+    expect(result.hookSpecificOutput!.updatedInput).toEqual({
+      file_path: testFile,
+      limit: 1,
+    });
+  });
+
+  it('preserves user-supplied offset/limit on a targeted Read (#1719 fix)', async () => {
+    const future = Date.now() + 60_000;
+    fetchSpy = spyOn(globalThis, 'fetch').mockResolvedValue(
+      makeObservationsResponse([{ id: 1, created_at_epoch: future }])
+    );
+
+    const result = await fileContextHandler.execute({
+      sessionId: 'sess',
+      cwd: tmpDir,
+      toolName: 'Read',
+      toolInput: { file_path: testFile, offset: 289, limit: 140 },
+    });
+
+    expect(result.hookSpecificOutput).toBeDefined();
+    expect(result.hookSpecificOutput!.updatedInput).toEqual({
+      file_path: testFile,
+      offset: 289,
+      limit: 140,
+    });
+  });
+
+  it('preserves user-supplied offset only', async () => {
+    const future = Date.now() + 60_000;
+    fetchSpy = spyOn(globalThis, 'fetch').mockResolvedValue(
+      makeObservationsResponse([{ id: 1, created_at_epoch: future }])
+    );
+
+    const result = await fileContextHandler.execute({
+      sessionId: 'sess',
+      cwd: tmpDir,
+      toolName: 'Read',
+      toolInput: { file_path: testFile, offset: 100 },
+    });
+
+    expect(result.hookSpecificOutput!.updatedInput).toEqual({
+      file_path: testFile,
+      offset: 100,
+    });
+    expect((result.hookSpecificOutput!.updatedInput as any).limit).toBeUndefined();
+  });
+
+  it('preserves user-supplied limit only', async () => {
+    const future = Date.now() + 60_000;
+    fetchSpy = spyOn(globalThis, 'fetch').mockResolvedValue(
+      makeObservationsResponse([{ id: 1, created_at_epoch: future }])
+    );
+
+    const result = await fileContextHandler.execute({
+      sessionId: 'sess',
+      cwd: tmpDir,
+      toolName: 'Read',
+      toolInput: { file_path: testFile, limit: 50 },
+    });
+
+    expect(result.hookSpecificOutput!.updatedInput).toEqual({
+      file_path: testFile,
+      limit: 50,
+    });
+    // offset must NOT be present
+    expect((result.hookSpecificOutput!.updatedInput as any).offset).toBeUndefined();
+  });
+
+  it('bypasses truncation when file mtime is newer than newest observation (#1719 fix)', async () => {
+    // Backdate observations 1 hour into the past so the just-written file is newer.
+    const stale = Date.now() - 3_600_000;
+    fetchSpy = spyOn(globalThis, 'fetch').mockResolvedValue(
+      makeObservationsResponse([
+        { id: 1, created_at_epoch: stale },
+        { id: 2, created_at_epoch: stale - 1000 },
+      ])
+    );
+
+    const result = await fileContextHandler.execute({
+      sessionId: 'sess',
+      cwd: tmpDir,
+      toolName: 'Read',
+      toolInput: { file_path: testFile },
+    });
+
+    // Pass-through: no hookSpecificOutput, no updatedInput rewrite
+    expect(result.continue).toBe(true);
+    expect(result.hookSpecificOutput).toBeUndefined();
+  });
+
+  it('still truncates when file mtime is older than newest observation', async () => {
+    // Backdate the file by 1 hour, observations stamped "now"
+    const past = (Date.now() - 3_600_000) / 1000;
+    utimesSync(testFile, past, past);
+
+    const now = Date.now();
+    fetchSpy = spyOn(globalThis, 'fetch').mockResolvedValue(
+      makeObservationsResponse([{ id: 1, created_at_epoch: now }])
+    );
+
+    const result = await fileContextHandler.execute({
+      sessionId: 'sess',
+      cwd: tmpDir,
+      toolName: 'Read',
+      toolInput: { file_path: testFile },
+    });
+
+    expect(result.hookSpecificOutput).toBeDefined();
+    expect(result.hookSpecificOutput!.updatedInput).toEqual({
+      file_path: testFile,
+      limit: 1,
+    });
+  });
+
+  it('targeted-read header line reflects that the section was read normally', async () => {
+    const future = Date.now() + 60_000;
+    fetchSpy = spyOn(globalThis, 'fetch').mockResolvedValue(
+      makeObservationsResponse([{ id: 1, created_at_epoch: future }])
+    );
+
+    const result = await fileContextHandler.execute({
+      sessionId: 'sess',
+      cwd: tmpDir,
+      toolName: 'Read',
+      toolInput: { file_path: testFile, offset: 10, limit: 20 },
+    });
+
+    const ctx = result.hookSpecificOutput!.additionalContext;
+    expect(ctx).toContain('The requested section was read normally');
+    expect(ctx).not.toContain('Only line 1 was read');
+  });
+});