fix(file-context): preserve targeted reads + invalidate on mtime (#1719) (#1729)

* fix(file-context): preserve targeted reads + invalidate on mtime (#1719)

The PreToolUse:Read hook unconditionally rewrote the tool input to
{file_path, limit:1}, which led to two failure modes, sketched below:

1. A subagent edits a file → the parent's next Read still gets truncated
   because the observation snapshot predates the change.
2. Claude requests a different section with offset/limit → the hook
   strips them, so the Claude Code harness's read-dedup cache returns
   "File unchanged" against the prior 1-line read. The file becomes
   unreadable for the rest of the conversation, even though the hook's
   own recovery hint says "Read again with offset/limit for the
   section you need."
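
For illustration, the pre-fix rewrite amounted to something like this (a
simplified sketch; the real code is in the diff below):

```ts
// Pre-fix behavior (simplified sketch): every Read of an observed file was
// collapsed to a 1-line read, discarding any offset/limit Claude supplied.
const updatedInput = { file_path: filePath, limit: 1 };
// A follow-up Read with { offset: 200, limit: 80 } was rewritten to the same
// { file_path, limit: 1 } input, so the harness's read-dedup cache saw a
// repeat and answered "File unchanged" instead of returning the section.
```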

Two complementary fixes:

- **mtime invalidation**: stat the file (we already stat for the size
  gate) and compare mtimeMs to the newest observation's created_at_epoch.
  If the file is newer, pass the read through unchanged so fresh content
  reaches Claude.

- **Targeted-read pass-through**: when toolInput already specifies
  offset and/or limit, preserve them in updatedInput instead of
  collapsing to {limit:1}. The harness's dedup cache then sees a
  distinct input and lets the read proceed.

The unconstrained-read path (no offset, no limit) is unchanged: still
truncated to 1 line plus the observation timeline, so token economics
are preserved for the common case.
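
A minimal sketch of how the two fixes compose (simplified from the diff
below, which also handles the stat-failure sentinel and bounds checks):

```ts
// Sketch of the combined gate logic (simplified; see the diff for the real code).
const isTargetedRead = userOffset !== undefined || userLimit !== undefined;

// mtime invalidation: a file newer than the latest observation passes through untouched.
if (fileMtimeMs > 0 && fileMtimeMs >= newestObservationMs) {
  return { continue: true, suppressOutput: true };
}

// Targeted reads keep their offset/limit so the dedup cache sees a distinct input;
// unconstrained reads are still truncated to 1 line plus the observation timeline.
const updatedInput: Record<string, unknown> = { file_path: filePath };
if (isTargetedRead) {
  if (userOffset !== undefined) updatedInput.offset = userOffset;
  if (userLimit !== undefined) updatedInput.limit = userLimit;
} else {
  updatedInput.limit = 1;
}
```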

Tests cover all three branches: existing truncation, targeted-read
pass-through (offset+limit, limit-only), and mtime-driven bypass.
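
The test file itself is not shown in this excerpt; the three branches could
be exercised along these lines (a hedged sketch — runReadHook and the fixture
helper are hypothetical names, not the project's actual test API):

```ts
// Hypothetical test shapes for the three branches (helper names are assumptions).
it('truncates an unconstrained Read to limit: 1', async () => {
  const out = await runReadHook({ file_path: '/repo/src/big-module.ts' });
  expect(out.hookSpecificOutput.updatedInput).toEqual({ file_path: '/repo/src/big-module.ts', limit: 1 });
});

it('preserves offset/limit on a targeted Read', async () => {
  const out = await runReadHook({ file_path: '/repo/src/big-module.ts', offset: 120, limit: 40 });
  expect(out.hookSpecificOutput.updatedInput).toMatchObject({ offset: 120, limit: 40 });
});

it('bypasses truncation when the file is newer than the latest observation', async () => {
  await touchFileAfterObservations('/repo/src/big-module.ts'); // hypothetical fixture helper
  const out = await runReadHook({ file_path: '/repo/src/big-module.ts' });
  expect(out).toEqual({ continue: true, suppressOutput: true });
});
```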

Fixes #1719

* refactor(file-context): address review findings on #1719 fix

- Add offset-only test case for full targeted-read branch coverage
- Use >= for mtime comparison to handle same-millisecond edge case
- Add Number.isFinite() + bounds guards on offset/limit pass-through
- Trim over-verbose comments to concise single-line summaries
- Remove redundant `as number` casts after typeof narrowing
- Add comment explaining fileMtimeMs=0 sentinel invariant
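
For reference, the offset/limit guards named above correspond to validation
along these lines (a sketch mirroring the diff below):

```ts
// Bounds guards (mirrors the diff below): only finite, non-negative offsets and
// finite, positive limits are passed through; anything else falls back to the
// default 1-line truncation.
const userOffset =
  typeof toolInput?.offset === 'number' && Number.isFinite(toolInput.offset) && toolInput.offset >= 0
    ? Math.floor(toolInput.offset)
    : undefined;
const userLimit =
  typeof toolInput?.limit === 'number' && Number.isFinite(toolInput.limit) && toolInput.limit > 0
    ? Math.floor(toolInput.limit)
    : undefined;
```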
Tran Quang
2026-04-15 14:57:57 +07:00
committed by GitHub
parent d64c252f4d
commit 2a2008bac2
2 changed files with 288 additions and 12 deletions
@@ -106,7 +106,11 @@ function deduplicateObservations(
return scored.slice(0, displayLimit).map(s => s.obs);
}
function formatFileTimeline(observations: ObservationRow[], filePath: string): string {
function formatFileTimeline(
observations: ObservationRow[],
filePath: string,
truncated: boolean
): string {
// Escape filePath for safe interpolation into recovery hints (quotes, backslashes, newlines)
const safePath = filePath.replace(/\\/g, '\\\\').replace(/"/g, '\\"').replace(/\n/g, '\\n');
// Group observations by day
@@ -136,9 +140,13 @@ function formatFileTimeline(observations: ObservationRow[], filePath: string): s
}).toLowerCase().replace(' ', '');
const currentTimezone = now.toLocaleTimeString('en-US', { timeZoneName: 'short' }).split(' ').pop();
const headerLine = truncated
? `This file has prior observations. Only line 1 was read to save tokens.`
: `This file has prior observations. The requested section was read normally.`;
const lines: string[] = [
`Current: ${currentDate} ${currentTime} ${currentTimezone}`,
`This file has prior observations. Only line 1 was read to save tokens.`,
headerLine,
`- **Already know enough?** The timeline below may be all you need (semantic priming).`,
`- **Need details?** get_observations([IDs]) — ~300 tokens each.`,
`- **Need full file?** Read again with offset/limit for the section you need.`,
@@ -170,16 +178,27 @@ export const fileContextHandler: EventHandler = {
return { continue: true, suppressOutput: true };
}
// Skip gate for files below the token-economics threshold — timeline (~370 tokens)
// costs more than reading small files directly.
// Preserve user-supplied offset/limit to avoid read-dedup collisions (fixes #1719)
const userOffset = typeof toolInput?.offset === 'number' && Number.isFinite(toolInput.offset) && toolInput.offset >= 0
? Math.floor(toolInput.offset) : undefined;
const userLimit = typeof toolInput?.limit === 'number' && Number.isFinite(toolInput.limit) && toolInput.limit > 0
? Math.floor(toolInput.limit) : undefined;
const isTargetedRead = userOffset !== undefined || userLimit !== undefined;
// Stat the file once: size (gate) + mtime (cache invalidation).
// 0 = stat failed non-fatally (e.g. EPERM) — skip mtime check, fall through to truncation.
let fileMtimeMs = 0;
try {
const statPath = path.isAbsolute(filePath)
? filePath
: path.resolve(input.cwd || process.cwd(), filePath);
const stat = statSync(statPath);
// Skip gate for files below the token-economics threshold — timeline (~370 tokens)
// costs more than reading small files directly.
if (stat.size < FILE_READ_GATE_MIN_BYTES) {
return { continue: true, suppressOutput: true };
}
fileMtimeMs = stat.mtimeMs;
} catch (err: any) {
if (err.code === 'ENOENT') return { continue: true, suppressOutput: true };
// Other errors (symlink, permission denied) — fall through and let gate proceed
@@ -227,25 +246,43 @@ export const fileContextHandler: EventHandler = {
return { continue: true, suppressOutput: true };
}
// mtime invalidation: bypass truncation when the file is newer than the latest observation.
// Uses >= to handle same-millisecond edits (cost: one extra full read vs risk of stuck truncation).
if (fileMtimeMs > 0) {
const newestObservationMs = Math.max(...data.observations.map(o => o.created_at_epoch));
if (fileMtimeMs >= newestObservationMs) {
logger.debug('HOOK', 'File modified since last observation, skipping truncation', {
filePath: relativePath,
fileMtimeMs,
newestObservationMs,
});
return { continue: true, suppressOutput: true };
}
}
// Deduplicate: one per session, ranked by specificity to this file
const dedupedObservations = deduplicateObservations(data.observations, relativePath, DISPLAY_LIMIT);
if (dedupedObservations.length === 0) {
return { continue: true, suppressOutput: true };
}
// Allow the read with limit: 1 line — just enough for Edit's "file must be read"
// check to pass, while keeping token cost near zero. The observation timeline
// gives Claude full context about prior work on this file.
const timeline = formatFileTimeline(dedupedObservations, filePath);
// Unconstrained → truncate to 1 line; targeted → preserve offset/limit.
const truncated = !isTargetedRead;
const timeline = formatFileTimeline(dedupedObservations, filePath, truncated);
const updatedInput: Record<string, unknown> = { file_path: filePath };
if (isTargetedRead) {
if (userOffset !== undefined) updatedInput.offset = userOffset;
if (userLimit !== undefined) updatedInput.limit = userLimit;
} else {
updatedInput.limit = 1;
}
return {
hookSpecificOutput: {
hookEventName: 'PreToolUse',
additionalContext: timeline,
permissionDecision: 'allow',
updatedInput: {
file_path: filePath,
limit: 1,
},
updatedInput,
},
};
} catch (error) {