From 2a2008bac210d763fa3f8721100290b358a40cef Mon Sep 17 00:00:00 2001 From: Tran Quang <107824159+quangtran88@users.noreply.github.com> Date: Wed, 15 Apr 2026 14:57:57 +0700 Subject: [PATCH] fix(file-context): preserve targeted reads + invalidate on mtime (#1719) (#1729) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(file-context): preserve targeted reads + invalidate on mtime (#1719) The PreToolUse:Read hook unconditionally rewrote tool input to {file_path, limit:1}, which interacted with two failure modes: 1. Subagent edits a file → parent's next Read still gets truncated because the observation snapshot predates the change. 2. Claude requests a different section with offset/limit → the hook strips them, so the Claude Code harness's read-dedup cache returns "File unchanged" against the prior 1-line read. The file becomes unreadable for the rest of the conversation, even though the hook's own recovery hint says "Read again with offset/limit for the section you need." Two complementary fixes: - **mtime invalidation**: stat the file (we already stat for the size gate) and compare mtimeMs to the newest observation's created_at_epoch. If the file is newer, pass the read through unchanged so fresh content reaches Claude. - **Targeted-read pass-through**: when toolInput already specifies offset and/or limit, preserve them in updatedInput instead of collapsing to {limit:1}. The harness's dedup cache then sees a distinct input and lets the read proceed. The unconstrained-read path (no offset, no limit) is unchanged: still truncated to 1 line plus the observation timeline, so token economics are preserved for the common case. Tests cover all three branches: existing truncation, targeted-read pass-through (offset+limit, limit-only), and mtime-driven bypass. Fixes #1719 * refactor(file-context): address review findings on #1719 fix - Add offset-only test case for full targeted-read branch coverage - Use >= for mtime comparison to handle same-millisecond edge case - Add Number.isFinite() + bounds guards on offset/limit pass-through - Trim over-verbose comments to concise single-line summaries - Remove redundant `as number` casts after typeof narrowing - Add comment explaining fileMtimeMs=0 sentinel invariant --- src/cli/handlers/file-context.ts | 61 ++++++-- tests/hooks/file-context.test.ts | 239 +++++++++++++++++++++++++++++++ 2 files changed, 288 insertions(+), 12 deletions(-) create mode 100644 tests/hooks/file-context.test.ts diff --git a/src/cli/handlers/file-context.ts b/src/cli/handlers/file-context.ts index c53818fd..b0a81c27 100644 --- a/src/cli/handlers/file-context.ts +++ b/src/cli/handlers/file-context.ts @@ -106,7 +106,11 @@ function deduplicateObservations( return scored.slice(0, displayLimit).map(s => s.obs); } -function formatFileTimeline(observations: ObservationRow[], filePath: string): string { +function formatFileTimeline( + observations: ObservationRow[], + filePath: string, + truncated: boolean +): string { // Escape filePath for safe interpolation into recovery hints (quotes, backslashes, newlines) const safePath = filePath.replace(/\\/g, '\\\\').replace(/"/g, '\\"').replace(/\n/g, '\\n'); // Group observations by day @@ -136,9 +140,13 @@ function formatFileTimeline(observations: ObservationRow[], filePath: string): s }).toLowerCase().replace(' ', ''); const currentTimezone = now.toLocaleTimeString('en-US', { timeZoneName: 'short' }).split(' ').pop(); + const headerLine = truncated + ? `This file has prior observations. Only line 1 was read to save tokens.` + : `This file has prior observations. The requested section was read normally.`; + const lines: string[] = [ `Current: ${currentDate} ${currentTime} ${currentTimezone}`, - `This file has prior observations. Only line 1 was read to save tokens.`, + headerLine, `- **Already know enough?** The timeline below may be all you need (semantic priming).`, `- **Need details?** get_observations([IDs]) — ~300 tokens each.`, `- **Need full file?** Read again with offset/limit for the section you need.`, @@ -170,16 +178,27 @@ export const fileContextHandler: EventHandler = { return { continue: true, suppressOutput: true }; } - // Skip gate for files below the token-economics threshold — timeline (~370 tokens) - // costs more than reading small files directly. + // Preserve user-supplied offset/limit to avoid read-dedup collisions (fixes #1719) + const userOffset = typeof toolInput?.offset === 'number' && Number.isFinite(toolInput.offset) && toolInput.offset >= 0 + ? Math.floor(toolInput.offset) : undefined; + const userLimit = typeof toolInput?.limit === 'number' && Number.isFinite(toolInput.limit) && toolInput.limit > 0 + ? Math.floor(toolInput.limit) : undefined; + const isTargetedRead = userOffset !== undefined || userLimit !== undefined; + + // Stat the file once: size (gate) + mtime (cache invalidation). + // 0 = stat failed non-fatally (e.g. EPERM) — skip mtime check, fall through to truncation. + let fileMtimeMs = 0; try { const statPath = path.isAbsolute(filePath) ? filePath : path.resolve(input.cwd || process.cwd(), filePath); const stat = statSync(statPath); + // Skip gate for files below the token-economics threshold — timeline (~370 tokens) + // costs more than reading small files directly. if (stat.size < FILE_READ_GATE_MIN_BYTES) { return { continue: true, suppressOutput: true }; } + fileMtimeMs = stat.mtimeMs; } catch (err: any) { if (err.code === 'ENOENT') return { continue: true, suppressOutput: true }; // Other errors (symlink, permission denied) — fall through and let gate proceed @@ -227,25 +246,43 @@ export const fileContextHandler: EventHandler = { return { continue: true, suppressOutput: true }; } + // mtime invalidation: bypass truncation when the file is newer than the latest observation. + // Uses >= to handle same-millisecond edits (cost: one extra full read vs risk of stuck truncation). + if (fileMtimeMs > 0) { + const newestObservationMs = Math.max(...data.observations.map(o => o.created_at_epoch)); + if (fileMtimeMs >= newestObservationMs) { + logger.debug('HOOK', 'File modified since last observation, skipping truncation', { + filePath: relativePath, + fileMtimeMs, + newestObservationMs, + }); + return { continue: true, suppressOutput: true }; + } + } + // Deduplicate: one per session, ranked by specificity to this file const dedupedObservations = deduplicateObservations(data.observations, relativePath, DISPLAY_LIMIT); if (dedupedObservations.length === 0) { return { continue: true, suppressOutput: true }; } - // Allow the read with limit: 1 line — just enough for Edit's "file must be read" - // check to pass, while keeping token cost near zero. The observation timeline - // gives Claude full context about prior work on this file. - const timeline = formatFileTimeline(dedupedObservations, filePath); + // Unconstrained → truncate to 1 line; targeted → preserve offset/limit. + const truncated = !isTargetedRead; + const timeline = formatFileTimeline(dedupedObservations, filePath, truncated); + const updatedInput: Record = { file_path: filePath }; + if (isTargetedRead) { + if (userOffset !== undefined) updatedInput.offset = userOffset; + if (userLimit !== undefined) updatedInput.limit = userLimit; + } else { + updatedInput.limit = 1; + } + return { hookSpecificOutput: { hookEventName: 'PreToolUse', additionalContext: timeline, permissionDecision: 'allow', - updatedInput: { - file_path: filePath, - limit: 1, - }, + updatedInput, }, }; } catch (error) { diff --git a/tests/hooks/file-context.test.ts b/tests/hooks/file-context.test.ts new file mode 100644 index 00000000..7c42cf5f --- /dev/null +++ b/tests/hooks/file-context.test.ts @@ -0,0 +1,239 @@ +// Tests for file-context cache validation fix (#1719) +import { describe, it, expect, beforeEach, afterEach, spyOn, mock } from 'bun:test'; +import { mkdtempSync, writeFileSync, utimesSync, rmSync } from 'fs'; +import { tmpdir, homedir } from 'os'; +import { join } from 'path'; + +// Mock modules that cause import chain issues — MUST be before handler imports +mock.module('../../src/shared/SettingsDefaultsManager.js', () => ({ + SettingsDefaultsManager: { + get: (key: string) => { + if (key === 'CLAUDE_MEM_DATA_DIR') return join(homedir(), '.claude-mem'); + return ''; + }, + getInt: () => 0, + loadFromFile: () => ({ CLAUDE_MEM_EXCLUDED_PROJECTS: [] }), + }, +})); + +mock.module('../../src/shared/worker-utils.js', () => ({ + ensureWorkerRunning: () => Promise.resolve(true), + getWorkerPort: () => 37777, + workerHttpRequest: (apiPath: string, options?: any) => { + const url = `http://127.0.0.1:37777${apiPath}`; + return globalThis.fetch(url, { + method: options?.method ?? 'GET', + headers: options?.headers, + body: options?.body, + }); + }, +})); + +mock.module('../../src/utils/project-name.js', () => ({ + getProjectName: () => 'test-project', + getProjectContext: () => ({ allProjects: ['test-project'] }), +})); + +mock.module('../../src/utils/project-filter.js', () => ({ + isProjectExcluded: () => false, +})); + +// Import after mocks +import { fileContextHandler } from '../../src/cli/handlers/file-context.js'; +import { logger } from '../../src/utils/logger.js'; + +const PADDING = 'x'.repeat(2_000); // ensures file > FILE_READ_GATE_MIN_BYTES (1500) + +let tmpDir: string; +let testFile: string; +let loggerSpies: ReturnType[] = []; +let fetchSpy: ReturnType | null = null; + +function makeObservationsResponse(observations: Array<{ id: number; created_at_epoch: number; type?: string; title?: string }>) { + return new Response( + JSON.stringify({ + observations: observations.map(o => ({ + id: o.id, + memory_session_id: `session-${o.id}`, + title: o.title ?? `Observation ${o.id}`, + type: o.type ?? 'discovery', + created_at_epoch: o.created_at_epoch, + files_read: JSON.stringify([]), + files_modified: JSON.stringify(['test.md']), + })), + count: observations.length, + }), + { status: 200, headers: { 'Content-Type': 'application/json' } } + ); +} + +beforeEach(() => { + tmpDir = mkdtempSync(join(tmpdir(), 'file-context-test-')); + testFile = join(tmpDir, 'test.md'); + writeFileSync(testFile, PADDING); + + loggerSpies = [ + spyOn(logger, 'info').mockImplementation(() => {}), + spyOn(logger, 'debug').mockImplementation(() => {}), + spyOn(logger, 'warn').mockImplementation(() => {}), + spyOn(logger, 'error').mockImplementation(() => {}), + ]; +}); + +afterEach(() => { + loggerSpies.forEach(s => s.mockRestore()); + if (fetchSpy) { + fetchSpy.mockRestore(); + fetchSpy = null; + } + try { rmSync(tmpDir, { recursive: true, force: true }); } catch {} +}); + +describe('fileContextHandler — cache validation fix (#1719)', () => { + it('truncates to limit:1 for an unconstrained Read (existing behavior)', async () => { + // File mtime is "now" (just written). Make observations newer to avoid mtime bypass. + const future = Date.now() + 60_000; + fetchSpy = spyOn(globalThis, 'fetch').mockResolvedValue( + makeObservationsResponse([{ id: 1, created_at_epoch: future }]) + ); + + const result = await fileContextHandler.execute({ + sessionId: 'sess', + cwd: tmpDir, + toolName: 'Read', + toolInput: { file_path: testFile }, + }); + + expect(result.hookSpecificOutput).toBeDefined(); + expect(result.hookSpecificOutput!.updatedInput).toEqual({ + file_path: testFile, + limit: 1, + }); + }); + + it('preserves user-supplied offset/limit on a targeted Read (#1719 fix)', async () => { + const future = Date.now() + 60_000; + fetchSpy = spyOn(globalThis, 'fetch').mockResolvedValue( + makeObservationsResponse([{ id: 1, created_at_epoch: future }]) + ); + + const result = await fileContextHandler.execute({ + sessionId: 'sess', + cwd: tmpDir, + toolName: 'Read', + toolInput: { file_path: testFile, offset: 289, limit: 140 }, + }); + + expect(result.hookSpecificOutput).toBeDefined(); + expect(result.hookSpecificOutput!.updatedInput).toEqual({ + file_path: testFile, + offset: 289, + limit: 140, + }); + }); + + it('preserves user-supplied offset only', async () => { + const future = Date.now() + 60_000; + fetchSpy = spyOn(globalThis, 'fetch').mockResolvedValue( + makeObservationsResponse([{ id: 1, created_at_epoch: future }]) + ); + + const result = await fileContextHandler.execute({ + sessionId: 'sess', + cwd: tmpDir, + toolName: 'Read', + toolInput: { file_path: testFile, offset: 100 }, + }); + + expect(result.hookSpecificOutput!.updatedInput).toEqual({ + file_path: testFile, + offset: 100, + }); + expect((result.hookSpecificOutput!.updatedInput as any).limit).toBeUndefined(); + }); + + it('preserves user-supplied limit only', async () => { + const future = Date.now() + 60_000; + fetchSpy = spyOn(globalThis, 'fetch').mockResolvedValue( + makeObservationsResponse([{ id: 1, created_at_epoch: future }]) + ); + + const result = await fileContextHandler.execute({ + sessionId: 'sess', + cwd: tmpDir, + toolName: 'Read', + toolInput: { file_path: testFile, limit: 50 }, + }); + + expect(result.hookSpecificOutput!.updatedInput).toEqual({ + file_path: testFile, + limit: 50, + }); + // offset must NOT be present + expect((result.hookSpecificOutput!.updatedInput as any).offset).toBeUndefined(); + }); + + it('bypasses truncation when file mtime is newer than newest observation (#1719 fix)', async () => { + // Backdate observations 1 hour into the past so the just-written file is newer. + const stale = Date.now() - 3_600_000; + fetchSpy = spyOn(globalThis, 'fetch').mockResolvedValue( + makeObservationsResponse([ + { id: 1, created_at_epoch: stale }, + { id: 2, created_at_epoch: stale - 1000 }, + ]) + ); + + const result = await fileContextHandler.execute({ + sessionId: 'sess', + cwd: tmpDir, + toolName: 'Read', + toolInput: { file_path: testFile }, + }); + + // Pass-through: no hookSpecificOutput, no updatedInput rewrite + expect(result.continue).toBe(true); + expect(result.hookSpecificOutput).toBeUndefined(); + }); + + it('still truncates when file mtime is older than newest observation', async () => { + // Backdate the file by 1 hour, observations stamped "now" + const past = (Date.now() - 3_600_000) / 1000; + utimesSync(testFile, past, past); + + const now = Date.now(); + fetchSpy = spyOn(globalThis, 'fetch').mockResolvedValue( + makeObservationsResponse([{ id: 1, created_at_epoch: now }]) + ); + + const result = await fileContextHandler.execute({ + sessionId: 'sess', + cwd: tmpDir, + toolName: 'Read', + toolInput: { file_path: testFile }, + }); + + expect(result.hookSpecificOutput).toBeDefined(); + expect(result.hookSpecificOutput!.updatedInput).toEqual({ + file_path: testFile, + limit: 1, + }); + }); + + it('targeted-read header line reflects that the section was read normally', async () => { + const future = Date.now() + 60_000; + fetchSpy = spyOn(globalThis, 'fetch').mockResolvedValue( + makeObservationsResponse([{ id: 1, created_at_epoch: future }]) + ); + + const result = await fileContextHandler.execute({ + sessionId: 'sess', + cwd: tmpDir, + toolName: 'Read', + toolInput: { file_path: testFile, offset: 10, limit: 20 }, + }); + + const ctx = result.hookSpecificOutput!.additionalContext; + expect(ctx).toContain('The requested section was read normally'); + expect(ctx).not.toContain('Only line 1 was read'); + }); +});