From e39821298358b5e870f675988a1196cd29d4dbe4 Mon Sep 17 00:00:00 2001 From: Ousama Ben Younes Date: Fri, 10 Apr 2026 09:40:40 +0000 Subject: [PATCH] fix: filter ghost observations with no content fields (#1625) When the LLM is overwhelmed by large context it can emit bare blocks (or ones containing only ). These are stored as rows where title, narrative, facts and concepts are all null/empty, appearing as meaningless "Untitled" entries in the context window. Add a guard in parseObservations() that skips any observation where every content field is null/empty before pushing it to the result array. Generated by Claude Code Vibe coded by ousamabenyounes Co-Authored-By: Claude --- src/sdk/parser.ts | 18 ++++- tests/sdk/parser.test.ts | 155 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 170 insertions(+), 3 deletions(-) create mode 100644 tests/sdk/parser.test.ts diff --git a/src/sdk/parser.ts b/src/sdk/parser.ts index d9028dab..4686902c 100644 --- a/src/sdk/parser.ts +++ b/src/sdk/parser.ts @@ -50,9 +50,8 @@ export function parseObservations(text: string, correlationId?: string): ParsedO const files_read = extractArrayElements(obsContent, 'files_read', 'file'); const files_modified = extractArrayElements(obsContent, 'files_modified', 'file'); - // NOTE FROM THEDOTMACK: ALWAYS save observations - never skip. 10/24/2025 - // All fields except type are nullable in schema - // If type is missing or invalid, use first type from mode as fallback + // All fields except type are nullable in schema. + // If type is missing or invalid, use first type from mode as fallback. // Determine final type using active mode's valid types const mode = ModeManager.getInstance().getActiveMode(); @@ -83,6 +82,19 @@ export function parseObservations(text: string, correlationId?: string): ParsedO }); } + // Skip ghost observations — records where every content field is null/empty. + // These accumulate when the LLM emits a bare (or one with only ) + // due to context overflow. They carry no information and pollute the context window. + // (subtitle and file lists are intentionally excluded from this guard: an observation + // with only a subtitle is still too thin to be useful on its own.) + if (!title && !narrative && facts.length === 0 && cleanedConcepts.length === 0) { + logger.warn('PARSER', 'Skipping empty observation (all content fields null)', { + correlationId, + type: finalType + }); + continue; + } + observations.push({ type: finalType, title, diff --git a/tests/sdk/parser.test.ts b/tests/sdk/parser.test.ts new file mode 100644 index 00000000..d43b3cc8 --- /dev/null +++ b/tests/sdk/parser.test.ts @@ -0,0 +1,155 @@ +import { describe, it, expect, mock } from 'bun:test'; + +// Mock ModeManager before importing parser (it's used at module load time) +mock.module('../../src/services/domain/ModeManager.js', () => ({ + ModeManager: { + getInstance: () => ({ + getActiveMode: () => ({ + observation_types: [{ id: 'bugfix' }, { id: 'discovery' }, { id: 'refactor' }], + }), + }), + }, +})); + +import { parseObservations } from '../../src/sdk/parser.js'; + +describe('parseObservations', () => { + it('returns a populated observation when title is present', () => { + const xml = ` + discovery + Found a bug in auth module + The token refresh logic skips expired tokens. + `; + + const result = parseObservations(xml); + + expect(result).toHaveLength(1); + expect(result[0].title).toBe('Found a bug in auth module'); + expect(result[0].type).toBe('discovery'); + expect(result[0].narrative).toBe('The token refresh logic skips expired tokens.'); + }); + + it('returns a populated observation when only narrative is present (no title)', () => { + const xml = ` + bugfix + Patched the null pointer dereference in session handler. + `; + + const result = parseObservations(xml); + + expect(result).toHaveLength(1); + expect(result[0].title).toBeNull(); + expect(result[0].narrative).toBe('Patched the null pointer dereference in session handler.'); + }); + + it('returns a populated observation when only facts are present', () => { + const xml = ` + discovery + File limit is hardcoded to 5 + `; + + const result = parseObservations(xml); + + expect(result).toHaveLength(1); + expect(result[0].facts).toEqual(['File limit is hardcoded to 5']); + }); + + it('returns a populated observation when only concepts are present', () => { + const xml = ` + refactor + dependency-injection + `; + + const result = parseObservations(xml); + + expect(result).toHaveLength(1); + expect(result[0].concepts).toEqual(['dependency-injection']); + }); + + // Regression test for issue #1625: + // Ghost observations (all content fields null/empty) must be filtered out. + it('filters out ghost observations where all content fields are null (#1625)', () => { + const xml = ` + bugfix + `; + + const result = parseObservations(xml); + + expect(result).toHaveLength(0); + }); + + it('filters out ghost observation with empty tags but no text content (#1625)', () => { + const xml = ` + discovery + + + + + `; + + const result = parseObservations(xml); + + expect(result).toHaveLength(0); + }); + + it('filters out multiple ghost observations while keeping valid ones (#1625)', () => { + const xml = ` + bugfix + + discovery + Real observation + + refactor + `; + + const result = parseObservations(xml); + + expect(result).toHaveLength(1); + expect(result[0].title).toBe('Real observation'); + }); + + // Subtitle alone is explicitly excluded from the content guard (see parser comment). + // An observation with only a subtitle is too thin to be useful and must be filtered. + it('filters out observation with only a subtitle (excluded from survival criteria) (#1625)', () => { + const xml = ` + discovery + Only a subtitle, no real content + `; + + const result = parseObservations(xml); + + expect(result).toHaveLength(0); + }); + + it('uses first mode type as fallback when type is missing', () => { + const xml = ` + Missing type field + `; + + const result = parseObservations(xml); + + expect(result).toHaveLength(1); + // First type in mocked mode is 'bugfix' + expect(result[0].type).toBe('bugfix'); + }); + + it('returns empty array when no observation blocks are present', () => { + const result = parseObservations('Some text without any observations.'); + expect(result).toHaveLength(0); + }); + + it('parses files_read and files_modified arrays correctly', () => { + const xml = ` + bugfix + File read tracking + src/utils.tssrc/parser.ts + src/utils.ts + `; + + const result = parseObservations(xml); + + expect(result).toHaveLength(1); + expect(result[0].files_read).toEqual(['src/utils.ts', 'src/parser.ts']); + expect(result[0].files_modified).toEqual(['src/utils.ts']); + }); +});