diff --git a/src/sdk/parser.ts b/src/sdk/parser.ts index eca04b92..7dbd8bcd 100644 --- a/src/sdk/parser.ts +++ b/src/sdk/parser.ts @@ -50,9 +50,8 @@ export function parseObservations(text: string, correlationId?: string): ParsedO const files_read = extractArrayElements(obsContent, 'files_read', 'file'); const files_modified = extractArrayElements(obsContent, 'files_modified', 'file'); - // NOTE FROM THEDOTMACK: ALWAYS save observations - never skip. 10/24/2025 - // All fields except type are nullable in schema - // If type is missing or invalid, use first type from mode as fallback + // All fields except type are nullable in schema. + // If type is missing or invalid, use first type from mode as fallback. // Determine final type using active mode's valid types const mode = ModeManager.getInstance().getActiveMode(); @@ -83,6 +82,19 @@ export function parseObservations(text: string, correlationId?: string): ParsedO }); } + // Skip ghost observations — records where every content field is null/empty. + // These accumulate when the LLM emits a bare (or one with only ) + // due to context overflow. They carry no information and pollute the context window. + // (subtitle and file lists are intentionally excluded from this guard: an observation + // with only a subtitle is still too thin to be useful on its own.) + if (!title && !narrative && facts.length === 0 && cleanedConcepts.length === 0) { + logger.warn('PARSER', 'Skipping empty observation (all content fields null)', { + correlationId, + type: finalType + }); + continue; + } + observations.push({ type: finalType, title, diff --git a/tests/sdk/parser.test.ts b/tests/sdk/parser.test.ts new file mode 100644 index 00000000..d43b3cc8 --- /dev/null +++ b/tests/sdk/parser.test.ts @@ -0,0 +1,155 @@ +import { describe, it, expect, mock } from 'bun:test'; + +// Mock ModeManager before importing parser (it's used at module load time) +mock.module('../../src/services/domain/ModeManager.js', () => ({ + ModeManager: { + getInstance: () => ({ + getActiveMode: () => ({ + observation_types: [{ id: 'bugfix' }, { id: 'discovery' }, { id: 'refactor' }], + }), + }), + }, +})); + +import { parseObservations } from '../../src/sdk/parser.js'; + +describe('parseObservations', () => { + it('returns a populated observation when title is present', () => { + const xml = ` + discovery + Found a bug in auth module + The token refresh logic skips expired tokens. + `; + + const result = parseObservations(xml); + + expect(result).toHaveLength(1); + expect(result[0].title).toBe('Found a bug in auth module'); + expect(result[0].type).toBe('discovery'); + expect(result[0].narrative).toBe('The token refresh logic skips expired tokens.'); + }); + + it('returns a populated observation when only narrative is present (no title)', () => { + const xml = ` + bugfix + Patched the null pointer dereference in session handler. + `; + + const result = parseObservations(xml); + + expect(result).toHaveLength(1); + expect(result[0].title).toBeNull(); + expect(result[0].narrative).toBe('Patched the null pointer dereference in session handler.'); + }); + + it('returns a populated observation when only facts are present', () => { + const xml = ` + discovery + File limit is hardcoded to 5 + `; + + const result = parseObservations(xml); + + expect(result).toHaveLength(1); + expect(result[0].facts).toEqual(['File limit is hardcoded to 5']); + }); + + it('returns a populated observation when only concepts are present', () => { + const xml = ` + refactor + dependency-injection + `; + + const result = parseObservations(xml); + + expect(result).toHaveLength(1); + expect(result[0].concepts).toEqual(['dependency-injection']); + }); + + // Regression test for issue #1625: + // Ghost observations (all content fields null/empty) must be filtered out. + it('filters out ghost observations where all content fields are null (#1625)', () => { + const xml = ` + bugfix + `; + + const result = parseObservations(xml); + + expect(result).toHaveLength(0); + }); + + it('filters out ghost observation with empty tags but no text content (#1625)', () => { + const xml = ` + discovery + + + + + `; + + const result = parseObservations(xml); + + expect(result).toHaveLength(0); + }); + + it('filters out multiple ghost observations while keeping valid ones (#1625)', () => { + const xml = ` + bugfix + + discovery + Real observation + + refactor + `; + + const result = parseObservations(xml); + + expect(result).toHaveLength(1); + expect(result[0].title).toBe('Real observation'); + }); + + // Subtitle alone is explicitly excluded from the content guard (see parser comment). + // An observation with only a subtitle is too thin to be useful and must be filtered. + it('filters out observation with only a subtitle (excluded from survival criteria) (#1625)', () => { + const xml = ` + discovery + Only a subtitle, no real content + `; + + const result = parseObservations(xml); + + expect(result).toHaveLength(0); + }); + + it('uses first mode type as fallback when type is missing', () => { + const xml = ` + Missing type field + `; + + const result = parseObservations(xml); + + expect(result).toHaveLength(1); + // First type in mocked mode is 'bugfix' + expect(result[0].type).toBe('bugfix'); + }); + + it('returns empty array when no observation blocks are present', () => { + const result = parseObservations('Some text without any observations.'); + expect(result).toHaveLength(0); + }); + + it('parses files_read and files_modified arrays correctly', () => { + const xml = ` + bugfix + File read tracking + src/utils.tssrc/parser.ts + src/utils.ts + `; + + const result = parseObservations(xml); + + expect(result).toHaveLength(1); + expect(result[0].files_read).toEqual(['src/utils.ts', 'src/parser.ts']); + expect(result[0].files_modified).toEqual(['src/utils.ts']); + }); +});