Files
claude-mem/tests/worker/search/strategies/chroma-search-strategy.test.ts
T
Abdelkarim Mateos Sanchez 9bd56c993c fix: align IDs with metadatas in ChromaSearchStrategy
ChromaSync.queryChroma() returns deduplicated sqlite_ids but the
metadatas array contains multiple entries per observation (narrative +
facts). The filterByRecency() method was iterating over metadatas and
using the index to access ids, causing array out-of-bounds access.

The fix builds a Map from sqlite_id to metadata, then iterates over
the deduplicated ids array to ensure proper alignment.

Symptoms before fix:
- Semantic search returning incorrect/empty results
- Search only working with near-exact queries
- Recent items (same day) not being found

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-06 02:07:03 -05:00

387 lines
13 KiB
TypeScript

import { describe, it, expect, mock, beforeEach } from 'bun:test';
import { ChromaSearchStrategy } from '../../../../src/services/worker/search/strategies/ChromaSearchStrategy.js';
import type { StrategySearchOptions, ObservationSearchResult, SessionSummarySearchResult, UserPromptSearchResult } from '../../../../src/services/worker/search/types.js';
/**
 * Observation row fixture handed back by the mocked session store.
 * List-like columns (facts, concepts, files_*) are JSON-encoded strings.
 */
const mockObservation: ObservationSearchResult = {
  id: 1,
  memory_session_id: 'session-123',
  project: 'test-project',
  text: 'Test observation text',
  type: 'decision',
  title: 'Test Decision',
  subtitle: 'A test subtitle',
  facts: '["fact1", "fact2"]',
  narrative: 'Test narrative',
  concepts: '["concept1", "concept2"]',
  files_read: '["file1.ts"]',
  files_modified: '["file2.ts"]',
  prompt_number: 1,
  discovery_tokens: 100,
  created_at: '2025-01-01T12:00:00.000Z',
  // 24 hours before the moment this module is evaluated.
  created_at_epoch: Date.now() - 24 * 60 * 60 * 1000,
};
/**
 * Session-summary row fixture handed back by the mocked session store.
 * File lists are JSON-encoded strings.
 */
const mockSession: SessionSummarySearchResult = {
  id: 2,
  memory_session_id: 'session-123',
  project: 'test-project',
  request: 'Test request',
  investigated: 'Test investigated',
  learned: 'Test learned',
  completed: 'Test completed',
  next_steps: 'Test next steps',
  files_read: '["file1.ts"]',
  files_edited: '["file2.ts"]',
  notes: 'Test notes',
  prompt_number: 1,
  discovery_tokens: 500,
  created_at: '2025-01-01T12:00:00.000Z',
  // 24 hours before the moment this module is evaluated.
  created_at_epoch: Date.now() - 24 * 60 * 60 * 1000,
};
/** User-prompt row fixture handed back by the mocked session store. */
const mockPrompt: UserPromptSearchResult = {
  id: 3,
  content_session_id: 'content-session-123',
  prompt_number: 1,
  prompt_text: 'Test prompt text',
  created_at: '2025-01-01T12:00:00.000Z',
  // 24 hours before the moment this module is evaluated.
  created_at_epoch: Date.now() - 24 * 60 * 60 * 1000,
};
describe('ChromaSearchStrategy', () => {
// Shared fixtures; every test starts from freshly built mocks and a new strategy.
let strategy: ChromaSearchStrategy;
let mockChromaSync: any;
let mockSessionStore: any;

beforeEach(() => {
  // One day old, i.e. inside the 90-day recency window.
  const oneDayAgo = Date.now() - 24 * 60 * 60 * 1000;
  const docTypes = ['observation', 'session_summary', 'user_prompt'];

  // Default Chroma reply: one hit per document type, with sqlite ids 1, 2 and 3.
  const queryChroma = mock(async () => ({
    ids: [1, 2, 3],
    distances: [0.1, 0.2, 0.3],
    metadatas: docTypes.map((doc_type, index) => ({
      sqlite_id: index + 1,
      doc_type,
      created_at_epoch: oneDayAgo
    }))
  }));
  mockChromaSync = { queryChroma };

  // Each hydration call answers with its single matching fixture row.
  const getObservationsByIds = mock(() => [mockObservation]);
  const getSessionSummariesByIds = mock(() => [mockSession]);
  const getUserPromptsByIds = mock(() => [mockPrompt]);
  mockSessionStore = { getObservationsByIds, getSessionSummariesByIds, getUserPromptsByIds };

  strategy = new ChromaSearchStrategy(mockChromaSync, mockSessionStore);
});
describe('canHandle', () => {
  // One row per scenario: the options passed to canHandle() and the verdict expected back.
  const scenarios: Array<{ title: string; options: StrategySearchOptions; expected: boolean }> = [
    {
      title: 'should return true when query text is present',
      options: { query: 'semantic search query' },
      expected: true
    },
    {
      title: 'should return false for filter-only (no query)',
      options: { project: 'test-project' },
      expected: false
    },
    {
      title: 'should return false when query is empty string',
      options: { query: '' },
      expected: false
    },
    {
      title: 'should return false when query is undefined',
      options: {},
      expected: false
    }
  ];

  for (const { title, options, expected } of scenarios) {
    it(title, () => {
      expect(strategy.canHandle(options)).toBe(expected);
    });
  }
});
describe('search', () => {
it('should call Chroma with query text', async () => {
  const searchOptions: StrategySearchOptions = { query: 'test query', limit: 10 };

  await strategy.search(searchOptions);

  // Arguments in order: the query text, CHROMA_BATCH_SIZE, and no where filter for 'all'.
  const batchSize = 100;
  const whereFilter = undefined;
  expect(mockChromaSync.queryChroma).toHaveBeenCalledWith('test query', batchSize, whereFilter);
});
it('should return usedChroma: true on success', async () => {
  const searchOptions: StrategySearchOptions = { query: 'test query' };

  const { usedChroma, fellBack, strategy: strategyName } = await strategy.search(searchOptions);

  // A successful Chroma round trip is reported as such, with no fallback.
  expect(usedChroma).toBe(true);
  expect(fellBack).toBe(false);
  expect(strategyName).toBe('chroma');
});
it('should hydrate observations from SQLite', async () => {
  const searchOptions: StrategySearchOptions = {
    query: 'test query',
    searchType: 'observations'
  };

  const { results } = await strategy.search(searchOptions);

  // The store is consulted and its single fixture row ends up in the result.
  expect(mockSessionStore.getObservationsByIds).toHaveBeenCalled();
  expect(results.observations).toHaveLength(1);
});
it('should hydrate sessions from SQLite', async () => {
  const options: StrategySearchOptions = {
    query: 'test query',
    searchType: 'sessions'
  };

  const result = await strategy.search(options);

  expect(mockSessionStore.getSessionSummariesByIds).toHaveBeenCalled();
  // Calling the store is not enough: the single summary the mock returns must
  // also surface in the search result, as the observations hydration test checks.
  expect(result.results.sessions).toHaveLength(1);
});
it('should hydrate prompts from SQLite', async () => {
  const options: StrategySearchOptions = {
    query: 'test query',
    searchType: 'prompts'
  };

  const result = await strategy.search(options);

  expect(mockSessionStore.getUserPromptsByIds).toHaveBeenCalled();
  // Calling the store is not enough: the single prompt the mock returns must
  // also surface in the search result, as the observations hydration test checks.
  expect(result.results.prompts).toHaveLength(1);
});
it('should filter by doc_type when searchType is observations', async () => {
  const searchOptions: StrategySearchOptions = {
    query: 'test query',
    searchType: 'observations'
  };

  await strategy.search(searchOptions);

  // The 'observations' search type is sent to Chroma as an 'observation' doc_type filter.
  const expectedWhere = { doc_type: 'observation' };
  expect(mockChromaSync.queryChroma).toHaveBeenCalledWith('test query', 100, expectedWhere);
});
it('should filter by doc_type when searchType is sessions', async () => {
  const searchOptions: StrategySearchOptions = {
    query: 'test query',
    searchType: 'sessions'
  };

  await strategy.search(searchOptions);

  // The 'sessions' search type is sent to Chroma as a 'session_summary' doc_type filter.
  const expectedWhere = { doc_type: 'session_summary' };
  expect(mockChromaSync.queryChroma).toHaveBeenCalledWith('test query', 100, expectedWhere);
});
it('should filter by doc_type when searchType is prompts', async () => {
  const searchOptions: StrategySearchOptions = {
    query: 'test query',
    searchType: 'prompts'
  };

  await strategy.search(searchOptions);

  // The 'prompts' search type is sent to Chroma as a 'user_prompt' doc_type filter.
  const expectedWhere = { doc_type: 'user_prompt' };
  expect(mockChromaSync.queryChroma).toHaveBeenCalledWith('test query', 100, expectedWhere);
});
it('should return empty result when no query provided', async () => {
  const searchOptions: StrategySearchOptions = { query: undefined };

  const { results } = await strategy.search(searchOptions);

  // Every bucket is empty ...
  expect(results.observations).toHaveLength(0);
  expect(results.sessions).toHaveLength(0);
  expect(results.prompts).toHaveLength(0);
  // ... and Chroma was never queried.
  expect(mockChromaSync.queryChroma).not.toHaveBeenCalled();
});
it('should return empty result when Chroma returns no matches', async () => {
  // Chroma answers successfully but with zero hits.
  mockChromaSync.queryChroma = mock(async () => ({
    ids: [],
    distances: [],
    metadatas: []
  }));
  const searchOptions: StrategySearchOptions = { query: 'no matches query' };

  const { results, usedChroma } = await strategy.search(searchOptions);

  expect(results.observations).toHaveLength(0);
  // Chroma was still used; it simply had nothing to return.
  expect(usedChroma).toBe(true);
});
it('should filter out old results (beyond 90-day window)', async () => {
  const oldEpoch = Date.now() - 1000 * 60 * 60 * 24 * 100; // 100 days ago
  mockChromaSync.queryChroma = mock(() => Promise.resolve({
    ids: [1],
    distances: [0.1],
    metadatas: [
      { sqlite_id: 1, doc_type: 'observation', created_at_epoch: oldEpoch }
    ]
  }));
  const options: StrategySearchOptions = {
    query: 'old data query'
  };

  const result = await strategy.search(options);

  // The stale hit is dropped before hydration, so SQLite is never consulted ...
  expect(mockSessionStore.getObservationsByIds).not.toHaveBeenCalled();
  expect(result.results.observations).toHaveLength(0);
  // ... and the search must still count as a successful Chroma query. Without this
  // check the test would also pass when search() fails internally, because the
  // error path never reaches the store either.
  expect(result.usedChroma).toBe(true);
});
it('should handle Chroma errors gracefully (returns usedChroma: false)', async () => {
  // Every Chroma query rejects.
  mockChromaSync.queryChroma = mock(async () => {
    throw new Error('Chroma connection failed');
  });
  const searchOptions: StrategySearchOptions = { query: 'test query' };

  const { usedChroma, fellBack, results } = await strategy.search(searchOptions);

  // The failure is reported through the flags rather than thrown to the caller.
  expect(usedChroma).toBe(false);
  expect(fellBack).toBe(false);
  expect(results.observations).toHaveLength(0);
  expect(results.sessions).toHaveLength(0);
  expect(results.prompts).toHaveLength(0);
});
it('should handle SQLite hydration errors gracefully', async () => {
  // Chroma succeeds with the default reply, but hydrating observations throws synchronously.
  const failingLookup = mock(() => {
    throw new Error('SQLite error');
  });
  mockSessionStore.getObservationsByIds = failingLookup;
  const searchOptions: StrategySearchOptions = {
    query: 'test query',
    searchType: 'observations'
  };

  const { usedChroma, results } = await strategy.search(searchOptions);

  // An error during hydration is reported the same way as a Chroma failure.
  expect(usedChroma).toBe(false);
  expect(results.observations).toHaveLength(0);
});
it('should correctly align IDs with metadatas when Chroma returns duplicate sqlite_ids (multiple docs per observation)', async () => {
  // Regression scenario: observation 100 is stored as three Chroma documents
  // (narrative + 2 facts) and observation 200 as one. The mocked reply carries the
  // two deduplicated ids next to all four per-document metadatas and distances,
  // so `ids` and `metadatas` cannot be paired by array index.
  const oneDayAgo = Date.now() - 24 * 60 * 60 * 1000;
  const metadataFor = (sqlite_id: number) => ({
    sqlite_id,
    doc_type: 'observation',
    created_at_epoch: oneDayAgo
  });
  mockChromaSync.queryChroma = mock(async () => ({
    ids: [100, 200], // deduplicated
    distances: [0.3, 0.4, 0.5, 0.6], // one per original document
    metadatas: [100, 100, 100, 200].map(metadataFor) // one per original document
  }));

  // Hydration echoes back one row per requested id; it should be asked for [100, 200].
  const firstObservation = { ...mockObservation, id: 100 };
  const secondObservation = { ...mockObservation, id: 200, title: 'Second observation' };
  mockSessionStore.getObservationsByIds = mock((ids: number[]) =>
    ids.map(id => (id === 100 ? firstObservation : secondObservation))
  );

  const searchOptions: StrategySearchOptions = {
    query: 'test query',
    searchType: 'observations'
  };
  const result = await strategy.search(searchOptions);

  // Both observations must be recognised. Index-based pairing would read ids[2]
  // and ids[3], which are undefined, for the third and fourth metadata.
  expect(result.usedChroma).toBe(true);
  expect(mockSessionStore.getObservationsByIds).toHaveBeenCalled();

  // Exactly the two unique ids are forwarded to SQLite hydration.
  const [hydratedIds] = mockSessionStore.getObservationsByIds.mock.calls[0];
  expect(hydratedIds).toContain(100);
  expect(hydratedIds).toContain(200);
  expect(hydratedIds).toHaveLength(2);
});
it('should handle misaligned arrays gracefully without undefined access', async () => {
  // Edge case: a single deduplicated id accompanied by three metadatas (and three
  // distances) that all belong to that same observation.
  const oneDayAgo = Date.now() - 24 * 60 * 60 * 1000;
  const metadataFor = (sqlite_id: number) => ({
    sqlite_id,
    doc_type: 'observation',
    created_at_epoch: oneDayAgo
  });
  mockChromaSync.queryChroma = mock(async () => ({
    ids: [100],
    distances: [0.3, 0.4, 0.5],
    metadatas: [100, 100, 100].map(metadataFor)
  }));
  mockSessionStore.getObservationsByIds = mock(() => [mockObservation]);

  const searchOptions: StrategySearchOptions = {
    query: 'test query',
    searchType: 'observations'
  };

  // Index-based pairing would read ids[1] and ids[2], which are undefined,
  // leading to incorrect filtering or a crash.
  const result = await strategy.search(searchOptions);
  expect(result.usedChroma).toBe(true);

  // The one observation is still found and hydrated by its id.
  expect(mockSessionStore.getObservationsByIds).toHaveBeenCalled();
  const [hydratedIds] = mockSessionStore.getObservationsByIds.mock.calls[0];
  expect(hydratedIds).toEqual([100]);
});
});
describe('strategy name', () => {
  it('should have name "chroma"', () => {
    // The identifier the strategy exposes through its `name` property.
    const { name } = strategy;
    expect(name).toBe('chroma');
  });
});
});