fix: align IDs with metadatas in ChromaSearchStrategy
ChromaSync.queryChroma() returns deduplicated sqlite_ids but the metadatas array contains multiple entries per observation (narrative + facts). The filterByRecency() method was iterating over metadatas and using the index to access ids, causing array out-of-bounds access. The fix builds a Map from sqlite_id to metadata, then iterates over the deduplicated ids array to ensure proper alignment. Symptoms before fix: - Semantic search returning incorrect/empty results - Search only working with near-exact queries - Recent items (same day) not being found Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
committed by
Alex Newman
parent
f62b5d248c
commit
9bd56c993c
@@ -167,6 +167,13 @@ export class ChromaSearchStrategy extends BaseSearchStrategy implements SearchSt
|
||||
|
||||
/**
|
||||
* Filter results by recency (90-day window)
|
||||
*
|
||||
* IMPORTANT: ChromaSync.queryChroma() returns deduplicated `ids` (unique sqlite_ids)
|
||||
* but the `metadatas` array may contain multiple entries per sqlite_id (e.g., one
|
||||
* observation can have narrative + multiple facts as separate Chroma documents).
|
||||
*
|
||||
* This method iterates over the deduplicated `ids` and finds the first matching
|
||||
* metadata for each ID to avoid array misalignment issues.
|
||||
*/
|
||||
private filterByRecency(chromaResults: {
|
||||
ids: number[];
|
||||
@@ -174,10 +181,19 @@ export class ChromaSearchStrategy extends BaseSearchStrategy implements SearchSt
|
||||
}): Array<{ id: number; meta: ChromaMetadata }> {
|
||||
const cutoff = Date.now() - SEARCH_CONSTANTS.RECENCY_WINDOW_MS;
|
||||
|
||||
return chromaResults.metadatas
|
||||
.map((meta, idx) => ({
|
||||
id: chromaResults.ids[idx],
|
||||
meta
|
||||
// Build a map from sqlite_id to first metadata for efficient lookup
|
||||
const metadataByIdMap = new Map<number, ChromaMetadata>();
|
||||
for (const meta of chromaResults.metadatas) {
|
||||
if (meta?.sqlite_id !== undefined && !metadataByIdMap.has(meta.sqlite_id)) {
|
||||
metadataByIdMap.set(meta.sqlite_id, meta);
|
||||
}
|
||||
}
|
||||
|
||||
// Iterate over deduplicated ids and get corresponding metadata
|
||||
return chromaResults.ids
|
||||
.map(id => ({
|
||||
id,
|
||||
meta: metadataByIdMap.get(id) as ChromaMetadata
|
||||
}))
|
||||
.filter(item => item.meta && item.meta.created_at_epoch > cutoff);
|
||||
}
|
||||
|
||||
@@ -295,6 +295,87 @@ describe('ChromaSearchStrategy', () => {
|
||||
expect(result.usedChroma).toBe(false); // Error occurred
|
||||
expect(result.results.observations).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('should correctly align IDs with metadatas when Chroma returns duplicate sqlite_ids (multiple docs per observation)', async () => {
|
||||
// BUG SCENARIO: One observation (id=100) has 3 documents in Chroma (narrative + 2 facts)
|
||||
// Another observation (id=200) has 1 document
|
||||
// Chroma returns 4 metadatas but after deduplication we have 2 unique IDs
|
||||
// The metadatas MUST be deduplicated/aligned to match the unique IDs
|
||||
const recentEpoch = Date.now() - 1000 * 60 * 60 * 24; // 1 day ago
|
||||
|
||||
mockChromaSync.queryChroma = mock(() => Promise.resolve({
|
||||
// After deduplication in ChromaSync.queryChroma, ids should be [100, 200]
|
||||
// But metadatas array has 4 elements - THIS IS THE BUG
|
||||
ids: [100, 200], // Deduplicated
|
||||
distances: [0.3, 0.4, 0.5, 0.6], // Original 4 distances
|
||||
metadatas: [
|
||||
// Original 4 metadatas - not aligned with deduplicated ids!
|
||||
{ sqlite_id: 100, doc_type: 'observation', created_at_epoch: recentEpoch },
|
||||
{ sqlite_id: 100, doc_type: 'observation', created_at_epoch: recentEpoch },
|
||||
{ sqlite_id: 100, doc_type: 'observation', created_at_epoch: recentEpoch },
|
||||
{ sqlite_id: 200, doc_type: 'observation', created_at_epoch: recentEpoch }
|
||||
]
|
||||
}));
|
||||
|
||||
// Mock that returns observations when called with correct IDs
|
||||
const mockObs100 = { ...mockObservation, id: 100 };
|
||||
const mockObs200 = { ...mockObservation, id: 200, title: 'Second observation' };
|
||||
mockSessionStore.getObservationsByIds = mock((ids: number[]) => {
|
||||
// Should receive [100, 200]
|
||||
return ids.map(id => id === 100 ? mockObs100 : mockObs200);
|
||||
});
|
||||
|
||||
const options: StrategySearchOptions = {
|
||||
query: 'test query',
|
||||
searchType: 'observations'
|
||||
};
|
||||
|
||||
const result = await strategy.search(options);
|
||||
|
||||
// The strategy should correctly identify BOTH observations
|
||||
// Before the fix: idx=2 and idx=3 would access ids[2] and ids[3] which are undefined
|
||||
expect(result.usedChroma).toBe(true);
|
||||
expect(mockSessionStore.getObservationsByIds).toHaveBeenCalled();
|
||||
|
||||
// Verify the correct IDs were passed to SQLite hydration
|
||||
const calledWith = mockSessionStore.getObservationsByIds.mock.calls[0][0];
|
||||
expect(calledWith).toContain(100);
|
||||
expect(calledWith).toContain(200);
|
||||
expect(calledWith.length).toBe(2); // Should have exactly 2 unique IDs
|
||||
});
|
||||
|
||||
it('should handle misaligned arrays gracefully without undefined access', async () => {
|
||||
// Edge case: metadatas array longer than ids array
|
||||
// This simulates the actual bug condition
|
||||
const recentEpoch = Date.now() - 1000 * 60 * 60 * 24;
|
||||
|
||||
mockChromaSync.queryChroma = mock(() => Promise.resolve({
|
||||
ids: [100], // Only 1 ID after deduplication
|
||||
distances: [0.3, 0.4, 0.5], // 3 distances
|
||||
metadatas: [
|
||||
{ sqlite_id: 100, doc_type: 'observation', created_at_epoch: recentEpoch },
|
||||
{ sqlite_id: 100, doc_type: 'observation', created_at_epoch: recentEpoch },
|
||||
{ sqlite_id: 100, doc_type: 'observation', created_at_epoch: recentEpoch }
|
||||
] // 3 metadatas for same observation
|
||||
}));
|
||||
|
||||
mockSessionStore.getObservationsByIds = mock(() => [mockObservation]);
|
||||
|
||||
const options: StrategySearchOptions = {
|
||||
query: 'test query',
|
||||
searchType: 'observations'
|
||||
};
|
||||
|
||||
// Before fix: This would try to access ids[1], ids[2] which are undefined
|
||||
// causing incorrect filtering or crashes
|
||||
const result = await strategy.search(options);
|
||||
|
||||
expect(result.usedChroma).toBe(true);
|
||||
// Should still find the one observation correctly
|
||||
expect(mockSessionStore.getObservationsByIds).toHaveBeenCalled();
|
||||
const calledWith = mockSessionStore.getObservationsByIds.mock.calls[0][0];
|
||||
expect(calledWith).toEqual([100]);
|
||||
});
|
||||
});
|
||||
|
||||
describe('strategy name', () => {
|
||||
|
||||
Reference in New Issue
Block a user