From 0a40c4c596ac2af1d5e84317e13ffca8b0776b29 Mon Sep 17 00:00:00 2001 From: TerrifiedBug <35064668+TerrifiedBug@users.noreply.github.com> Date: Mon, 16 Feb 2026 05:30:29 +0000 Subject: [PATCH] fix: include project in ChromaDB where clause for vector search (#1112) When searching with a project parameter, the ChromaDB vector query was not filtering by project. It only filtered by doc_type. This caused larger projects to dominate the top-N results returned by ChromaDB, effectively crowding out results from smaller projects before the post-hoc SQLite project filter could take effect. For example, with project A having 19,000 embeddings and project B having 700, a search scoped to project B would return mostly project A results from ChromaDB. After SQLite filtered by project, only 1-3 results from B would survive instead of the expected 20+. The fix adds the project to the ChromaDB where clause using $and when both doc_type and project filters are needed. This is applied in both ChromaSearchStrategy.buildWhereFilter() and SearchManager.search(). Co-authored-by: TARS --- src/services/worker/SearchManager.ts | 14 +++++- .../search/strategies/ChromaSearchStrategy.ts | 35 ++++++++++---- .../strategies/chroma-search-strategy.test.ts | 46 +++++++++++++++++++ 3 files changed, 85 insertions(+), 10 deletions(-) diff --git a/src/services/worker/SearchManager.ts b/src/services/worker/SearchManager.ts index 243cd4a4..cdc7bc93 100644 --- a/src/services/worker/SearchManager.ts +++ b/src/services/worker/SearchManager.ts @@ -154,7 +154,7 @@ export class SearchManager { let chromaSucceeded = false; logger.debug('SEARCH', 'Using ChromaDB semantic search', { typeFilter: type || 'all' }); - // Build Chroma where filter for doc_type + // Build Chroma where filter for doc_type and project let whereFilter: Record | undefined; if (type === 'observations') { whereFilter = { doc_type: 'observation' }; @@ -164,7 +164,17 @@ export class SearchManager { whereFilter = { doc_type: 'user_prompt' }; } - // Step 1: Chroma semantic search with optional type filter + // Include project in the Chroma where clause to scope vector search. + // Without this, larger projects dominate the top-N results and smaller + // projects get crowded out before the post-hoc SQLite filter. + if (options.project) { + const projectFilter = { project: options.project }; + whereFilter = whereFilter + ? { $and: [whereFilter, projectFilter] } + : projectFilter; + } + + // Step 1: Chroma semantic search with optional type + project filter const chromaResults = await this.queryChroma(query, 100, whereFilter); chromaSucceeded = true; // Chroma didn't throw error logger.debug('SEARCH', 'ChromaDB returned semantic matches', { matchCount: chromaResults.ids.length }); diff --git a/src/services/worker/search/strategies/ChromaSearchStrategy.ts b/src/services/worker/search/strategies/ChromaSearchStrategy.ts index 633032c0..fc7d4188 100644 --- a/src/services/worker/search/strategies/ChromaSearchStrategy.ts +++ b/src/services/worker/search/strategies/ChromaSearchStrategy.ts @@ -64,8 +64,8 @@ export class ChromaSearchStrategy extends BaseSearchStrategy implements SearchSt let prompts: UserPromptSearchResult[] = []; try { - // Build Chroma where filter for doc_type - const whereFilter = this.buildWhereFilter(searchType); + // Build Chroma where filter for doc_type and project + const whereFilter = this.buildWhereFilter(searchType, project); // Step 1: Chroma semantic search logger.debug('SEARCH', 'ChromaSearchStrategy: Querying Chroma', { query, searchType }); @@ -150,19 +150,38 @@ export class ChromaSearchStrategy extends BaseSearchStrategy implements SearchSt } /** - * Build Chroma where filter for document type + * Build Chroma where filter for document type and project + * + * When a project is specified, includes it in the ChromaDB where clause + * so that vector search is scoped to the target project. Without this, + * larger projects dominate the top-N results and smaller projects get + * crowded out before the post-hoc SQLite project filter can take effect. */ - private buildWhereFilter(searchType: string): Record | undefined { + private buildWhereFilter(searchType: string, project?: string): Record | undefined { + let docTypeFilter: Record | undefined; switch (searchType) { case 'observations': - return { doc_type: 'observation' }; + docTypeFilter = { doc_type: 'observation' }; + break; case 'sessions': - return { doc_type: 'session_summary' }; + docTypeFilter = { doc_type: 'session_summary' }; + break; case 'prompts': - return { doc_type: 'user_prompt' }; + docTypeFilter = { doc_type: 'user_prompt' }; + break; default: - return undefined; + docTypeFilter = undefined; } + + if (project) { + const projectFilter = { project }; + if (docTypeFilter) { + return { $and: [docTypeFilter, projectFilter] }; + } + return projectFilter; + } + + return docTypeFilter; } /** diff --git a/tests/worker/search/strategies/chroma-search-strategy.test.ts b/tests/worker/search/strategies/chroma-search-strategy.test.ts index 09a8078c..d99a13ae 100644 --- a/tests/worker/search/strategies/chroma-search-strategy.test.ts +++ b/tests/worker/search/strategies/chroma-search-strategy.test.ts @@ -213,6 +213,52 @@ describe('ChromaSearchStrategy', () => { ); }); + it('should include project in Chroma where clause when specified', async () => { + const options: StrategySearchOptions = { + query: 'test query', + project: 'my-project' + }; + + await strategy.search(options); + + expect(mockChromaSync.queryChroma).toHaveBeenCalledWith( + 'test query', + 100, + { project: 'my-project' } + ); + }); + + it('should combine doc_type and project with $and when both specified', async () => { + const options: StrategySearchOptions = { + query: 'test query', + searchType: 'observations', + project: 'my-project' + }; + + await strategy.search(options); + + expect(mockChromaSync.queryChroma).toHaveBeenCalledWith( + 'test query', + 100, + { $and: [{ doc_type: 'observation' }, { project: 'my-project' }] } + ); + }); + + it('should not include project filter when project is not specified', async () => { + const options: StrategySearchOptions = { + query: 'test query', + searchType: 'observations' + }; + + await strategy.search(options); + + expect(mockChromaSync.queryChroma).toHaveBeenCalledWith( + 'test query', + 100, + { doc_type: 'observation' } + ); + }); + it('should return empty result when no query provided', async () => { const options: StrategySearchOptions = { query: undefined