fix: include project in ChromaDB where clause for vector search (#1112)

When searching with a project parameter, the ChromaDB vector query was
not filtering by project. It only filtered by doc_type. This caused
larger projects to dominate the top-N results returned by ChromaDB,
effectively crowding out results from smaller projects before the
post-hoc SQLite project filter could take effect.

For example, with project A having 19,000 embeddings and project B
having 700, a search scoped to project B would return mostly project A
results from ChromaDB. After SQLite filtered by project, only 1-3
results from B would survive instead of the expected 20+.

The fix adds the project to the ChromaDB where clause whenever a project
is specified. When a doc_type filter is also present, the two filters
are combined with $and; otherwise the project filter is used on its own.
This is applied in both ChromaSearchStrategy.buildWhereFilter() and
SearchManager.search().

Co-authored-by: TARS <tars@openclaw.local>
This commit is contained in:
TerrifiedBug
2026-02-16 05:30:29 +00:00
committed by GitHub
parent cef15011c2
commit 0a40c4c596
3 changed files with 85 additions and 10 deletions
+12 -2
View File
@@ -154,7 +154,7 @@ export class SearchManager {
let chromaSucceeded = false;
logger.debug('SEARCH', 'Using ChromaDB semantic search', { typeFilter: type || 'all' });
// Build Chroma where filter for doc_type
// Build Chroma where filter for doc_type and project
let whereFilter: Record<string, any> | undefined;
if (type === 'observations') {
whereFilter = { doc_type: 'observation' };
@@ -164,7 +164,17 @@ export class SearchManager {
whereFilter = { doc_type: 'user_prompt' };
}
// Step 1: Chroma semantic search with optional type filter
// Include project in the Chroma where clause to scope vector search.
// Without this, larger projects dominate the top-N results and smaller
// projects get crowded out before the post-hoc SQLite filter.
if (options.project) {
const projectFilter = { project: options.project };
whereFilter = whereFilter
? { $and: [whereFilter, projectFilter] }
: projectFilter;
}
// Step 1: Chroma semantic search with optional type + project filter
const chromaResults = await this.queryChroma(query, 100, whereFilter);
chromaSucceeded = true; // Chroma didn't throw error
logger.debug('SEARCH', 'ChromaDB returned semantic matches', { matchCount: chromaResults.ids.length });
@@ -64,8 +64,8 @@ export class ChromaSearchStrategy extends BaseSearchStrategy implements SearchSt
let prompts: UserPromptSearchResult[] = [];
try {
// Build Chroma where filter for doc_type
const whereFilter = this.buildWhereFilter(searchType);
// Build Chroma where filter for doc_type and project
const whereFilter = this.buildWhereFilter(searchType, project);
// Step 1: Chroma semantic search
logger.debug('SEARCH', 'ChromaSearchStrategy: Querying Chroma', { query, searchType });
@@ -150,19 +150,38 @@ export class ChromaSearchStrategy extends BaseSearchStrategy implements SearchSt
}
/**
* Build Chroma where filter for document type
* Build Chroma where filter for document type and project
*
* When a project is specified, includes it in the ChromaDB where clause
* so that vector search is scoped to the target project. Without this,
* larger projects dominate the top-N results and smaller projects get
* crowded out before the post-hoc SQLite project filter can take effect.
*/
private buildWhereFilter(searchType: string): Record<string, any> | undefined {
private buildWhereFilter(searchType: string, project?: string): Record<string, any> | undefined {
let docTypeFilter: Record<string, any> | undefined;
switch (searchType) {
case 'observations':
return { doc_type: 'observation' };
docTypeFilter = { doc_type: 'observation' };
break;
case 'sessions':
return { doc_type: 'session_summary' };
docTypeFilter = { doc_type: 'session_summary' };
break;
case 'prompts':
return { doc_type: 'user_prompt' };
docTypeFilter = { doc_type: 'user_prompt' };
break;
default:
return undefined;
docTypeFilter = undefined;
}
if (project) {
const projectFilter = { project };
if (docTypeFilter) {
return { $and: [docTypeFilter, projectFilter] };
}
return projectFilter;
}
return docTypeFilter;
}
/**