fix: include project in ChromaDB where clause for vector search (#1112)
When searching with a project parameter, the ChromaDB vector query filtered only by doc_type, not by project. As a result, larger projects dominated the top-N results returned by ChromaDB, crowding out results from smaller projects before the post-hoc SQLite project filter could take effect. For example, with project A having 19,000 embeddings and project B having 700, a search scoped to project B would get mostly project A results back from ChromaDB; once SQLite filtered by project, only 1-3 results from B would survive instead of the expected 20+. The fix adds the project to the ChromaDB where clause, combining it with the doc_type filter via $and when both filters are needed and using the project filter alone otherwise. This is applied in both ChromaSearchStrategy.buildWhereFilter() and SearchManager.search(). Co-authored-by: TARS <tars@openclaw.local>
This commit is contained in:
@@ -154,7 +154,7 @@ export class SearchManager {
|
|||||||
let chromaSucceeded = false;
|
let chromaSucceeded = false;
|
||||||
logger.debug('SEARCH', 'Using ChromaDB semantic search', { typeFilter: type || 'all' });
|
logger.debug('SEARCH', 'Using ChromaDB semantic search', { typeFilter: type || 'all' });
|
||||||
|
|
||||||
// Build Chroma where filter for doc_type
|
// Build Chroma where filter for doc_type and project
|
||||||
let whereFilter: Record<string, any> | undefined;
|
let whereFilter: Record<string, any> | undefined;
|
||||||
if (type === 'observations') {
|
if (type === 'observations') {
|
||||||
whereFilter = { doc_type: 'observation' };
|
whereFilter = { doc_type: 'observation' };
|
||||||
@@ -164,7 +164,17 @@ export class SearchManager {
|
|||||||
whereFilter = { doc_type: 'user_prompt' };
|
whereFilter = { doc_type: 'user_prompt' };
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step 1: Chroma semantic search with optional type filter
|
// Include project in the Chroma where clause to scope vector search.
|
||||||
|
// Without this, larger projects dominate the top-N results and smaller
|
||||||
|
// projects get crowded out before the post-hoc SQLite filter.
|
||||||
|
if (options.project) {
|
||||||
|
const projectFilter = { project: options.project };
|
||||||
|
whereFilter = whereFilter
|
||||||
|
? { $and: [whereFilter, projectFilter] }
|
||||||
|
: projectFilter;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 1: Chroma semantic search with optional type + project filter
|
||||||
const chromaResults = await this.queryChroma(query, 100, whereFilter);
|
const chromaResults = await this.queryChroma(query, 100, whereFilter);
|
||||||
chromaSucceeded = true; // Chroma didn't throw error
|
chromaSucceeded = true; // Chroma didn't throw error
|
||||||
logger.debug('SEARCH', 'ChromaDB returned semantic matches', { matchCount: chromaResults.ids.length });
|
logger.debug('SEARCH', 'ChromaDB returned semantic matches', { matchCount: chromaResults.ids.length });
|
||||||
|
|||||||
@@ -64,8 +64,8 @@ export class ChromaSearchStrategy extends BaseSearchStrategy implements SearchSt
|
|||||||
let prompts: UserPromptSearchResult[] = [];
|
let prompts: UserPromptSearchResult[] = [];
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Build Chroma where filter for doc_type
|
// Build Chroma where filter for doc_type and project
|
||||||
const whereFilter = this.buildWhereFilter(searchType);
|
const whereFilter = this.buildWhereFilter(searchType, project);
|
||||||
|
|
||||||
// Step 1: Chroma semantic search
|
// Step 1: Chroma semantic search
|
||||||
logger.debug('SEARCH', 'ChromaSearchStrategy: Querying Chroma', { query, searchType });
|
logger.debug('SEARCH', 'ChromaSearchStrategy: Querying Chroma', { query, searchType });
|
||||||
@@ -150,19 +150,38 @@ export class ChromaSearchStrategy extends BaseSearchStrategy implements SearchSt
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Build Chroma where filter for document type
|
* Build Chroma where filter for document type and project
|
||||||
|
*
|
||||||
|
* When a project is specified, includes it in the ChromaDB where clause
|
||||||
|
* so that vector search is scoped to the target project. Without this,
|
||||||
|
* larger projects dominate the top-N results and smaller projects get
|
||||||
|
* crowded out before the post-hoc SQLite project filter can take effect.
|
||||||
*/
|
*/
|
||||||
private buildWhereFilter(searchType: string): Record<string, any> | undefined {
|
private buildWhereFilter(searchType: string, project?: string): Record<string, any> | undefined {
|
||||||
|
let docTypeFilter: Record<string, any> | undefined;
|
||||||
switch (searchType) {
|
switch (searchType) {
|
||||||
case 'observations':
|
case 'observations':
|
||||||
return { doc_type: 'observation' };
|
docTypeFilter = { doc_type: 'observation' };
|
||||||
|
break;
|
||||||
case 'sessions':
|
case 'sessions':
|
||||||
return { doc_type: 'session_summary' };
|
docTypeFilter = { doc_type: 'session_summary' };
|
||||||
|
break;
|
||||||
case 'prompts':
|
case 'prompts':
|
||||||
return { doc_type: 'user_prompt' };
|
docTypeFilter = { doc_type: 'user_prompt' };
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
return undefined;
|
docTypeFilter = undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (project) {
|
||||||
|
const projectFilter = { project };
|
||||||
|
if (docTypeFilter) {
|
||||||
|
return { $and: [docTypeFilter, projectFilter] };
|
||||||
|
}
|
||||||
|
return projectFilter;
|
||||||
|
}
|
||||||
|
|
||||||
|
return docTypeFilter;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -213,6 +213,52 @@ describe('ChromaSearchStrategy', () => {
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should include project in Chroma where clause when specified', async () => {
|
||||||
|
const options: StrategySearchOptions = {
|
||||||
|
query: 'test query',
|
||||||
|
project: 'my-project'
|
||||||
|
};
|
||||||
|
|
||||||
|
await strategy.search(options);
|
||||||
|
|
||||||
|
expect(mockChromaSync.queryChroma).toHaveBeenCalledWith(
|
||||||
|
'test query',
|
||||||
|
100,
|
||||||
|
{ project: 'my-project' }
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should combine doc_type and project with $and when both specified', async () => {
|
||||||
|
const options: StrategySearchOptions = {
|
||||||
|
query: 'test query',
|
||||||
|
searchType: 'observations',
|
||||||
|
project: 'my-project'
|
||||||
|
};
|
||||||
|
|
||||||
|
await strategy.search(options);
|
||||||
|
|
||||||
|
expect(mockChromaSync.queryChroma).toHaveBeenCalledWith(
|
||||||
|
'test query',
|
||||||
|
100,
|
||||||
|
{ $and: [{ doc_type: 'observation' }, { project: 'my-project' }] }
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should not include project filter when project is not specified', async () => {
|
||||||
|
const options: StrategySearchOptions = {
|
||||||
|
query: 'test query',
|
||||||
|
searchType: 'observations'
|
||||||
|
};
|
||||||
|
|
||||||
|
await strategy.search(options);
|
||||||
|
|
||||||
|
expect(mockChromaSync.queryChroma).toHaveBeenCalledWith(
|
||||||
|
'test query',
|
||||||
|
100,
|
||||||
|
{ doc_type: 'observation' }
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
it('should return empty result when no query provided', async () => {
|
it('should return empty result when no query provided', async () => {
|
||||||
const options: StrategySearchOptions = {
|
const options: StrategySearchOptions = {
|
||||||
query: undefined
|
query: undefined
|
||||||
|
|||||||
Reference in New Issue
Block a user