From e7ba9acaa763b9e4ce67d2b1c1e322f68b51dc7d Mon Sep 17 00:00:00 2001 From: enzoricciulli Date: Fri, 13 Mar 2026 04:01:53 +0100 Subject: [PATCH] fix: add content-hash dedup to batch observation store methods (#1302) storeObservations() and storeObservationsAndMarkComplete() were missing the content-hash deduplication that storeObservation() (singular) already had via computeObservationContentHash() and findDuplicateObservation(). This caused the Gemini provider (and potentially others that return multiple observations per response) to insert 2-10x duplicate rows per tool use, since the batch methods inserted unconditionally without checking content_hash. The fix adds the same dedup pattern from storeObservation() to both batch methods: 1. Compute content hash via computeObservationContentHash() 2. Check for existing observation within 30s window via findDuplicateObservation() 3. Skip insert and reuse existing ID if duplicate found 4. Include content_hash column in INSERT statement Fixes #1158 (duplicate observations with Gemini provider) Co-authored-by: Enzo Ricciulli --- src/services/sqlite/SessionStore.ts | 30 +++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/src/services/sqlite/SessionStore.ts b/src/services/sqlite/SessionStore.ts index 29c4beed..cb22fdca 100644 --- a/src/services/sqlite/SessionStore.ts +++ b/src/services/sqlite/SessionStore.ts @@ -1659,15 +1659,23 @@ export class SessionStore { const storeTx = this.db.transaction(() => { const observationIds: number[] = []; - // 1. Store all observations + // 1. Store all observations (with content-hash deduplication) const obsStmt = this.db.prepare(` INSERT INTO observations (memory_session_id, project, type, title, subtitle, facts, narrative, concepts, - files_read, files_modified, prompt_number, discovery_tokens, created_at, created_at_epoch) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+ files_read, files_modified, prompt_number, discovery_tokens, content_hash, created_at, created_at_epoch) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) `); for (const observation of observations) { + // Content-hash deduplication (same logic as storeObservation singular) + const contentHash = computeObservationContentHash(memorySessionId, observation.title, observation.narrative); + const existing = findDuplicateObservation(this.db, contentHash, timestampEpoch); + if (existing) { + observationIds.push(existing.id); + continue; + } + const result = obsStmt.run( memorySessionId, project, @@ -1681,6 +1689,7 @@ export class SessionStore { JSON.stringify(observation.files_modified), promptNumber || null, discoveryTokens, + contentHash, timestampIso, timestampEpoch ); @@ -1779,15 +1788,23 @@ export class SessionStore { const storeAndMarkTx = this.db.transaction(() => { const observationIds: number[] = []; - // 1. Store all observations + // 1. Store all observations (with content-hash deduplication) const obsStmt = this.db.prepare(` INSERT INTO observations (memory_session_id, project, type, title, subtitle, facts, narrative, concepts, - files_read, files_modified, prompt_number, discovery_tokens, created_at, created_at_epoch) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + files_read, files_modified, prompt_number, discovery_tokens, content_hash, created_at, created_at_epoch) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`); for (const observation of observations) { + // Content-hash deduplication (same logic as storeObservation singular) + const contentHash = computeObservationContentHash(memorySessionId, observation.title, observation.narrative); + const existing = findDuplicateObservation(this.db, contentHash, timestampEpoch); + if (existing) { + observationIds.push(existing.id); + continue; + } + const result = obsStmt.run( memorySessionId, project, @@ -1801,6 +1818,7 @@ export class SessionStore { JSON.stringify(observation.files_modified), promptNumber || null, discoveryTokens, + contentHash, timestampIso, timestampEpoch );