fix: add content-hash dedup to batch observation store methods (#1302)

storeObservations() and storeObservationsAndMarkComplete() were missing
the content-hash deduplication that storeObservation() (singular) already
had via computeObservationContentHash() and findDuplicateObservation().

This caused the Gemini provider (and potentially other providers that
return multiple observations per response) to insert 2-10x duplicate rows
per tool use, because the batch methods inserted every observation
unconditionally without checking content_hash.

The fix adds the same dedup pattern from storeObservation() to both
batch methods:
1. Compute a content hash for each observation via computeObservationContentHash()
2. Check for an existing observation with the same hash within the 30s window via findDuplicateObservation()
3. Skip the insert and reuse the existing ID if a duplicate is found
4. Include the content_hash column in the INSERT statement

Fixes #1158 (duplicate observations with Gemini provider)

Co-authored-by: Enzo Ricciulli <e.ricciulli@systhema.ai>
This commit is contained in:
enzoricciulli
2026-03-13 04:01:53 +01:00
committed by GitHub
parent ad902bedd9
commit e7ba9acaa7
+24 -6
View File
@@ -1659,15 +1659,23 @@ export class SessionStore {
const storeTx = this.db.transaction(() => { const storeTx = this.db.transaction(() => {
const observationIds: number[] = []; const observationIds: number[] = [];
// 1. Store all observations // 1. Store all observations (with content-hash deduplication)
const obsStmt = this.db.prepare(` const obsStmt = this.db.prepare(`
INSERT INTO observations INSERT INTO observations
(memory_session_id, project, type, title, subtitle, facts, narrative, concepts, (memory_session_id, project, type, title, subtitle, facts, narrative, concepts,
files_read, files_modified, prompt_number, discovery_tokens, created_at, created_at_epoch) files_read, files_modified, prompt_number, discovery_tokens, content_hash, created_at, created_at_epoch)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`); `);
for (const observation of observations) { for (const observation of observations) {
// Content-hash deduplication (same logic as storeObservation singular)
const contentHash = computeObservationContentHash(memorySessionId, observation.title, observation.narrative);
const existing = findDuplicateObservation(this.db, contentHash, timestampEpoch);
if (existing) {
observationIds.push(existing.id);
continue;
}
const result = obsStmt.run( const result = obsStmt.run(
memorySessionId, memorySessionId,
project, project,
@@ -1681,6 +1689,7 @@ export class SessionStore {
JSON.stringify(observation.files_modified), JSON.stringify(observation.files_modified),
promptNumber || null, promptNumber || null,
discoveryTokens, discoveryTokens,
contentHash,
timestampIso, timestampIso,
timestampEpoch timestampEpoch
); );
@@ -1779,15 +1788,23 @@ export class SessionStore {
const storeAndMarkTx = this.db.transaction(() => { const storeAndMarkTx = this.db.transaction(() => {
const observationIds: number[] = []; const observationIds: number[] = [];
// 1. Store all observations // 1. Store all observations (with content-hash deduplication)
const obsStmt = this.db.prepare(` const obsStmt = this.db.prepare(`
INSERT INTO observations INSERT INTO observations
(memory_session_id, project, type, title, subtitle, facts, narrative, concepts, (memory_session_id, project, type, title, subtitle, facts, narrative, concepts,
files_read, files_modified, prompt_number, discovery_tokens, created_at, created_at_epoch) files_read, files_modified, prompt_number, discovery_tokens, content_hash, created_at, created_at_epoch)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`); `);
for (const observation of observations) { for (const observation of observations) {
// Content-hash deduplication (same logic as storeObservation singular)
const contentHash = computeObservationContentHash(memorySessionId, observation.title, observation.narrative);
const existing = findDuplicateObservation(this.db, contentHash, timestampEpoch);
if (existing) {
observationIds.push(existing.id);
continue;
}
const result = obsStmt.run( const result = obsStmt.run(
memorySessionId, memorySessionId,
project, project,
@@ -1801,6 +1818,7 @@ export class SessionStore {
JSON.stringify(observation.files_modified), JSON.stringify(observation.files_modified),
promptNumber || null, promptNumber || null,
discoveryTokens, discoveryTokens,
contentHash,
timestampIso, timestampIso,
timestampEpoch timestampEpoch
); );