fix: add content-hash dedup to batch observation store methods (#1302)
storeObservations() and storeObservationsAndMarkComplete() were missing the content-hash deduplication that storeObservation() (singular) already had via computeObservationContentHash() and findDuplicateObservation(). This caused the Gemini provider (and potentially others that return multiple observations per response) to insert 2-10x duplicate rows per tool use, since the batch methods inserted unconditionally without checking content_hash. The fix applies the same dedup pattern from storeObservation() to both batch methods: 1. Compute the content hash via computeObservationContentHash() 2. Check for an existing observation within a 30-second window via findDuplicateObservation() 3. Skip the insert and reuse the existing ID if a duplicate is found 4. Include the content_hash column in the INSERT statement Fixes #1158 (duplicate observations with Gemini provider) Co-authored-by: Enzo Ricciulli <e.ricciulli@systhema.ai>
This commit is contained in:
@@ -1659,15 +1659,23 @@ export class SessionStore {
|
||||
const storeTx = this.db.transaction(() => {
|
||||
const observationIds: number[] = [];
|
||||
|
||||
// 1. Store all observations
|
||||
// 1. Store all observations (with content-hash deduplication)
|
||||
const obsStmt = this.db.prepare(`
|
||||
INSERT INTO observations
|
||||
(memory_session_id, project, type, title, subtitle, facts, narrative, concepts,
|
||||
files_read, files_modified, prompt_number, discovery_tokens, created_at, created_at_epoch)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
files_read, files_modified, prompt_number, discovery_tokens, content_hash, created_at, created_at_epoch)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`);
|
||||
|
||||
for (const observation of observations) {
|
||||
// Content-hash deduplication (same logic as storeObservation singular)
|
||||
const contentHash = computeObservationContentHash(memorySessionId, observation.title, observation.narrative);
|
||||
const existing = findDuplicateObservation(this.db, contentHash, timestampEpoch);
|
||||
if (existing) {
|
||||
observationIds.push(existing.id);
|
||||
continue;
|
||||
}
|
||||
|
||||
const result = obsStmt.run(
|
||||
memorySessionId,
|
||||
project,
|
||||
@@ -1681,6 +1689,7 @@ export class SessionStore {
|
||||
JSON.stringify(observation.files_modified),
|
||||
promptNumber || null,
|
||||
discoveryTokens,
|
||||
contentHash,
|
||||
timestampIso,
|
||||
timestampEpoch
|
||||
);
|
||||
@@ -1779,15 +1788,23 @@ export class SessionStore {
|
||||
const storeAndMarkTx = this.db.transaction(() => {
|
||||
const observationIds: number[] = [];
|
||||
|
||||
// 1. Store all observations
|
||||
// 1. Store all observations (with content-hash deduplication)
|
||||
const obsStmt = this.db.prepare(`
|
||||
INSERT INTO observations
|
||||
(memory_session_id, project, type, title, subtitle, facts, narrative, concepts,
|
||||
files_read, files_modified, prompt_number, discovery_tokens, created_at, created_at_epoch)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
files_read, files_modified, prompt_number, discovery_tokens, content_hash, created_at, created_at_epoch)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`);
|
||||
|
||||
for (const observation of observations) {
|
||||
// Content-hash deduplication (same logic as storeObservation singular)
|
||||
const contentHash = computeObservationContentHash(memorySessionId, observation.title, observation.narrative);
|
||||
const existing = findDuplicateObservation(this.db, contentHash, timestampEpoch);
|
||||
if (existing) {
|
||||
observationIds.push(existing.id);
|
||||
continue;
|
||||
}
|
||||
|
||||
const result = obsStmt.run(
|
||||
memorySessionId,
|
||||
project,
|
||||
@@ -1801,6 +1818,7 @@ export class SessionStore {
|
||||
JSON.stringify(observation.files_modified),
|
||||
promptNumber || null,
|
||||
discoveryTokens,
|
||||
contentHash,
|
||||
timestampIso,
|
||||
timestampEpoch
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user