fix: add content-hash dedup to batch observation store methods (#1302)
storeObservations() and storeObservationsAndMarkComplete() were missing the content-hash deduplication that storeObservation() (singular) already had via computeObservationContentHash() and findDuplicateObservation(). This caused the Gemini provider (and potentially others that return multiple observations per response) to insert 2-10x duplicate rows per tool use, since the batch methods inserted unconditionally without checking content_hash.

The fix adds the same dedup pattern from storeObservation() to both batch methods:

1. Compute content hash via computeObservationContentHash()
2. Check for an existing observation within the 30s window via findDuplicateObservation()
3. Skip the insert and reuse the existing ID if a duplicate is found
4. Include the content_hash column in the INSERT statement

Fixes #1158 (duplicate observations with Gemini provider)

Co-authored-by: Enzo Ricciulli <e.ricciulli@systhema.ai>
This commit is contained in:
@@ -1659,15 +1659,23 @@ export class SessionStore {
|
|||||||
const storeTx = this.db.transaction(() => {
|
const storeTx = this.db.transaction(() => {
|
||||||
const observationIds: number[] = [];
|
const observationIds: number[] = [];
|
||||||
|
|
||||||
// 1. Store all observations
|
// 1. Store all observations (with content-hash deduplication)
|
||||||
const obsStmt = this.db.prepare(`
|
const obsStmt = this.db.prepare(`
|
||||||
INSERT INTO observations
|
INSERT INTO observations
|
||||||
(memory_session_id, project, type, title, subtitle, facts, narrative, concepts,
|
(memory_session_id, project, type, title, subtitle, facts, narrative, concepts,
|
||||||
files_read, files_modified, prompt_number, discovery_tokens, created_at, created_at_epoch)
|
files_read, files_modified, prompt_number, discovery_tokens, content_hash, created_at, created_at_epoch)
|
||||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
`);
|
`);
|
||||||
|
|
||||||
for (const observation of observations) {
|
for (const observation of observations) {
|
||||||
|
// Content-hash deduplication (same logic as storeObservation singular)
|
||||||
|
const contentHash = computeObservationContentHash(memorySessionId, observation.title, observation.narrative);
|
||||||
|
const existing = findDuplicateObservation(this.db, contentHash, timestampEpoch);
|
||||||
|
if (existing) {
|
||||||
|
observationIds.push(existing.id);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
const result = obsStmt.run(
|
const result = obsStmt.run(
|
||||||
memorySessionId,
|
memorySessionId,
|
||||||
project,
|
project,
|
||||||
@@ -1681,6 +1689,7 @@ export class SessionStore {
|
|||||||
JSON.stringify(observation.files_modified),
|
JSON.stringify(observation.files_modified),
|
||||||
promptNumber || null,
|
promptNumber || null,
|
||||||
discoveryTokens,
|
discoveryTokens,
|
||||||
|
contentHash,
|
||||||
timestampIso,
|
timestampIso,
|
||||||
timestampEpoch
|
timestampEpoch
|
||||||
);
|
);
|
||||||
@@ -1779,15 +1788,23 @@ export class SessionStore {
|
|||||||
const storeAndMarkTx = this.db.transaction(() => {
|
const storeAndMarkTx = this.db.transaction(() => {
|
||||||
const observationIds: number[] = [];
|
const observationIds: number[] = [];
|
||||||
|
|
||||||
// 1. Store all observations
|
// 1. Store all observations (with content-hash deduplication)
|
||||||
const obsStmt = this.db.prepare(`
|
const obsStmt = this.db.prepare(`
|
||||||
INSERT INTO observations
|
INSERT INTO observations
|
||||||
(memory_session_id, project, type, title, subtitle, facts, narrative, concepts,
|
(memory_session_id, project, type, title, subtitle, facts, narrative, concepts,
|
||||||
files_read, files_modified, prompt_number, discovery_tokens, created_at, created_at_epoch)
|
files_read, files_modified, prompt_number, discovery_tokens, content_hash, created_at, created_at_epoch)
|
||||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
`);
|
`);
|
||||||
|
|
||||||
for (const observation of observations) {
|
for (const observation of observations) {
|
||||||
|
// Content-hash deduplication (same logic as storeObservation singular)
|
||||||
|
const contentHash = computeObservationContentHash(memorySessionId, observation.title, observation.narrative);
|
||||||
|
const existing = findDuplicateObservation(this.db, contentHash, timestampEpoch);
|
||||||
|
if (existing) {
|
||||||
|
observationIds.push(existing.id);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
const result = obsStmt.run(
|
const result = obsStmt.run(
|
||||||
memorySessionId,
|
memorySessionId,
|
||||||
project,
|
project,
|
||||||
@@ -1801,6 +1818,7 @@ export class SessionStore {
|
|||||||
JSON.stringify(observation.files_modified),
|
JSON.stringify(observation.files_modified),
|
||||||
promptNumber || null,
|
promptNumber || null,
|
||||||
discoveryTokens,
|
discoveryTokens,
|
||||||
|
contentHash,
|
||||||
timestampIso,
|
timestampIso,
|
||||||
timestampEpoch
|
timestampEpoch
|
||||||
);
|
);
|
||||||
|
|||||||
Reference in New Issue
Block a user