fix: use null-byte delimiter in observation content hash to prevent collisions
Fields were concatenated without separators, so different tuples could produce identical hashes (e.g. session="ab", title="cd" and session="abc", title="d" both concatenate to "abcd"). This could cause legitimate observations to be silently deduplicated. Join the fields with \x00 so that field boundaries are unambiguous.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -22,7 +22,7 @@ export function computeObservationContentHash(
|
|||||||
narrative: string | null
|
narrative: string | null
|
||||||
): string {
|
): string {
|
||||||
return createHash('sha256')
|
return createHash('sha256')
|
||||||
.update((memorySessionId || '') + (title || '') + (narrative || ''))
|
.update([memorySessionId || '', title || '', narrative || ''].join('\x00'))
|
||||||
.digest('hex')
|
.digest('hex')
|
||||||
.slice(0, 16);
|
.slice(0, 16);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -69,6 +69,16 @@ describe('TRIAGE-03: Data Integrity', () => {
|
|||||||
expect(hash.length).toBe(16);
|
expect(hash.length).toBe(16);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('computeObservationContentHash avoids collision from field boundary ambiguity', () => {
|
||||||
|
// These tuples would collide without a delimiter between fields
|
||||||
|
const hash1 = computeObservationContentHash('session-abc', 'debug log', '');
|
||||||
|
const hash2 = computeObservationContentHash('session-ab', 'cdebug log', '');
|
||||||
|
const hash3 = computeObservationContentHash('session-', 'abcdebug log', '');
|
||||||
|
const hash4 = computeObservationContentHash('', 'session-abcdebug log', '');
|
||||||
|
const hashes = new Set([hash1, hash2, hash3, hash4]);
|
||||||
|
expect(hashes.size).toBe(4);
|
||||||
|
});
|
||||||
|
|
||||||
it('storeObservation deduplicates identical observations within 30s window', () => {
|
it('storeObservation deduplicates identical observations within 30s window', () => {
|
||||||
const memId = createSessionWithMemoryId(db, 'content-dedup-1', 'mem-dedup-1');
|
const memId = createSessionWithMemoryId(db, 'content-dedup-1', 'mem-dedup-1');
|
||||||
const obs = createObservationInput({ title: 'Same Title', narrative: 'Same Narrative' });
|
const obs = createObservationInput({ title: 'Same Title', narrative: 'Same Narrative' });
|
||||||
|
|||||||
Reference in New Issue
Block a user