fix: strip <system-reminder> tags from persisted memory and DRY up regex
System reminders (CLAUDE.md contents, deferred tool lists) were being stored in memory observations. Add <system-reminder> to the tag-stripping pipeline alongside <private> and <system_instruction>, and extract the duplicated regex into a shared SYSTEM_REMINDER_REGEX constant. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -8,6 +8,7 @@ import path from 'path';
|
||||
import { existsSync, readFileSync } from 'fs';
|
||||
import { SessionStore } from '../sqlite/SessionStore.js';
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { SYSTEM_REMINDER_REGEX } from '../../utils/tag-stripping.js';
|
||||
import { CLAUDE_CONFIG_DIR } from '../../shared/paths.js';
|
||||
import type {
|
||||
ContextConfig,
|
||||
@@ -164,7 +165,7 @@ export function extractPriorMessages(transcriptPath: string): PriorMessages {
|
||||
text += block.text;
|
||||
}
|
||||
}
|
||||
text = text.replace(/<system-reminder>[\s\S]*?<\/system-reminder>/g, '').trim();
|
||||
text = text.replace(SYSTEM_REMINDER_REGEX, '').trim();
|
||||
if (text) {
|
||||
lastAssistantMessage = text;
|
||||
break;
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { readFileSync, existsSync } from 'fs';
|
||||
import { logger } from '../utils/logger.js';
|
||||
import { SYSTEM_REMINDER_REGEX } from '../utils/tag-stripping.js';
|
||||
|
||||
/**
|
||||
* Extract last message of specified role from transcript JSONL file
|
||||
@@ -48,7 +49,7 @@ export function extractLastMessage(
|
||||
}
|
||||
|
||||
if (stripSystemReminders) {
|
||||
text = text.replace(/<system-reminder>[\s\S]*?<\/system-reminder>/g, '');
|
||||
text = text.replace(SYSTEM_REMINDER_REGEX, '');
|
||||
text = text.replace(/\n{3,}/g, '\n\n').trim();
|
||||
}
|
||||
|
||||
|
||||
@@ -8,6 +8,8 @@
|
||||
* (allows users to mark content they don't want persisted)
|
||||
* 3. <system_instruction> / <system-instruction> - Conductor-injected system instructions
|
||||
* (should not be persisted to memory)
|
||||
* 4. <system-reminder> - Claude Code-injected system reminders
|
||||
* (CLAUDE.md contents, deferred tool lists, etc. — should not be persisted)
|
||||
*
|
||||
* EDGE PROCESSING PATTERN: Filter at hook layer before sending to worker/storage.
|
||||
* This keeps the worker service simple and follows one-way data stream.
|
||||
@@ -15,6 +17,12 @@
|
||||
|
||||
import { logger } from './logger.js';
|
||||
|
||||
/**
|
||||
* Regex to match <system-reminder> tags and their content.
|
||||
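* Exported for use by transcript parsers that strip system-reminder at read-time. Carries the global (g) flag: safe with String.prototype.replace, but stateful via lastIndex if reused with .test() or .exec().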
|
||||
*/
|
||||
export const SYSTEM_REMINDER_REGEX = /<system-reminder>[\s\S]*?<\/system-reminder>/g;
|
||||
|
||||
/**
|
||||
* Maximum number of tags allowed in a single content block
|
||||
* This protects against ReDoS (Regular Expression Denial of Service) attacks
|
||||
@@ -31,7 +39,8 @@ function countTags(content: string): number {
|
||||
const contextCount = (content.match(/<claude-mem-context>/g) || []).length;
|
||||
const systemInstructionCount = (content.match(/<system_instruction>/g) || []).length;
|
||||
const systemInstructionHyphenCount = (content.match(/<system-instruction>/g) || []).length;
|
||||
return privateCount + contextCount + systemInstructionCount + systemInstructionHyphenCount;
|
||||
const systemReminderCount = (content.match(/<system-reminder>/g) || []).length;
|
||||
return privateCount + contextCount + systemInstructionCount + systemInstructionHyphenCount + systemReminderCount;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -55,6 +64,7 @@ function stripTagsInternal(content: string): string {
|
||||
.replace(/<private>[\s\S]*?<\/private>/g, '')
|
||||
.replace(/<system_instruction>[\s\S]*?<\/system_instruction>/g, '')
|
||||
.replace(/<system-instruction>[\s\S]*?<\/system-instruction>/g, '')
|
||||
.replace(SYSTEM_REMINDER_REGEX, '')
|
||||
.trim();
|
||||
}
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
|
||||
import { readFileSync } from 'fs';
|
||||
import { logger } from './logger.js';
|
||||
import { SYSTEM_REMINDER_REGEX } from './tag-stripping.js';
|
||||
import type {
|
||||
TranscriptEntry,
|
||||
UserTranscriptEntry,
|
||||
@@ -163,7 +164,7 @@ export class TranscriptParser {
|
||||
|
||||
if (filterSystemReminders) {
|
||||
// Filter out system-reminder tags and their content
|
||||
text = text.replace(/<system-reminder>[\s\S]*?<\/system-reminder>/g, '');
|
||||
text = text.replace(SYSTEM_REMINDER_REGEX, '');
|
||||
// Clean up excessive whitespace
|
||||
text = text.replace(/\n{3,}/g, '\n\n').trim();
|
||||
}
|
||||
|
||||
@@ -325,6 +325,56 @@ after`;
|
||||
});
|
||||
});
|
||||
|
||||
describe('system-reminder tag stripping', () => {
|
||||
it('should strip single <system-reminder> tag from prompt', () => {
|
||||
const input = 'user content <system-reminder>CLAUDE.md contents here</system-reminder> more content';
|
||||
const result = stripMemoryTagsFromPrompt(input);
|
||||
expect(result).toBe('user content more content');
|
||||
});
|
||||
|
||||
it('should strip <system-reminder> mixed with other tag types', () => {
|
||||
const input = '<system-reminder>reminder</system-reminder> public <private>secret</private> <claude-mem-context>ctx</claude-mem-context> end';
|
||||
const result = stripMemoryTagsFromPrompt(input);
|
||||
expect(result).toBe('public end');
|
||||
});
|
||||
|
||||
it('should return empty string for entirely <system-reminder> content', () => {
|
||||
const input = '<system-reminder>entire content is a system reminder</system-reminder>';
|
||||
const result = stripMemoryTagsFromPrompt(input);
|
||||
expect(result).toBe('');
|
||||
});
|
||||
|
||||
it('should strip <system-reminder> tags from JSON content', () => {
|
||||
const jsonContent = JSON.stringify({
|
||||
data: '<system-reminder>injected reminder</system-reminder> real data'
|
||||
});
|
||||
const result = stripMemoryTagsFromJson(jsonContent);
|
||||
const parsed = JSON.parse(result);
|
||||
expect(parsed.data).toBe(' real data');
|
||||
});
|
||||
|
||||
it('should strip multiline content within <system-reminder> tags', () => {
|
||||
const input = `before
|
||||
<system-reminder>
|
||||
Contents of /path/to/CLAUDE.md:
|
||||
|
||||
<claude-mem-context>
|
||||
# Recent Activity
|
||||
- Item 1
|
||||
</claude-mem-context>
|
||||
</system-reminder>
|
||||
after`;
|
||||
const result = stripMemoryTagsFromPrompt(input);
|
||||
expect(result).toBe('before\n\nafter');
|
||||
});
|
||||
|
||||
it('should strip realistic tool result with nested CLAUDE.md content', () => {
|
||||
const input = `Here is the file content.\n\n<system-reminder>\nContents of /project/src/CLAUDE.md:\n\n<claude-mem-context>\n# Recent Activity\n\n### Dec 14, 2025\n| ID | Time | Title |\n|-----|------|-------|\n| #123 | 11:30 PM | Some observation |\n</claude-mem-context>\n</system-reminder>`;
|
||||
const result = stripMemoryTagsFromPrompt(input);
|
||||
expect(result).toBe('Here is the file content.');
|
||||
});
|
||||
});
|
||||
|
||||
describe('privacy enforcement integration', () => {
|
||||
it('should allow empty result to trigger privacy skip', () => {
|
||||
// Simulates what SessionRoutes does with private-only prompts
|
||||
|
||||
Reference in New Issue
Block a user