feat: strip <system_instruction> tags before DB storage (#1398)
* feat: strip <system_instruction> tags before database storage

  Extends the existing tag-stripping mechanism (used for <private> and <claude-mem-context>) to also filter Conductor-injected system instructions, preventing them from being persisted in the observation database.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat: also strip <system-instruction> (hyphen variant) before DB storage

  Conductor uses both <system_instruction> and <system-instruction> tag formats. This adds the hyphen variant to the same stripping mechanism.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+68123
-789
File diff suppressed because one or more lines are too long
@@ -1,11 +1,13 @@
|
|||||||
/**
|
/**
|
||||||
* Tag Stripping Utilities
|
* Tag Stripping Utilities
|
||||||
*
|
*
|
||||||
* Implements the dual-tag system for meta-observation control:
|
* Implements the tag system for meta-observation control:
|
||||||
* 1. <claude-mem-context> - System-level tag for auto-injected observations
|
* 1. <claude-mem-context> - System-level tag for auto-injected observations
|
||||||
* (prevents recursive storage when context injection is active)
|
* (prevents recursive storage when context injection is active)
|
||||||
* 2. <private> - User-level tag for manual privacy control
|
* 2. <private> - User-level tag for manual privacy control
|
||||||
* (allows users to mark content they don't want persisted)
|
* (allows users to mark content they don't want persisted)
|
||||||
|
* 3. <system_instruction> / <system-instruction> - Conductor-injected system instructions
|
||||||
|
* (should not be persisted to memory)
|
||||||
*
|
*
|
||||||
* EDGE PROCESSING PATTERN: Filter at hook layer before sending to worker/storage.
|
* EDGE PROCESSING PATTERN: Filter at hook layer before sending to worker/storage.
|
||||||
* This keeps the worker service simple and follows one-way data stream.
|
* This keeps the worker service simple and follows one-way data stream.
|
||||||
@@ -27,7 +29,9 @@ const MAX_TAG_COUNT = 100;
|
|||||||
function countTags(content: string): number {
|
function countTags(content: string): number {
|
||||||
const privateCount = (content.match(/<private>/g) || []).length;
|
const privateCount = (content.match(/<private>/g) || []).length;
|
||||||
const contextCount = (content.match(/<claude-mem-context>/g) || []).length;
|
const contextCount = (content.match(/<claude-mem-context>/g) || []).length;
|
||||||
return privateCount + contextCount;
|
const systemInstructionCount = (content.match(/<system_instruction>/g) || []).length;
|
||||||
|
const systemInstructionHyphenCount = (content.match(/<system-instruction>/g) || []).length;
|
||||||
|
return privateCount + contextCount + systemInstructionCount + systemInstructionHyphenCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -49,6 +53,8 @@ function stripTagsInternal(content: string): string {
|
|||||||
return content
|
return content
|
||||||
.replace(/<claude-mem-context>[\s\S]*?<\/claude-mem-context>/g, '')
|
.replace(/<claude-mem-context>[\s\S]*?<\/claude-mem-context>/g, '')
|
||||||
.replace(/<private>[\s\S]*?<\/private>/g, '')
|
.replace(/<private>[\s\S]*?<\/private>/g, '')
|
||||||
|
.replace(/<system_instruction>[\s\S]*?<\/system_instruction>/g, '')
|
||||||
|
.replace(/<system-instruction>[\s\S]*?<\/system-instruction>/g, '')
|
||||||
.trim();
|
.trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
/**
|
/**
|
||||||
* Tag Stripping Utility Tests
|
* Tag Stripping Utility Tests
|
||||||
*
|
*
|
||||||
* Tests the dual-tag privacy system for <private> and <claude-mem-context> tags.
|
* Tests the tag privacy system for <private>, <claude-mem-context>, and <system_instruction> / <system-instruction> tags.
|
||||||
* These tags enable users and the system to exclude content from memory storage.
|
* These tags enable users and the system to exclude content from memory storage.
|
||||||
*
|
*
|
||||||
* Sources:
|
* Sources:
|
||||||
@@ -257,6 +257,74 @@ finish`;
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('system_instruction tag stripping', () => {
|
||||||
|
describe('basic system_instruction removal', () => {
|
||||||
|
it('should strip single <system_instruction> tag from prompt', () => {
|
||||||
|
const input = 'user content <system_instruction>injected instructions</system_instruction> more content';
|
||||||
|
const result = stripMemoryTagsFromPrompt(input);
|
||||||
|
expect(result).toBe('user content more content');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should strip <system_instruction> mixed with <private> tags', () => {
|
||||||
|
const input = '<system_instruction>instructions</system_instruction> public <private>secret</private> end';
|
||||||
|
const result = stripMemoryTagsFromPrompt(input);
|
||||||
|
expect(result).toBe('public end');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should return empty string for entirely <system_instruction> content', () => {
|
||||||
|
const input = '<system_instruction>entire prompt is system instructions</system_instruction>';
|
||||||
|
const result = stripMemoryTagsFromPrompt(input);
|
||||||
|
expect(result).toBe('');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should strip <system_instruction> tags from JSON content', () => {
|
||||||
|
const jsonContent = JSON.stringify({
|
||||||
|
data: '<system_instruction>injected</system_instruction> real data'
|
||||||
|
});
|
||||||
|
const result = stripMemoryTagsFromJson(jsonContent);
|
||||||
|
const parsed = JSON.parse(result);
|
||||||
|
expect(parsed.data).toBe(' real data');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should strip multiline content within <system_instruction> tags', () => {
|
||||||
|
const input = `before
|
||||||
|
<system_instruction>
|
||||||
|
line one
|
||||||
|
line two
|
||||||
|
line three
|
||||||
|
</system_instruction>
|
||||||
|
after`;
|
||||||
|
const result = stripMemoryTagsFromPrompt(input);
|
||||||
|
expect(result).toBe('before\n\nafter');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('system-instruction (hyphen variant) tag stripping', () => {
|
||||||
|
it('should strip single <system-instruction> tag from prompt', () => {
|
||||||
|
const input = 'user content <system-instruction>injected instructions</system-instruction> more content';
|
||||||
|
const result = stripMemoryTagsFromPrompt(input);
|
||||||
|
expect(result).toBe('user content more content');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should strip both underscore and hyphen variants in same prompt', () => {
|
||||||
|
const input = '<system_instruction>underscore</system_instruction> middle <system-instruction>hyphen</system-instruction> end';
|
||||||
|
const result = stripMemoryTagsFromPrompt(input);
|
||||||
|
expect(result).toBe('middle end');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should strip multiline <system-instruction> content', () => {
|
||||||
|
const input = `before
|
||||||
|
<system-instruction>
|
||||||
|
line one
|
||||||
|
line two
|
||||||
|
</system-instruction>
|
||||||
|
after`;
|
||||||
|
const result = stripMemoryTagsFromPrompt(input);
|
||||||
|
expect(result).toBe('before\n\nafter');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('privacy enforcement integration', () => {
|
describe('privacy enforcement integration', () => {
|
||||||
it('should allow empty result to trigger privacy skip', () => {
|
it('should allow empty result to trigger privacy skip', () => {
|
||||||
// Simulates what SessionRoutes does with private-only prompts
|
// Simulates what SessionRoutes does with private-only prompts
|
||||||
|
|||||||
Reference in New Issue
Block a user