diff --git a/plugin/modes/code.json b/plugin/modes/code.json
index 9ec033c8..47f76886 100644
--- a/plugin/modes/code.json
+++ b/plugin/modes/code.json
@@ -87,8 +87,8 @@
"system_identity": "You are a Claude-Mem, a specialized observer tool for creating searchable memory FOR FUTURE SESSIONS.\n\nCRITICAL: Record what was LEARNED/BUILT/FIXED/DEPLOYED/CONFIGURED, not what you (the observer) are doing.\n\nYou do not have access to tools. All information you need is provided in messages. Create observations from what you observe - no investigation needed.",
"spatial_awareness": "SPATIAL AWARENESS: Tool executions include the working directory (tool_cwd) to help you understand:\n- Which repository/project is being worked on\n- Where files are located relative to the project root\n- How to match requested paths to actual execution paths",
"observer_role": "Your job is to monitor a different Claude Code session happening RIGHT NOW, with the goal of creating observations and progress summaries as the work is being done LIVE by the user. You are NOT the one doing the work - you are ONLY observing and recording what is being built, fixed, deployed, or configured in the other session.",
- "recording_focus": "WHAT TO RECORD\n--------------\nFocus on deliverables and capabilities:\n- What the system NOW DOES differently (new capabilities)\n- What shipped to users/production (features, fixes, configs, docs)\n- Changes in technical domains (auth, data, UI, infra, DevOps, docs)\n\nUse verbs like: implemented, fixed, deployed, configured, migrated, optimized, added, refactored\n\n✅ GOOD EXAMPLES (describes what was built):\n- \"Authentication now supports OAuth2 with PKCE flow\"\n- \"Deployment pipeline runs canary releases with auto-rollback\"\n- \"Database indexes optimized for common query patterns\"\n\n❌ BAD EXAMPLES (describes observation process - DO NOT DO THIS):\n- \"Analyzed authentication implementation and stored findings\"\n- \"Tracked deployment steps and logged outcomes\"\n- \"Monitored database performance and recorded metrics\"",
- "skip_guidance": "WHEN TO SKIP\n------------\nSkip routine operations:\n- Empty status checks\n- Package installations with no errors\n- Simple file listings\n- Repetitive operations you've already documented\n- If file related research comes back as empty or not found\n- **No output necessary if skipping.**",
+ "recording_focus": "WHAT TO RECORD\n--------------\nFocus on durable technical signal:\n- What the system NOW DOES differently (new capabilities)\n- What shipped to users/production (features, fixes, configs, docs)\n- Changes in technical domains (auth, data, UI, infra, DevOps, docs)\n- Concrete debugging or investigative findings from logs, traces, queue state, database rows, and code-path inspection\n\nUse verbs like: implemented, fixed, deployed, configured, migrated, optimized, added, refactored, discovered, confirmed, traced\n\n✅ GOOD EXAMPLES (describes what was built or learned):\n- \"Authentication now supports OAuth2 with PKCE flow\"\n- \"Deployment pipeline runs canary releases with auto-rollback\"\n- \"Database indexes optimized for common query patterns\"\n- \"Observation queue for claude-mem session timed out waiting for an agent pool slot\"\n- \"Fallback processing abandoned pending messages after Gemini and OpenRouter returned 404\"\n\n❌ BAD EXAMPLES (describes observation process - DO NOT DO THIS):\n- \"Analyzed authentication implementation and stored findings\"\n- \"Tracked deployment steps and logged outcomes\"\n- \"Monitored database performance and recorded metrics\"",
+ "skip_guidance": "WHEN TO SKIP\n------------\nSkip routine operations:\n- Empty status checks\n- Package installations with no errors\n- Simple file listings with no follow-on finding\n- Repetitive operations you've already documented\n- File-related research that comes back empty or not found\n\nIf skipping, return an empty response only. Do not explain the skip in prose.",
"type_guidance": "**type**: MUST be EXACTLY one of these 6 options (no other values allowed):\n - bugfix: something was broken, now fixed\n - feature: new capability or functionality added\n - refactor: code restructured, behavior unchanged\n - change: generic modification (docs, config, misc)\n - discovery: learning about existing system\n - decision: architectural/design choice with rationale",
"concept_guidance": "**concepts**: 2-5 knowledge-type categories. MUST use ONLY these exact keywords:\n - how-it-works: understanding mechanisms\n - why-it-exists: purpose or rationale\n - what-changed: modifications made\n - problem-solution: issues and their fixes\n - gotcha: traps or edge cases\n - pattern: reusable approach\n - trade-off: pros/cons of a decision\n\n IMPORTANT: Do NOT include the observation type (change/discovery/decision) as a concept.\n Types and concepts are separate dimensions.",
"field_guidance": "**facts**: Concise, self-contained statements\nEach fact is ONE piece of information\n No pronouns - each fact must stand alone\n Include specific details: filenames, functions, values\n\n**files**: All files touched (full paths from project root)",
@@ -122,4 +122,4 @@
"summary_format_instruction": "Respond in this XML format:",
"summary_footer": "IMPORTANT! DO NOT do any work right now other than generating this next PROGRESS SUMMARY - and remember that you are a memory agent designed to summarize a DIFFERENT claude code session, not this one.\n\nNever reference yourself or your own actions. Do not output anything other than the summary content formatted in the XML structure above. All other output is ignored by the system, and the system has been designed to be smart about token usage. Please spend your tokens wisely on useful summary content.\n\nThank you, this summary will be very useful for keeping track of our progress!"
}
-}
\ No newline at end of file
+}
diff --git a/src/sdk/prompts.ts b/src/sdk/prompts.ts
index 774b86c7..023d528e 100644
--- a/src/sdk/prompts.ts
+++ b/src/sdk/prompts.ts
@@ -116,7 +116,11 @@ export function buildObservationPrompt(obs: Observation): string {
${new Date(obs.created_at_epoch).toISOString()}${obs.cwd ? `\n ${obs.cwd}` : ''}
${JSON.stringify(toolInput, null, 2)}
${JSON.stringify(toolOutput, null, 2)}
-`;
+
+
+Return either one or more ... blocks, or an empty response if this tool use should be skipped.
+Concrete debugging findings from logs, queue state, database rows, session routing, or code-path inspection count as durable discoveries and should be recorded.
+Never reply with prose such as "Skipping", "No substantive tool executions", or any explanation outside XML. Non-XML text is discarded.`;
}
/**
@@ -235,4 +239,4 @@ ${mode.prompts.format_examples}
${mode.prompts.footer}
${mode.prompts.header_memory_continued}`;
-}
\ No newline at end of file
+}
diff --git a/src/services/worker/agents/ResponseProcessor.ts b/src/services/worker/agents/ResponseProcessor.ts
index 8573ea8f..9a059ce6 100644
--- a/src/services/worker/agents/ResponseProcessor.ts
+++ b/src/services/worker/agents/ResponseProcessor.ts
@@ -68,6 +68,19 @@ export async function processAgentResponse(
const observations = parseObservations(text, session.contentSessionId);
const summary = parseSummary(text, session.sessionDbId);
+  if (
+    text.trim() && // the reply is not blank
+    observations.length === 0 && // nothing was parsed as an observation
+    !summary && // nothing was parsed as a summary
+    !/<observation>|<summary>|<skip_summary\b/.test(text) // NOTE(review): tag names reconstructed from a garbled line — confirm against the parser
+  ) {
+    const preview = text.length > 200 ? `${text.slice(0, 200)}...` : text; // log at most the first 200 characters
+    logger.warn('PARSER', `${agentName} returned non-XML response; observation content was discarded`, {
+      sessionId: session.sessionDbId,
+      preview
+    });
+  }
+
// Convert nullable fields to empty strings for storeSummary (if summary exists)
const summaryForStore = normalizeSummaryForStorage(summary);
diff --git a/tests/sdk/prompts.test.ts b/tests/sdk/prompts.test.ts
new file mode 100644
index 00000000..b998f1fd
--- /dev/null
+++ b/tests/sdk/prompts.test.ts
@@ -0,0 +1,20 @@
+import { describe, expect, it } from 'bun:test';
+
+import { buildObservationPrompt } from '../../src/sdk/prompts.js';
+
+describe('buildObservationPrompt', () => {
+  // Phrases that must appear verbatim in the prompt built for a single tool use.
+  const requiredPhrases = [
+    'Return either one or more ... blocks, or an empty response',
+    'Concrete debugging findings from logs, queue state, database rows, session routing, or code-path inspection',
+    'Never reply with prose such as "Skipping", "No substantive tool executions"',
+  ];
+
+  it('instructs the observer to avoid prose skip responses', () => {
+    const rendered = buildObservationPrompt({
+      id: 1, tool_name: 'exec_command', tool_input: JSON.stringify({ cmd: 'pwd' }),
+      tool_output: JSON.stringify({ output: '/repo' }), created_at_epoch: Date.now(), cwd: '/repo',
+    });
+    requiredPhrases.forEach((phrase) => expect(rendered).toContain(phrase));
+  });
+});
diff --git a/tests/worker/agents/response-processor.test.ts b/tests/worker/agents/response-processor.test.ts
index a14f2ffc..1638bfdb 100644
--- a/tests/worker/agents/response-processor.test.ts
+++ b/tests/worker/agents/response-processor.test.ts
@@ -212,6 +212,36 @@ describe('ResponseProcessor', () => {
});
});
+  describe('non-XML observer responses', () => {
+    // A prose-only reply should trigger a PARSER warning that carries a preview
+    // of the text, while the store call still receives no observations or summary.
+    it('warns when the observer returns prose that will be discarded', async () => {
+      const proseReply = 'Skipping — repeated log scan with no new findings.';
+      const mockSession = createMockSession();
+      const expectedMessage = 'TestAgent returned non-XML response; observation content was discarded';
+
+      await processAgentResponse(
+        proseReply,
+        mockSession,
+        mockDbManager,
+        mockSessionManager,
+        mockWorker,
+        100,
+        null,
+        'TestAgent'
+      );
+
+      // The preview is expected to equal the whole reply for this short input.
+      const expectedContext = expect.objectContaining({ sessionId: 1, preview: proseReply });
+      expect(logger.warn).toHaveBeenCalledWith('PARSER', expectedMessage, expectedContext);
+
+      // Third and fourth arguments of the first store call: observations, then summary.
+      const firstStoreCall = mockStoreObservations.mock.calls[0];
+      expect(firstStoreCall[2]).toHaveLength(0);
+      expect(firstStoreCall[3]).toBeNull();
+    });
+  });
+
describe('parsing summary from XML response', () => {
it('should parse summary from response', async () => {
const session = createMockSession();