feat: Implement Phase 2 of SDK Worker Process

- Added background agent architecture for processing tool observations and generating session summaries.
- Created SDK Prompts Module for generating prompts for the Claude Agent SDK.
- Developed XML Parser Module for parsing observation and summary XML blocks from SDK responses.
- Implemented SDK Worker Process to handle observation processing and session management.
- Updated newHook implementation to spawn the SDK worker as a detached process with path resolution for development and production.
- Created comprehensive test suite for SDK prompts, XML parsing, and HooksDatabase integration, ensuring all tests pass.
- Documented Phase 2 implementation details, architecture validation, and success criteria in PHASE2-COMPLETE.md.
This commit is contained in:
Alex Newman
2025-10-15 19:18:38 -04:00
parent d07a40616d
commit 78fd1368db
8 changed files with 1150 additions and 91 deletions
+8
View File
@@ -0,0 +1,8 @@
/**
* SDK Module Exports
*
* Barrel file for the SDK module: re-exports the three prompt builders from
* './prompts.js' and the two XML parsers from './parser.js', together with
* the types those functions accept and return.
*/
// Runtime exports: prompt builders.
export { buildInitPrompt, buildObservationPrompt, buildFinalizePrompt } from './prompts.js';
// Runtime exports: XML response parsers.
export { parseObservations, parseSummary } from './parser.js';
// Type-only exports (erased at compile time).
export type { Observation, SDKSession } from './prompts.js';
export type { ParsedObservation, ParsedSummary } from './parser.js';
+132
View File
@@ -0,0 +1,132 @@
/**
* XML Parser Module
* Parses observation and summary XML blocks from SDK responses
*/
/**
* One observation extracted from an <observation> XML block in an SDK response.
*/
export interface ParsedObservation {
// Observation category taken from the <type> element. parseObservations only
// returns entries whose type is one of: decision, bugfix, feature, refactor, discovery.
type: string;
// Whitespace-trimmed content of the <text> element.
text: string;
}
/**
* Session summary extracted from a <summary> XML block in an SDK response.
* Each string field holds the trimmed content of the child element with the
* same name; the two arrays hold the trimmed <file> entries of their element.
*/
export interface ParsedSummary {
// Content of <request>.
request: string;
// Content of <investigated>.
investigated: string;
// Content of <learned>.
learned: string;
// Content of <completed>.
completed: string;
// Content of <next_steps>.
next_steps: string;
// <file> entries found inside <files_read>; empty when there are none.
files_read: string[];
// <file> entries found inside <files_edited>; empty when there are none.
files_edited: string[];
// Content of <notes>.
notes: string;
}
/**
 * Parse every well-formed <observation> block out of an SDK response.
 *
 * A block must contain exactly a <type> element followed by a <text> element.
 * The text may contain any characters, including `<` (e.g. `Map<string, number>`
 * or `a < b`); it ends at the first `</text>`. Blocks whose type is not one of
 * the five supported values are skipped with a warning.
 *
 * @param text Raw response text; may contain prose around the XML blocks.
 * @returns The parsed observations in document order (empty if none are found).
 */
export function parseObservations(text: string): ParsedObservation[] {
  // Hoisted out of the loop: the set of accepted <type> values never changes.
  const validTypes = new Set(['decision', 'bugfix', 'feature', 'refactor', 'discovery']);

  // The <text> capture uses a tempered pattern ("any char that does not start
  // </text>") instead of [^<]+ so that observation text containing '<' is not
  // silently dropped, while still stopping at the first </text>. This keeps a
  // malformed block from swallowing the well-formed block that follows it.
  const observationRegex =
    /<observation>\s*<type>([^<]+)<\/type>\s*<text>((?:(?!<\/text>)[\s\S])+)<\/text>\s*<\/observation>/g;

  const observations: ParsedObservation[] = [];
  let match: RegExpExecArray | null;
  while ((match = observationRegex.exec(text)) !== null) {
    const type = match[1].trim();
    const observationText = match[2].trim();

    if (!validTypes.has(type)) {
      console.warn(`[SDK Parser] Invalid observation type: ${type}, skipping`);
      continue;
    }

    observations.push({ type, text: observationText });
  }
  return observations;
}
/**
 * Locate the first <summary>…</summary> block in an SDK response and turn it
 * into a ParsedSummary.
 *
 * @param text Raw response text.
 * @returns The parsed summary, or null when no <summary> block exists or when
 *          any of the six text fields is absent or empty.
 */
export function parseSummary(text: string): ParsedSummary | null {
  const block = /<summary>([\s\S]*?)<\/summary>/.exec(text);
  if (block === null) {
    return null;
  }
  const body = block[1];

  // File lists never cause rejection: a missing list simply yields [].
  const files_read = extractFileArray(body, 'files_read');
  const files_edited = extractFileArray(body, 'files_edited');

  // Plain text fields; each is null when its tag is absent.
  const request = extractField(body, 'request');
  const investigated = extractField(body, 'investigated');
  const learned = extractField(body, 'learned');
  const completed = extractField(body, 'completed');
  const next_steps = extractField(body, 'next_steps');
  const notes = extractField(body, 'notes');

  // Truthiness check: both a missing tag (null) and an empty tag ('') fail it.
  if (request && investigated && learned && completed && next_steps && notes) {
    return {
      request,
      investigated,
      learned,
      completed,
      next_steps,
      files_read,
      files_edited,
      notes
    };
  }

  console.warn('[SDK Parser] Summary missing required fields');
  return null;
}
/**
 * Extract the trimmed text content of the first `<fieldName>…</fieldName>`
 * element found in `content`.
 *
 * The value may contain any characters, including `<` and newlines; it ends at
 * the first matching closing tag.
 *
 * @param content   XML fragment to search.
 * @param fieldName Element name to look for (treated literally, not as a regex).
 * @returns The trimmed value ('' for an empty element), or null if the element
 *          is not present.
 */
function extractField(content: string, fieldName: string): string | null {
  // Escape regex metacharacters so the tag name is always matched literally.
  const tag = fieldName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // Lazy [\s\S]*? rather than [^<]*: a value such as "Array<string>" or "a < b"
  // must not make the whole field (and thus the whole summary) unparseable.
  const regex = new RegExp(`<${tag}>([\\s\\S]*?)</${tag}>`);
  const match = regex.exec(content);
  return match ? match[1].trim() : null;
}
/**
 * Collect the <file> entries nested inside the first `<arrayName>` element.
 *
 * @param content   XML fragment to search.
 * @param arrayName Name of the wrapping element (e.g. 'files_read').
 * @returns Trimmed file entries in document order; an empty array when the
 *          wrapper is missing or contains no <file> children.
 */
function extractFileArray(content: string, arrayName: string): string[] {
  // The 's' flag lets '.' span newlines so multi-line lists are captured.
  const wrapper = new RegExp(`<${arrayName}>(.*?)</${arrayName}>`, 's').exec(content);
  if (wrapper === null) {
    return [];
  }

  const inner = wrapper[1];
  const collected: string[] = [];
  const filePattern = /<file>([^<]+)<\/file>/g;
  for (let hit = filePattern.exec(inner); hit !== null; hit = filePattern.exec(inner)) {
    collected.push(hit[1].trim());
  }
  return collected;
}
+177
View File
@@ -0,0 +1,177 @@
/**
* SDK Prompts Module
* Generates prompts for the Claude Agent SDK memory worker
*/
/**
* A queued tool execution record, as consumed by buildObservationPrompt.
*/
export interface Observation {
// Numeric identifier of the record.
id: number;
// Name of the tool that was executed; printed on the prompt's "Tool:" line.
tool_name: string;
// Tool input as a string; buildObservationPrompt passes it through JSON.parse,
// so it is expected to be JSON text.
tool_input: string;
// Tool output as a string; also passed through JSON.parse by buildObservationPrompt.
tool_output: string;
// Creation time, handed directly to `new Date(...)` by buildObservationPrompt.
// NOTE(review): Date treats a number as milliseconds since the Unix epoch —
// confirm the producer stores milliseconds rather than seconds.
created_at_epoch: number;
}
/**
* Session record passed to buildFinalizePrompt.
*/
export interface SDKSession {
// Numeric identifier of the session record.
id: number;
// Session identifier assigned by the SDK; null when none has been recorded.
sdk_session_id: string | null;
// Project name associated with the session.
project: string;
// The user's prompt text for the session.
user_prompt: string;
}
/**
* Build the initial prompt that sets up the SDK agent as a memory assistant.
*
* The prompt embeds the project name, session ID, the user's goal and today's
* date (the YYYY-MM-DD part of `new Date().toISOString()`, i.e. the UTC date),
* then explains what to capture and the exact <observation> XML format the
* agent must emit.
*
* @param project Project name, interpolated into the opening sentence.
* @param sessionId Identifier shown on the "Session ID:" line.
* @param userPrompt The user's goal, shown on the "User's Goal:" line.
* @returns The complete prompt text.
*/
export function buildInitPrompt(project: string, sessionId: string, userPrompt: string): string {
return `You are a memory assistant for the "${project}" project.
SESSION CONTEXT
---------------
Session ID: ${sessionId}
User's Goal: ${userPrompt}
Date: ${new Date().toISOString().split('T')[0]}
YOUR ROLE
---------
You will observe tool executions during this Claude Code session. Your job is to:
1. Extract meaningful insights (not just raw data)
2. Store atomic observations in SQLite
3. Focus on: key decisions, patterns discovered, problems solved, technical insights
WHAT TO CAPTURE
----------------
✓ Architecture decisions (e.g., "chose PostgreSQL over MongoDB for ACID guarantees")
✓ Bug fixes (e.g., "fixed race condition in auth middleware by adding mutex")
✓ New features (e.g., "implemented JWT refresh token flow")
✓ Refactorings (e.g., "extracted validation logic into separate service")
✓ Discoveries (e.g., "found that API rate limit is 100 req/min")
✗ NOT routine operations (reading files, listing directories)
✗ NOT work-in-progress (only completed work)
✗ NOT obvious facts (e.g., "TypeScript file has types")
HOW TO STORE OBSERVATIONS
--------------------------
When you identify something worth remembering, output your observation in this EXACT XML format:
\`\`\`xml
<observation>
<type>feature</type>
<text>Implemented JWT token refresh flow with 7-day expiry</text>
</observation>
\`\`\`
Valid types: decision, bugfix, feature, refactor, discovery
Structure requirements:
- <observation> is the root element
- <type> must be one of the 5 valid types (single word)
- <text> contains your concise observation (one sentence preferred)
- No additional fields or nesting
The SDK worker will parse all <observation> blocks from your response using regex and store them in SQLite.
You can include your reasoning before or after the observation block, or just output the observation by itself.
EXAMPLE
-------
Bad: "Read src/auth.ts file"
Good: "Implemented JWT token refresh flow with 7-day expiry"
Wait for tool observations. Acknowledge this message briefly.`;
}
/**
 * Render a stored tool payload for inclusion in a prompt.
 * Valid JSON is pretty-printed with 2-space indentation; anything else
 * (plain-text output, an empty string, truncated JSON) is returned unchanged.
 */
function formatToolPayload(raw: string): string {
  try {
    return JSON.stringify(JSON.parse(raw), null, 2);
  } catch {
    // Not JSON: show the payload verbatim rather than failing the whole prompt.
    return raw;
  }
}

/**
 * Build the prompt that presents one tool execution to the SDK agent and asks
 * it to decide whether the execution is worth recording as an observation.
 *
 * @param obs The tool execution record. `tool_input` and `tool_output` are
 *            pretty-printed when they are valid JSON and included verbatim
 *            otherwise, so a non-JSON payload no longer throws.
 *            `created_at_epoch` is passed to `new Date(...)`.
 *            NOTE(review): Date interprets numbers as milliseconds — confirm
 *            the stored unit.
 * @returns The complete prompt text.
 * @throws RangeError if `created_at_epoch` does not form a valid Date
 *         (raised by `toISOString`).
 */
export function buildObservationPrompt(obs: Observation): string {
  return `TOOL OBSERVATION
================
Tool: ${obs.tool_name}
Time: ${new Date(obs.created_at_epoch).toISOString()}
Input:
${formatToolPayload(obs.tool_input)}
Output:
${formatToolPayload(obs.tool_output)}
ANALYSIS TASK
-------------
1. Does this observation contain something worth remembering?
2. If YES: Output the observation in this EXACT XML format:
\`\`\`xml
<observation>
<type>feature</type>
<text>Your concise observation here</text>
</observation>
\`\`\`
Requirements:
- Use one of these types: decision, bugfix, feature, refactor, discovery
- Keep text concise (one sentence preferred)
- No markdown formatting inside <text>
- No additional XML fields
3. If NO: Just acknowledge and wait for next observation
Remember: Quality over quantity. Only store meaningful insights.`;
}
/**
* Build the finalization prompt that asks the SDK agent to emit a session
* summary as a <summary> XML block with eight child elements (request,
* investigated, learned, completed, next_steps, files_read, files_edited, notes).
*
* @param session The session being finalized. The prompt text is static:
* this parameter is not referenced anywhere in the body.
* @returns The complete prompt text.
*/
export function buildFinalizePrompt(session: SDKSession): string {
return `SESSION ENDING
==============
The Claude Code session is finishing.
FINAL TASK
----------
1. Review the observations you've stored this session
2. Generate a structured summary that answers these questions:
- What did user request?
- What did you investigate?
- What did you learn?
- What did you do?
- What's next?
- Files read
- Files edited
- Notes
3. Generate the structured summary and output it in this EXACT XML format:
\`\`\`xml
<summary>
<request>Implement JWT authentication system</request>
<investigated>Existing auth middleware, session management, token storage patterns</investigated>
<learned>Current system uses session cookies; no JWT support; race condition in middleware</learned>
<completed>Implemented JWT token + refresh flow with 7-day expiry; fixed race condition with mutex; added token validation middleware</completed>
<next_steps>Add token revocation API endpoint; write integration tests</next_steps>
<files_read>
<file>src/auth.ts</file>
<file>src/middleware/session.ts</file>
<file>src/types/user.ts</file>
</files_read>
<files_edited>
<file>src/auth.ts</file>
<file>src/middleware/auth.ts</file>
<file>src/routes/auth.ts</file>
</files_edited>
<notes>Token secret stored in .env; refresh tokens use rotation strategy</notes>
</summary>
\`\`\`
Structure requirements:
- <summary> is the root element
- All 8 child elements are REQUIRED: request, investigated, learned, completed, next_steps, files_read, files_edited, notes
- <files_read> and <files_edited> must contain <file> child elements (one per file)
- If no files were read/edited, use empty tags: <files_read></files_read>
- Text fields can be multiple sentences but avoid markdown formatting
- Use underscores in element names: next_steps, files_read, files_edited
The SDK worker will parse the <summary> block and extract all fields to store in SQLite.
Generate the summary now in the required XML format.`;
}
+217
View File
@@ -0,0 +1,217 @@
#!/usr/bin/env bun
/**
* SDK Worker Process
* Background agent that processes tool observations and generates session summaries
*/
import { query } from '@anthropic-ai/claude-agent-sdk';
import { HooksDatabase } from '../services/sqlite/HooksDatabase.js';
import { buildInitPrompt, buildObservationPrompt, buildFinalizePrompt } from './prompts.js';
import { parseObservations, parseSummary } from './parser.js';
import type { Observation, SDKSession } from './prompts.js';
// Delay, in milliseconds, between successive polls of the pending-observation queue.
const POLL_INTERVAL_MS = 1000; // 1 second
// Model identifier passed to the SDK query.
const MODEL = 'claude-sonnet-4-5';
// Tool names passed to the SDK query as disallowed tools.
const DISALLOWED_TOOLS = ['Glob', 'Grep', 'ListMcpResourcesTool', 'WebSearch'];
/**
 * Worker entry point.
 *
 * Reads the session's database row ID from the first CLI argument, exits with
 * status 1 when it is absent, non-numeric or zero, and otherwise runs an
 * SDKWorker for that session to completion.
 */
async function main(): Promise<void> {
  const rawArgument = process.argv[2];
  const sessionDbId = parseInt(rawArgument, 10);

  // parseInt yields NaN for a missing or non-numeric argument; 0 is rejected too.
  if (Number.isNaN(sessionDbId) || sessionDbId === 0) {
    console.error('[SDK Worker] Missing session ID argument');
    process.exit(1);
  }

  await new SDKWorker(sessionDbId).run();
}
/**
 * One user turn in the shape the Agent SDK expects for streaming input.
 * Declared locally so no additional SDK type import is required.
 * NOTE(review): mirrors the SDK's documented `SDKUserMessage`; confirm against
 * the installed SDK version.
 */
interface WorkerUserMessage {
  type: 'user';
  session_id: string;
  parent_tool_use_id: null;
  message: { role: 'user'; content: string };
}

/**
 * SDK Worker class - handles the full lifecycle of observation processing.
 *
 * Lifecycle: load the session row, start one streaming SDK query, feed it the
 * init prompt followed by queued tool observations (polled once per
 * POLL_INTERVAL_MS), store every observation/summary the agent emits, and stop
 * after a FINALIZE record has been seen.
 */
class SDKWorker {
  private readonly sessionDbId: number;
  private readonly db: HooksDatabase;
  private readonly abortController = new AbortController();
  private sdkSessionId: string | null = null;
  private project = '';
  private userPrompt = '';
  private isFinalized = false;

  /**
   * @param sessionDbId Primary key of the row in `sdk_sessions` to process.
   */
  constructor(sessionDbId: number) {
    this.sessionDbId = sessionDbId;
    this.db = new HooksDatabase();
  }

  /**
   * Main run loop.
   *
   * Marks the session completed on success and failed on error. The database
   * handle is closed on every path, and the process exits with status 1 when
   * the session is missing or processing failed.
   */
  async run(): Promise<void> {
    let failed = false;
    try {
      const session = await this.loadSession();
      if (!session) {
        console.error('[SDK Worker] Session not found');
        failed = true;
      } else {
        this.project = session.project;
        this.userPrompt = session.user_prompt;

        await this.runSDKAgent();
        this.db.markSessionCompleted(this.sessionDbId);
      }
    } catch (error: unknown) {
      failed = true;
      console.error('[SDK Worker] Error:', SDKWorker.describeError(error));
      try {
        this.db.markSessionFailed(this.sessionDbId);
      } catch (markError: unknown) {
        // Do not let a bookkeeping failure hide the original error or skip close().
        console.error(
          '[SDK Worker] Could not mark session failed:',
          SDKWorker.describeError(markError)
        );
      }
    } finally {
      this.db.close();
    }

    if (failed) {
      process.exit(1);
    }
  }

  /**
   * Load this worker's session row from the database.
   *
   * @returns The session, or null when no row has the given ID.
   */
  private async loadSession(): Promise<SDKSession | null> {
    // NOTE(review): reaches into HooksDatabase's internal `db` handle because no
    // public accessor is visible from this file; prefer a typed method on
    // HooksDatabase if one exists.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const rawDb = (this.db as any).db;
    // Named `statement` so it does not shadow the imported SDK `query` function.
    const statement = rawDb.query(`
      SELECT id, sdk_session_id, project, user_prompt
      FROM sdk_sessions
      WHERE id = ?
      LIMIT 1
    `);
    const row = statement.get(this.sessionDbId) as SDKSession | null | undefined;
    return row ?? null;
  }

  /**
   * Run the SDK agent in streaming-input mode and consume its output.
   *
   * `query()` takes `{ prompt, options }` and returns an async iterable of SDK
   * messages. That stream must be iterated: awaiting the call alone never
   * drives the input generator, so no prompt would be sent.
   * NOTE(review): option and message shapes follow the published Agent SDK
   * reference; verify against the installed version.
   */
  private async runSDKAgent(): Promise<void> {
    const stream = query({
      prompt: this.createMessageGenerator(),
      options: {
        model: MODEL,
        disallowedTools: DISALLOWED_TOOLS,
        abortController: this.abortController
      }
    });

    for await (const message of stream) {
      if (message.type === 'system' && message.subtype === 'init') {
        // The init message carries the SDK session ID; the input generator
        // waits for it before polling the observation queue.
        if (message.session_id) {
          this.sdkSessionId = message.session_id;
          this.db.updateSDKSessionId(this.sessionDbId, message.session_id);
        }
      } else if (message.type === 'assistant') {
        this.handleAgentMessage(SDKWorker.textFromContent(message.message.content));
      }
    }
  }

  /**
   * Create the async generator that supplies user turns to the SDK.
   *
   * Yields the init prompt first, then one prompt per pending observation.
   * When a FINALIZE record is encountered it yields the finalize prompt and
   * ends, which closes the SDK's input stream.
   */
  private async *createMessageGenerator(): AsyncGenerator<WorkerUserMessage> {
    const claudeSessionId = `session-${this.sessionDbId}`;
    yield this.toUserMessage(buildInitPrompt(this.project, claudeSessionId, this.userPrompt));

    while (!this.isFinalized && !this.abortController.signal.aborted) {
      await this.sleep(POLL_INTERVAL_MS);

      if (!this.sdkSessionId) {
        continue; // Wait for the SDK session ID to be captured from the init message
      }

      const observations = this.db.getPendingObservations(this.sdkSessionId, 10);
      for (const obs of observations) {
        if (this.isFinalizationMessage(obs)) {
          this.isFinalized = true;
          const session = await this.loadSession();
          if (session) {
            yield this.toUserMessage(buildFinalizePrompt(session));
          }
          this.db.markObservationProcessed(obs.id);
          break;
        }

        let observationPrompt: string;
        try {
          observationPrompt = buildObservationPrompt(obs);
        } catch (error: unknown) {
          // One malformed record must not kill the whole worker. Mark it
          // processed so it is not fetched again on every poll.
          console.error(
            `[SDK Worker] Skipping observation ${obs.id}:`,
            SDKWorker.describeError(error)
          );
          this.db.markObservationProcessed(obs.id);
          continue;
        }

        yield this.toUserMessage(observationPrompt);
        // Reached only once the SDK has pulled the next message, i.e. after
        // the prompt above was handed over.
        this.db.markObservationProcessed(obs.id);
      }
    }
  }

  /**
   * Parse an agent reply and persist any observations and summary it contains.
   * Nothing is stored until the SDK session ID is known.
   */
  private handleAgentMessage(content: string): void {
    const sdkSessionId = this.sdkSessionId;
    if (!sdkSessionId) {
      return;
    }

    for (const obs of parseObservations(content)) {
      this.db.storeObservation(sdkSessionId, this.project, obs.type, obs.text);
    }

    const summary = parseSummary(content);
    if (summary) {
      // File lists are stored as JSON-encoded strings.
      this.db.storeSummary(sdkSessionId, this.project, {
        ...summary,
        files_read: JSON.stringify(summary.files_read),
        files_edited: JSON.stringify(summary.files_edited)
      });
    }
  }

  /**
   * Check if a queued record is the FINALIZE sentinel.
   */
  private isFinalizationMessage(obs: Observation): boolean {
    return obs.tool_name === 'FINALIZE';
  }

  /**
   * Wrap prompt text as a streaming-input user message.
   */
  private toUserMessage(content: string): WorkerUserMessage {
    return {
      type: 'user',
      // Empty until the init message has been received.
      session_id: this.sdkSessionId ?? '',
      parent_tool_use_id: null,
      message: { role: 'user', content }
    };
  }

  /**
   * Sleep helper: resolves after `ms` milliseconds.
   */
  private sleep(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Flatten assistant message content to plain text. Accepts either a string
   * or an array of content blocks, of which only `{ type: 'text' }` blocks
   * are kept.
   */
  private static textFromContent(content: unknown): string {
    if (typeof content === 'string') {
      return content;
    }
    if (!Array.isArray(content)) {
      return '';
    }
    const parts: string[] = [];
    for (const block of content as unknown[]) {
      if (typeof block === 'object' && block !== null) {
        const candidate = block as { type?: unknown; text?: unknown };
        if (candidate.type === 'text' && typeof candidate.text === 'string') {
          parts.push(candidate.text);
        }
      }
    }
    return parts.join('\n');
  }

  /**
   * Produce a printable message from an unknown thrown value.
   */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }
}
// Start the worker only when this file is the program's entry module,
// not when it is imported by another module.
if (import.meta.main) {
  const reportFatal = (error: unknown): never => {
    console.error('[SDK Worker] Fatal error:', error);
    process.exit(1);
  };
  main().catch(reportFatal);
}