feat: Implement Phase 2 of SDK Worker Process

- Added background agent architecture for processing tool observations and generating session summaries. - Created SDK Prompts Module for generating prompts for the Claude Agent SDK. - Developed XML Parser Module for parsing observation and summary XML blocks from SDK responses. - Implemented SDK Worker Process to handle observation processing and session management. - Updated newHook implementation to spawn the SDK worker as a detached process with path resolution for development and production. - Created comprehensive test suite for SDK prompts, XML parsing, and HooksDatabase integration, ensuring all tests pass. - Documented Phase 2 implementation details, architecture validation, and success criteria in PHASE2-COMPLETE.md.
2025-10-15 19:18:38 -04:00
parent d07a40616d
commit 78fd1368db
8 changed files with 1150 additions and 91 deletions
@@ -0,0 +1,8 @@
+/**
+ * SDK Module Exports
+ */
+
+export { buildInitPrompt, buildObservationPrompt, buildFinalizePrompt } from './prompts.js';
+export { parseObservations, parseSummary } from './parser.js';
+export type { Observation, SDKSession } from './prompts.js';
+export type { ParsedObservation, ParsedSummary } from './parser.js';
@@ -0,0 +1,132 @@
+/**
+ * XML Parser Module
+ * Parses observation and summary XML blocks from SDK responses
+ */
+
+/**
+ * One observation extracted from an SDK response by parseObservations.
+ */
+export interface ParsedObservation {
+  /** Content of the <type> element, whitespace-trimmed. */
+  type: string;
+  /** Content of the <text> element, whitespace-trimmed. */
+  text: string;
+}
+
+/**
+ * Session summary extracted from a <summary> block by parseSummary.
+ * Each text field holds the trimmed content of the element with the same name.
+ */
+export interface ParsedSummary {
+  /** Content of <request>. */
+  request: string;
+  /** Content of <investigated>. */
+  investigated: string;
+  /** Content of <learned>. */
+  learned: string;
+  /** Content of <completed>. */
+  completed: string;
+  /** Content of <next_steps>. */
+  next_steps: string;
+  /** Trimmed <file> entries found inside <files_read>; empty when there are none. */
+  files_read: string[];
+  /** Trimmed <file> entries found inside <files_edited>; empty when there are none. */
+  files_edited: string[];
+  /** Content of <notes>. */
+  notes: string;
+}
+
+/**
+ * Parse observation XML blocks from an SDK response.
+ *
+ * Each <observation>...</observation> block is located first; its <type> and
+ * <text> children are then read from inside that block only. The children are
+ * matched lazily rather than with `[^<]+`, so observation text may contain a
+ * literal `<` (for example "Array<string>" or "a < b") without being dropped.
+ *
+ * @param text - Raw response text; prose around the XML blocks is ignored.
+ * @returns Every observation that has a non-empty <type> and <text> and whose
+ *   type is one of: decision, bugfix, feature, refactor, discovery. All other
+ *   blocks are skipped with a console warning.
+ */
+export function parseObservations(text: string): ParsedObservation[] {
+  // Built once per call instead of once per matched block
+  const validTypes = ['decision', 'bugfix', 'feature', 'refactor', 'discovery'];
+  const observations: ParsedObservation[] = [];
+
+  // Lazy quantifiers: every match ends at the nearest closing tag
+  const blockRegex = /<observation>([\s\S]*?)<\/observation>/g;
+  const typeRegex = /<type>([\s\S]*?)<\/type>/;
+  const textRegex = /<text>([\s\S]*?)<\/text>/;
+
+  let blockMatch: RegExpExecArray | null;
+  while ((blockMatch = blockRegex.exec(text)) !== null) {
+    const blockContent = blockMatch[1];
+    const type = typeRegex.exec(blockContent)?.[1]?.trim();
+    const observationText = textRegex.exec(blockContent)?.[1]?.trim();
+
+    // A block without both children (or with blank children) carries nothing to store
+    if (!type || !observationText) {
+      console.warn('[SDK Parser] Observation missing <type> or <text>, skipping');
+      continue;
+    }
+
+    // Validate type
+    if (!validTypes.includes(type)) {
+      console.warn(`[SDK Parser] Invalid observation type: ${type}, skipping`);
+      continue;
+    }
+
+    observations.push({ type, text: observationText });
+  }
+
+  return observations;
+}
+
+/**
+ * Parse the summary XML block from an SDK response.
+ *
+ * Only the first <summary>...</summary> block is considered. The six text
+ * fields (request, investigated, learned, completed, next_steps, notes) must
+ * all be present. A field that is present but empty, such as <notes></notes>,
+ * counts as present and is returned as an empty string instead of causing the
+ * whole summary to be discarded. The file lists default to empty arrays.
+ *
+ * @param text - Raw response text; prose around the XML block is ignored.
+ * @returns The parsed summary, or null when there is no <summary> block or a
+ *   required text field is missing from it.
+ */
+export function parseSummary(text: string): ParsedSummary | null {
+  // Match <summary>...</summary> block (non-greedy)
+  const summaryMatch = /<summary>([\s\S]*?)<\/summary>/.exec(text);
+  if (!summaryMatch) {
+    return null;
+  }
+
+  const summaryContent = summaryMatch[1];
+
+  // Extract required fields
+  const request = extractField(summaryContent, 'request');
+  const investigated = extractField(summaryContent, 'investigated');
+  const learned = extractField(summaryContent, 'learned');
+  const completed = extractField(summaryContent, 'completed');
+  const next_steps = extractField(summaryContent, 'next_steps');
+  const notes = extractField(summaryContent, 'notes');
+
+  // "Missing" means the element was not found (null); an empty string is a
+  // legitimate value and must not be rejected by a truthiness test.
+  if (
+    request === null ||
+    investigated === null ||
+    learned === null ||
+    completed === null ||
+    next_steps === null ||
+    notes === null
+  ) {
+    console.warn('[SDK Parser] Summary missing required fields');
+    return null;
+  }
+
+  return {
+    request,
+    investigated,
+    learned,
+    completed,
+    next_steps,
+    files_read: extractFileArray(summaryContent, 'files_read'),
+    files_edited: extractFileArray(summaryContent, 'files_edited'),
+    notes
+  };
+}
+
+/**
+ * Extract a simple field value from XML content.
+ *
+ * The value is matched lazily up to the first closing tag for the field, so
+ * it may contain a literal `<` (for example "replaced Map<string, T>").
+ *
+ * @param content - XML fragment to search.
+ * @param fieldName - Element name; interpolated into a regular expression as-is,
+ *   so it must be a plain tag name.
+ * @returns The trimmed element content (possibly an empty string), or null
+ *   when the element is not found.
+ */
+function extractField(content: string, fieldName: string): string | null {
+  const regex = new RegExp(`<${fieldName}>([\\s\\S]*?)</${fieldName}>`);
+  const match = regex.exec(content);
+  return match ? match[1].trim() : null;
+}
+
+/**
+ * Collect the <file> entries nested inside a named list element.
+ * Yields an empty list when the element is absent or has no <file> children.
+ */
+function extractFileArray(content: string, arrayName: string): string[] {
+  // Locate the list element first; the 's' flag lets '.' span line breaks
+  const listBlock = new RegExp(`<${arrayName}>(.*?)</${arrayName}>`, 's').exec(content);
+  if (listBlock === null) {
+    return [];
+  }
+
+  // Each <file> child contributes its trimmed text
+  return Array.from(
+    listBlock[1].matchAll(/<file>([^<]+)<\/file>/g),
+    (entry) => entry[1].trim()
+  );
+}
@@ -0,0 +1,177 @@
+/**
+ * SDK Prompts Module
+ * Generates prompts for the Claude Agent SDK memory worker
+ */
+
+/**
+ * A queued tool execution that is turned into a prompt by buildObservationPrompt.
+ */
+export interface Observation {
+  /** Identifier of the queued observation. */
+  id: number;
+  /** Name of the tool that ran; shown on the prompt's "Tool:" line. */
+  tool_name: string;
+  /** Tool input as JSON text; it is parsed and pretty-printed into the prompt. */
+  tool_input: string;
+  /** Tool output as JSON text; it is parsed and pretty-printed into the prompt. */
+  tool_output: string;
+  /** Creation time; passed straight to `new Date(...)`, i.e. read as epoch milliseconds. */
+  created_at_epoch: number;
+}
+
+/**
+ * Session record handed to buildFinalizePrompt.
+ */
+export interface SDKSession {
+  /** Identifier of the session record. */
+  id: number;
+  /** Session ID assigned by the SDK; null when none has been recorded. */
+  sdk_session_id: string | null;
+  /** Project the session belongs to. */
+  project: string;
+  /** The user's prompt for this session. */
+  user_prompt: string;
+}
+
+/**
+ * Compose the first message sent to the SDK agent.
+ *
+ * The prompt introduces the agent's role as a memory assistant, lists what is
+ * and is not worth capturing, and spells out the exact <observation> XML
+ * format the worker parses.
+ *
+ * @param project - Project name shown in the opening line.
+ * @param sessionId - Value printed on the "Session ID" line.
+ * @param userPrompt - Value printed on the "User's Goal" line.
+ * @returns The complete prompt text.
+ */
+export function buildInitPrompt(project: string, sessionId: string, userPrompt: string): string {
+  // Calendar date portion (YYYY-MM-DD) of the current UTC timestamp
+  const today = new Date().toISOString().split('T')[0];
+
+  const prompt = `You are a memory assistant for the "${project}" project.
+
+SESSION CONTEXT
+---------------
+Session ID: ${sessionId}
+User's Goal: ${userPrompt}
+Date: ${today}
+
+YOUR ROLE
+---------
+You will observe tool executions during this Claude Code session. Your job is to:
+
+1. Extract meaningful insights (not just raw data)
+2. Store atomic observations in SQLite
+3. Focus on: key decisions, patterns discovered, problems solved, technical insights
+
+WHAT TO CAPTURE
+----------------
+✓ Architecture decisions (e.g., "chose PostgreSQL over MongoDB for ACID guarantees")
+✓ Bug fixes (e.g., "fixed race condition in auth middleware by adding mutex")
+✓ New features (e.g., "implemented JWT refresh token flow")
+✓ Refactorings (e.g., "extracted validation logic into separate service")
+✓ Discoveries (e.g., "found that API rate limit is 100 req/min")
+
+✗ NOT routine operations (reading files, listing directories)
+✗ NOT work-in-progress (only completed work)
+✗ NOT obvious facts (e.g., "TypeScript file has types")
+
+HOW TO STORE OBSERVATIONS
+--------------------------
+When you identify something worth remembering, output your observation in this EXACT XML format:
+
+\`\`\`xml
+<observation>
+  <type>feature</type>
+  <text>Implemented JWT token refresh flow with 7-day expiry</text>
+</observation>
+\`\`\`
+
+Valid types: decision, bugfix, feature, refactor, discovery
+
+Structure requirements:
+- <observation> is the root element
+- <type> must be one of the 5 valid types (single word)
+- <text> contains your concise observation (one sentence preferred)
+- No additional fields or nesting
+
+The SDK worker will parse all <observation> blocks from your response using regex and store them in SQLite.
+
+You can include your reasoning before or after the observation block, or just output the observation by itself.
+
+EXAMPLE
+-------
+Bad: "Read src/auth.ts file"
+Good: "Implemented JWT token refresh flow with 7-day expiry"
+
+Wait for tool observations. Acknowledge this message briefly.`;
+
+  return prompt;
+}
+
+/**
+ * Pretty-print a tool payload for inclusion in a prompt.
+ * Valid JSON is re-indented with two spaces; anything else (plain text, an
+ * empty string, truncated JSON) is returned unchanged instead of throwing.
+ */
+function formatToolPayload(raw: string): string {
+  try {
+    return JSON.stringify(JSON.parse(raw), null, 2);
+  } catch {
+    return raw;
+  }
+}
+
+/**
+ * Build the prompt that presents one tool observation to the SDK agent.
+ *
+ * The tool input and output are pretty-printed when they are valid JSON and
+ * included verbatim otherwise, so a non-JSON payload cannot abort prompt
+ * construction.
+ *
+ * @param obs - Observation to describe; `created_at_epoch` is read as epoch
+ *   milliseconds and rendered as an ISO-8601 timestamp.
+ * @returns The complete prompt text, ending with the analysis instructions.
+ */
+export function buildObservationPrompt(obs: Observation): string {
+  return `TOOL OBSERVATION
+================
+Tool: ${obs.tool_name}
+Time: ${new Date(obs.created_at_epoch).toISOString()}
+
+Input:
+${formatToolPayload(obs.tool_input)}
+
+Output:
+${formatToolPayload(obs.tool_output)}
+
+ANALYSIS TASK
+-------------
+1. Does this observation contain something worth remembering?
+2. If YES: Output the observation in this EXACT XML format:
+
+   \`\`\`xml
+   <observation>
+     <type>feature</type>
+     <text>Your concise observation here</text>
+   </observation>
+   \`\`\`
+
+   Requirements:
+   - Use one of these types: decision, bugfix, feature, refactor, discovery
+   - Keep text concise (one sentence preferred)
+   - No markdown formatting inside <text>
+   - No additional XML fields
+
+3. If NO: Just acknowledge and wait for next observation
+
+Remember: Quality over quantity. Only store meaningful insights.`;
+}
+
+/**
+ * Compose the closing message that asks the SDK agent for a session summary.
+ *
+ * The text is fixed: it lists the questions the summary must answer and the
+ * exact <summary> XML layout the worker parses.
+ *
+ * @param session - Session being finalized; the prompt text does not currently
+ *   interpolate any of its fields.
+ * @returns The complete prompt text.
+ */
+export function buildFinalizePrompt(session: SDKSession): string {
+  const prompt = `SESSION ENDING
+==============
+The Claude Code session is finishing.
+
+FINAL TASK
+----------
+1. Review the observations you've stored this session
+2. Generate a structured summary that answers these questions:
+   - What did user request?
+   - What did you investigate?
+   - What did you learn?
+   - What did you do?
+   - What's next?
+   - Files read
+   - Files edited
+   - Notes
+
+3. Generate the structured summary and output it in this EXACT XML format:
+
+\`\`\`xml
+<summary>
+  <request>Implement JWT authentication system</request>
+  <investigated>Existing auth middleware, session management, token storage patterns</investigated>
+  <learned>Current system uses session cookies; no JWT support; race condition in middleware</learned>
+  <completed>Implemented JWT token + refresh flow with 7-day expiry; fixed race condition with mutex; added token validation middleware</completed>
+  <next_steps>Add token revocation API endpoint; write integration tests</next_steps>
+  <files_read>
+    <file>src/auth.ts</file>
+    <file>src/middleware/session.ts</file>
+    <file>src/types/user.ts</file>
+  </files_read>
+  <files_edited>
+    <file>src/auth.ts</file>
+    <file>src/middleware/auth.ts</file>
+    <file>src/routes/auth.ts</file>
+  </files_edited>
+  <notes>Token secret stored in .env; refresh tokens use rotation strategy</notes>
+</summary>
+\`\`\`
+
+Structure requirements:
+- <summary> is the root element
+- All 8 child elements are REQUIRED: request, investigated, learned, completed, next_steps, files_read, files_edited, notes
+- <files_read> and <files_edited> must contain <file> child elements (one per file)
+- If no files were read/edited, use empty tags: <files_read></files_read>
+- Text fields can be multiple sentences but avoid markdown formatting
+- Use underscores in element names: next_steps, files_read, files_edited
+
+The SDK worker will parse the <summary> block and extract all fields to store in SQLite.
+
+Generate the summary now in the required XML format.`;
+
+  return prompt;
+}
@@ -0,0 +1,217 @@
+#!/usr/bin/env bun
+/**
+ * SDK Worker Process
+ * Background agent that processes tool observations and generates session summaries
+ */
+
+import { query } from '@anthropic-ai/claude-agent-sdk';
+import { HooksDatabase } from '../services/sqlite/HooksDatabase.js';
+import { buildInitPrompt, buildObservationPrompt, buildFinalizePrompt } from './prompts.js';
+import { parseObservations, parseSummary } from './parser.js';
+import type { Observation, SDKSession } from './prompts.js';
+
+// Delay between polls of the observation queue, in milliseconds (1 second)
+const POLL_INTERVAL_MS = 1_000;
+// Model identifier handed to the SDK query
+const MODEL = 'claude-sonnet-4-5';
+// Tool names handed to the SDK query as disallowed
+const DISALLOWED_TOOLS = [
+  'Glob',
+  'Grep',
+  'ListMcpResourcesTool',
+  'WebSearch'
+];
+
+/**
+ * Worker entry point.
+ * Reads the session row ID from the first command-line argument and runs an
+ * SDKWorker for it; exits with status 1 when the argument is missing,
+ * non-numeric, or zero.
+ */
+async function main() {
+  const rawSessionArg = process.argv[2];
+  const sessionDbId = Number.parseInt(rawSessionArg, 10);
+
+  // NaN (absent or non-numeric argument) and 0 are both treated as "no ID"
+  if (Number.isNaN(sessionDbId) || sessionDbId === 0) {
+    console.error('[SDK Worker] Missing session ID argument');
+    process.exit(1);
+  }
+
+  await new SDKWorker(sessionDbId).run();
+}
+
+/**
+ * SDK Worker class - handles the full lifecycle of observation processing.
+ *
+ * Lifecycle of run():
+ *   1. load the session row for the given ID,
+ *   2. stream prompts into the SDK agent (init prompt, one prompt per pending
+ *      observation, then the finalize prompt),
+ *   3. parse <observation>/<summary> XML out of the agent's replies and store
+ *      it through HooksDatabase,
+ *   4. mark the session completed or failed and close the database.
+ */
+class SDKWorker {
+  private sessionDbId: number;
+  private db: HooksDatabase;
+  // Session ID reported by the SDK's init message; null until it arrives
+  private sdkSessionId: string | null = null;
+  private project: string = '';
+  private userPrompt: string = '';
+  private abortController: AbortController;
+  // Set once the FINALIZE marker has been seen; stops the polling loop
+  private isFinalized = false;
+
+  /**
+   * @param sessionDbId - Row ID of the session in the sdk_sessions table.
+   */
+  constructor(sessionDbId: number) {
+    this.sessionDbId = sessionDbId;
+    this.db = new HooksDatabase();
+    this.abortController = new AbortController();
+  }
+
+  /**
+   * Main run loop.
+   *
+   * Always closes the database before returning or exiting. Exits the process
+   * with status 1 when the session cannot be found or any step throws; in the
+   * latter case the session is also marked failed.
+   */
+  async run(): Promise<void> {
+    let exitCode = 0;
+
+    try {
+      // Load session info
+      const session = await this.loadSession();
+      if (!session) {
+        console.error('[SDK Worker] Session not found');
+        exitCode = 1;
+        return; // the finally block closes the DB and exits
+      }
+
+      this.project = session.project;
+      this.userPrompt = session.user_prompt;
+
+      // Run SDK agent with streaming input
+      await this.runSDKAgent();
+
+      // Mark session as completed
+      this.db.markSessionCompleted(this.sessionDbId);
+    } catch (error: unknown) {
+      exitCode = 1;
+      const message = error instanceof Error ? error.message : String(error);
+      console.error('[SDK Worker] Error:', message);
+
+      // Guarded so a failing status update cannot hide the original error
+      try {
+        this.db.markSessionFailed(this.sessionDbId);
+      } catch (markError: unknown) {
+        console.error('[SDK Worker] Could not mark session as failed:', markError);
+      }
+    } finally {
+      // process.exit() does not run pending finally blocks, so close first
+      this.db.close();
+      if (exitCode !== 0) {
+        process.exit(exitCode);
+      }
+    }
+  }
+
+  /**
+   * Load session from database.
+   *
+   * NOTE(review): this reaches into HooksDatabase's internal `db` handle
+   * through an `any` cast; a public lookup method on HooksDatabase would be
+   * safer - confirm whether one exists.
+   *
+   * @returns The session row, or null when no row has this ID.
+   */
+  private async loadSession(): Promise<SDKSession | null> {
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- see NOTE above
+    const db = this.db as any;
+    // Named `statement` so it does not shadow the imported SDK `query`
+    const statement = db.db.query(`
+      SELECT id, sdk_session_id, project, user_prompt
+      FROM sdk_sessions
+      WHERE id = ?
+      LIMIT 1
+    `);
+
+    const row = statement.get(this.sessionDbId) as SDKSession | null | undefined;
+    return row ?? null;
+  }
+
+  /**
+   * Run SDK agent with streaming input mode.
+   *
+   * query() takes `{ prompt, options }` and returns an async generator of SDK
+   * messages; nothing happens until that generator is iterated, so the
+   * messages are consumed here and dispatched by type.
+   */
+  private async runSDKAgent(): Promise<void> {
+    const response = query({
+      prompt: this.createMessageGenerator(),
+      options: {
+        model: MODEL,
+        disallowedTools: DISALLOWED_TOOLS,
+        abortController: this.abortController
+      }
+    });
+
+    for await (const message of response) {
+      if (message.type === 'system' && message.subtype === 'init') {
+        // Capture SDK session ID from init message
+        if (message.session_id) {
+          this.sdkSessionId = message.session_id;
+          this.db.updateSDKSessionId(this.sessionDbId, message.session_id);
+        }
+      } else if (message.type === 'assistant') {
+        // Parse and store observations from agent response
+        this.handleAgentMessage(this.extractAssistantText(message.message.content));
+      }
+    }
+  }
+
+  /**
+   * Flatten assistant message content into plain text.
+   * Content may be a string or an array of content blocks; only blocks of
+   * type "text" contribute, joined with newlines.
+   */
+  private extractAssistantText(content: unknown): string {
+    if (typeof content === 'string') {
+      return content;
+    }
+    if (!Array.isArray(content)) {
+      return '';
+    }
+
+    const parts: string[] = [];
+    for (const block of content as unknown[]) {
+      if (typeof block !== 'object' || block === null) {
+        continue;
+      }
+      const candidate = block as { type?: unknown; text?: unknown };
+      if (candidate.type === 'text' && typeof candidate.text === 'string') {
+        parts.push(candidate.text);
+      }
+    }
+    return parts.join('\n');
+  }
+
+  /**
+   * Create async message generator for SDK streaming input.
+   *
+   * Yields the init prompt, then polls the observation queue every
+   * POLL_INTERVAL_MS and yields one prompt per pending observation. When the
+   * FINALIZE marker is seen it yields the finalize prompt and ends, which
+   * closes the input stream.
+   */
+  private async* createMessageGenerator(): AsyncGenerator<{
+    type: 'user';
+    session_id: string;
+    parent_tool_use_id: null;
+    message: { role: 'user'; content: string };
+  }> {
+    const claudeSessionId = `session-${this.sessionDbId}`;
+
+    // Wrap prompt text in the user-message envelope that streaming input expects
+    const asUserMessage = (content: string) => ({
+      type: 'user' as const,
+      session_id: claudeSessionId,
+      parent_tool_use_id: null,
+      message: { role: 'user' as const, content }
+    });
+
+    // Yield initial prompt
+    yield asUserMessage(buildInitPrompt(this.project, claudeSessionId, this.userPrompt));
+
+    // Poll observation queue
+    while (!this.isFinalized) {
+      await this.sleep(POLL_INTERVAL_MS);
+
+      if (!this.sdkSessionId) {
+        continue; // Wait for SDK session ID to be captured
+      }
+
+      // Get pending observations
+      const observations = this.db.getPendingObservations(this.sdkSessionId, 10);
+
+      for (const obs of observations) {
+        // Check for FINALIZE message
+        if (this.isFinalizationMessage(obs)) {
+          this.isFinalized = true;
+          const session = await this.loadSession();
+          if (session) {
+            yield asUserMessage(buildFinalizePrompt(session));
+          }
+          this.db.markObservationProcessed(obs.id);
+          break;
+        }
+
+        // Send observation to SDK
+        yield asUserMessage(buildObservationPrompt(obs));
+
+        // Mark as processed only after the consumer has pulled the next message
+        this.db.markObservationProcessed(obs.id);
+      }
+    }
+  }
+
+  /**
+   * Handle agent message and parse observations/summaries.
+   * Nothing is stored until the SDK session ID has been captured.
+   */
+  private handleAgentMessage(content: string): void {
+    // Parse observations
+    const observations = parseObservations(content);
+    for (const obs of observations) {
+      if (this.sdkSessionId) {
+        this.db.storeObservation(this.sdkSessionId, this.project, obs.type, obs.text);
+      }
+    }
+
+    // Parse summary (if present)
+    const summary = parseSummary(content);
+    if (summary && this.sdkSessionId) {
+      // Convert file arrays to JSON strings
+      const summaryWithArrays = {
+        request: summary.request,
+        investigated: summary.investigated,
+        learned: summary.learned,
+        completed: summary.completed,
+        next_steps: summary.next_steps,
+        files_read: JSON.stringify(summary.files_read),
+        files_edited: JSON.stringify(summary.files_edited),
+        notes: summary.notes
+      };
+
+      this.db.storeSummary(this.sdkSessionId, this.project, summaryWithArrays);
+    }
+  }
+
+  /**
+   * Check if observation is a FINALIZE message
+   * (a queue entry whose tool name is the literal 'FINALIZE').
+   */
+  private isFinalizationMessage(obs: Observation): boolean {
+    return obs.tool_name === 'FINALIZE';
+  }
+
+  /**
+   * Sleep helper: resolves after `ms` milliseconds.
+   */
+  private sleep(ms: number): Promise<void> {
+    return new Promise(resolve => setTimeout(resolve, ms));
+  }
+}
+
+// Start the worker only when this file is the program entry point,
+// not when it is imported by another module
+if (import.meta.main) {
+  void main().catch((fatal) => {
+    console.error('[SDK Worker] Fatal error:', fatal);
+    process.exit(1);
+  });
+}