backup: Phase 1 agent work (security, persistence, batch endpoint)

This is a backup of all work done by the 3 Phase 1 agents:

Agent A - Command Injection Fix (Issue #354):
- Fixed command injection in BranchManager.ts
- Fixed unnecessary shell usage in bun-path.ts
- Added comprehensive security test suite
- Created SECURITY.md and SECURITY_AUDIT_REPORT.md

Agent B - Observation Persistence Fix (Issue #353):
- Added PendingMessageStore from PR #335
- Integrated persistent queue into SessionManager
- Modified SDKAgent to mark messages complete
- Updated SessionStore with pending_messages migration
- Updated worker-types.ts with new interfaces

Agent C - Batch Endpoint Verification (Issue #348):
- Created batch-observations.test.ts
- Updated worker-service.mdx documentation

Also includes:
- Documentation context files (biomimetic, windows struggles)
- Build artifacts from agent testing

This work will be re-evaluated after the v7.3.0 release.
2025-12-16 15:44:06 -05:00
parent 2e919df2b4
commit 282345f379
41 changed files with 3130 additions and 147 deletions
@@ -5,7 +5,7 @@
 * The installed plugin at ~/.claude/plugins/marketplaces/thedotmack/ is a git repo.
 */

-import { execSync } from 'child_process';
+import { execSync, spawnSync } from 'child_process';
 import { existsSync, unlinkSync } from 'fs';
 import { homedir } from 'os';
 import { join } from 'path';
@@ -13,6 +13,21 @@ import { logger } from '../../utils/logger.js';

 const INSTALLED_PLUGIN_PATH = join(homedir(), '.claude', 'plugins', 'marketplaces', 'thedotmack');

+/**
+ * Check whether a branch name is acceptable before it is passed to git as an argument.
+ * Returns true only for a non-empty string of ASCII letters, digits, '.', '_', '/' and '-' that starts with a letter or digit and contains no "..".
+ */
+function isValidBranchName(branchName: string): boolean {
+  if (!branchName || typeof branchName !== 'string') {
+    return false;
+  }
+  // Allow-list: the first character must be a letter or digit; later characters may also be '.', '_', '/' or '-'
+  // Requiring an alphanumeric first character rejects a leading dot, hyphen, underscore, or slash
+  // A ".." sequence would pass the regex, so it is rejected by the separate includes() check
+  const validBranchRegex = /^[a-zA-Z0-9][a-zA-Z0-9._/-]*$/;
+  return validBranchRegex.test(branchName) && !branchName.includes('..');
+}
+
 // Timeout constants
 const GIT_COMMAND_TIMEOUT_MS = 30_000;
 const NPM_INSTALL_TIMEOUT_MS = 120_000;
@@ -35,27 +50,54 @@ export interface SwitchResult {
 }

 /**
- * Execute git command in installed plugin directory
+ * Execute git command in installed plugin directory using safe array-based arguments
+ * SECURITY: Uses spawnSync with argument array to prevent command injection
 */
-function execGit(command: string): string {
-  return execSync(`git ${command}`, {
+function execGit(args: string[]): string {
+  const result = spawnSync('git', args, {
    cwd: INSTALLED_PLUGIN_PATH,
    encoding: 'utf-8',
    timeout: GIT_COMMAND_TIMEOUT_MS,
-    windowsHide: true
-  }).trim();
+    windowsHide: true,
+    shell: false  // CRITICAL: Never use shell with user input
+  });
+
+  if (result.error) {
+    throw result.error;
+  }
+
+  if (result.status !== 0) {
+    throw new Error(result.stderr || result.stdout || 'Git command failed');
+  }
+
+  return result.stdout.trim();
 }

 /**
- * Execute shell command in installed plugin directory
+ * Execute npm command in installed plugin directory using safe array-based arguments
+ * SECURITY: Uses spawnSync with argument array to prevent command injection
 */
-function execShell(command: string, timeoutMs: number = DEFAULT_SHELL_TIMEOUT_MS): string {
-  return execSync(command, {
+function execNpm(args: string[], timeoutMs: number = NPM_INSTALL_TIMEOUT_MS): string {
+  const isWindows = process.platform === 'win32';
+  const npmCmd = isWindows ? 'npm.cmd' : 'npm';
+
+  const result = spawnSync(npmCmd, args, {
    cwd: INSTALLED_PLUGIN_PATH,
    encoding: 'utf-8',
    timeout: timeoutMs,
-    windowsHide: true
-  }).trim();
+    windowsHide: true,
+    shell: false  // CRITICAL: Never use shell with user input
+  });
+
+  if (result.error) {
+    throw result.error;
+  }
+
+  if (result.status !== 0) {
+    throw new Error(result.stderr || result.stdout || 'npm command failed');
+  }
+
+  return result.stdout.trim();
 }

 /**
@@ -77,10 +119,10 @@ export function getBranchInfo(): BranchInfo {

  try {
    // Get current branch
-    const branch = execGit('rev-parse --abbrev-ref HEAD');
+    const branch = execGit(['rev-parse', '--abbrev-ref', 'HEAD']);

    // Check if dirty (has uncommitted changes)
-    const status = execGit('status --porcelain');
+    const status = execGit(['status', '--porcelain']);
    const isDirty = status.length > 0;

    // Determine if on beta branch
@@ -118,6 +160,14 @@ export function getBranchInfo(): BranchInfo {
 * 6. Restart worker (handled by caller after response)
 */
 export async function switchBranch(targetBranch: string): Promise<SwitchResult> {
+  // SECURITY: Validate branch name to prevent command injection
+  if (!isValidBranchName(targetBranch)) {
+    return {
+      success: false,
+      error: `Invalid branch name: ${targetBranch}. Branch names must be alphanumeric with hyphens, underscores, slashes, or dots.`
+    };
+  }
+
  const info = getBranchInfo();

  if (!info.isGitRepo) {
@@ -143,25 +193,25 @@ export async function switchBranch(targetBranch: string): Promise<SwitchResult>

    // 1. Discard local changes (safe - user data is at ~/.claude-mem/)
    logger.debug('BRANCH', 'Discarding local changes');
-    execGit('checkout -- .');
-    execGit('clean -fd'); // Remove untracked files too
+    execGit(['checkout', '--', '.']);
+    execGit(['clean', '-fd']); // Remove untracked files too

    // 2. Fetch latest
    logger.debug('BRANCH', 'Fetching from origin');
-    execGit('fetch origin');
+    execGit(['fetch', 'origin']);

    // 3. Checkout target branch
    logger.debug('BRANCH', 'Checking out branch', { branch: targetBranch });
    try {
-      execGit(`checkout ${targetBranch}`);
+      execGit(['checkout', targetBranch]);
    } catch {
      // Branch might not exist locally, try tracking remote
-      execGit(`checkout -b ${targetBranch} origin/${targetBranch}`);
+      execGit(['checkout', '-b', targetBranch, `origin/${targetBranch}`]);
    }

    // 4. Pull latest
    logger.debug('BRANCH', 'Pulling latest');
-    execGit(`pull origin ${targetBranch}`);
+    execGit(['pull', 'origin', targetBranch]);

    // 5. Clear install marker and run npm install
    const installMarker = join(INSTALLED_PLUGIN_PATH, '.install-version');
@@ -170,7 +220,7 @@ export async function switchBranch(targetBranch: string): Promise<SwitchResult>
    }

    logger.debug('BRANCH', 'Running npm install');
-    execShell('npm install', NPM_INSTALL_TIMEOUT_MS);
+    execNpm(['install'], NPM_INSTALL_TIMEOUT_MS);

    logger.success('BRANCH', 'Branch switch complete', {
      branch: targetBranch
@@ -186,8 +236,8 @@ export async function switchBranch(targetBranch: string): Promise<SwitchResult>

    // Try to recover by checking out original branch
    try {
-      if (info.branch) {
-        execGit(`checkout ${info.branch}`);
+      if (info.branch && isValidBranchName(info.branch)) {
+        execGit(['checkout', info.branch]);
      }
    } catch {
      // Recovery failed, user needs manual intervention
@@ -214,21 +264,29 @@ export async function pullUpdates(): Promise<SwitchResult> {
  }

  try {
+    // SECURITY: Validate branch name before use
+    if (!isValidBranchName(info.branch)) {
+      return {
+        success: false,
+        error: `Invalid current branch name: ${info.branch}`
+      };
+    }
+
    logger.info('BRANCH', 'Pulling updates', { branch: info.branch });

    // Discard local changes first
-    execGit('checkout -- .');
+    execGit(['checkout', '--', '.']);

    // Fetch and pull
-    execGit('fetch origin');
-    execGit(`pull origin ${info.branch}`);
+    execGit(['fetch', 'origin']);
+    execGit(['pull', 'origin', info.branch]);

    // Clear install marker and reinstall
    const installMarker = join(INSTALLED_PLUGIN_PATH, '.install-version');
    if (existsSync(installMarker)) {
      unlinkSync(installMarker);
    }
-    execShell('npm install', NPM_INSTALL_TIMEOUT_MS);
+    execNpm(['install'], NPM_INSTALL_TIMEOUT_MS);

    logger.success('BRANCH', 'Updates pulled', { branch: info.branch });

@@ -396,6 +396,21 @@ export class SDKAgent {
      }
    }

+    // CRITICAL: Mark ALL pending messages as successfully processed
+    // This prevents message loss if worker crashes before SDK finishes
+    const pendingMessageStore = this.sessionManager.getPendingMessageStore();
+    if (session.pendingProcessingIds.size > 0) {
+      for (const messageId of session.pendingProcessingIds) {
+        pendingMessageStore.markProcessed(messageId);
+      }
+      logger.debug('SDK', 'Messages marked as processed', {
+        sessionId: session.sessionDbId,
+        messageIds: Array.from(session.pendingProcessingIds),
+        count: session.pendingProcessingIds.size
+      });
+      session.pendingProcessingIds.clear();
+    }
+
    // Broadcast activity status after processing (queue may have changed)
    if (worker && typeof worker.broadcastProcessingStatus === 'function') {
      worker.broadcastProcessingStatus();
@@ -11,18 +11,31 @@
 import { EventEmitter } from 'events';
 import { DatabaseManager } from './DatabaseManager.js';
 import { logger } from '../../utils/logger.js';
-import type { ActiveSession, PendingMessage, ObservationData } from '../worker-types.js';
+import type { ActiveSession, PendingMessage, PendingMessageWithId, ObservationData } from '../worker-types.js';
+import { PendingMessageStore } from '../sqlite/PendingMessageStore.js';

 export class SessionManager {
  private dbManager: DatabaseManager;
  private sessions: Map<number, ActiveSession> = new Map();
  private sessionQueues: Map<number, EventEmitter> = new Map();
  private onSessionDeletedCallback?: () => void;
+  private pendingStore: PendingMessageStore | null = null;

  constructor(dbManager: DatabaseManager) {
    this.dbManager = dbManager;
  }

+  /**
+   * Return the cached PendingMessageStore, constructing it on first call from the session store's db (lazy initialization to avoid circular dependency)
+   */
+  private getPendingStore(): PendingMessageStore {
+    if (!this.pendingStore) {
+      const sessionStore = this.dbManager.getSessionStore();
+      this.pendingStore = new PendingMessageStore(sessionStore.db, 3); // NOTE(review): 3 is presumably a max-retry limit — confirm against PendingMessageStore
+    }
+    return this.pendingStore;
+  }
+
  /**
   * Set callback to be called when a session is deleted (for broadcasting status)
   */
@@ -103,7 +116,8 @@ export class SessionManager {
      lastPromptNumber: promptNumber || this.dbManager.getSessionStore().getPromptCounter(sessionDbId),
      startTime: Date.now(),
      cumulativeInputTokens: 0,
-      cumulativeOutputTokens: 0
+      cumulativeOutputTokens: 0,
+      pendingProcessingIds: new Set()
    };

    this.sessions.set(sessionDbId, session);
@@ -133,6 +147,9 @@ export class SessionManager {
  /**
   * Queue an observation for processing (zero-latency notification)
   * Auto-initializes session if not in memory but exists in database
+   *
+   * CRITICAL: Persists to database FIRST before adding to in-memory queue.
+   * This ensures observations survive worker crashes.
   */
  queueObservation(sessionDbId: number, data: ObservationData): void {
    // Auto-initialize from database if needed (handles worker restarts)
@@ -143,14 +160,33 @@ export class SessionManager {

    const beforeDepth = session.pendingMessages.length;

-    session.pendingMessages.push({
+    // CRITICAL: Persist to database FIRST
+    const message: PendingMessage = {
      type: 'observation',
      tool_name: data.tool_name,
      tool_input: data.tool_input,
      tool_response: data.tool_response,
      prompt_number: data.prompt_number,
      cwd: data.cwd
-    });
+    };
+
+    try {
+      const messageId = this.getPendingStore().enqueue(sessionDbId, session.claudeSessionId, message);
+      logger.debug('SESSION', `Observation persisted to DB`, {
+        sessionId: sessionDbId,
+        messageId,
+        tool: data.tool_name
+      });
+    } catch (error) {
+      logger.error('SESSION', 'Failed to persist observation to DB', {
+        sessionId: sessionDbId,
+        tool: data.tool_name
+      }, error);
+      throw error; // Don't continue if we can't persist
+    }
+
+    // Add to in-memory queue (for backward compatibility with existing iterator)
+    session.pendingMessages.push(message);

    const afterDepth = session.pendingMessages.length;

@@ -171,6 +207,9 @@ export class SessionManager {
  /**
   * Queue a summarize request (zero-latency notification)
   * Auto-initializes session if not in memory but exists in database
+   *
+   * CRITICAL: Persists to database FIRST before adding to in-memory queue.
+   * This ensures summarize requests survive worker crashes.
   */
  queueSummarize(sessionDbId: number, lastUserMessage: string, lastAssistantMessage?: string): void {
    // Auto-initialize from database if needed (handles worker restarts)
@@ -181,11 +220,28 @@ export class SessionManager {

    const beforeDepth = session.pendingMessages.length;

-    session.pendingMessages.push({
+    // CRITICAL: Persist to database FIRST
+    const message: PendingMessage = {
      type: 'summarize',
      last_user_message: lastUserMessage,
      last_assistant_message: lastAssistantMessage
-    });
+    };
+
+    try {
+      const messageId = this.getPendingStore().enqueue(sessionDbId, session.claudeSessionId, message);
+      logger.debug('SESSION', `Summarize persisted to DB`, {
+        sessionId: sessionDbId,
+        messageId
+      });
+    } catch (error) {
+      logger.error('SESSION', 'Failed to persist summarize to DB', {
+        sessionId: sessionDbId
+      }, error);
+      throw error; // Don't continue if we can't persist
+    }
+
+    // Add to in-memory queue (for backward compatibility with existing iterator)
+    session.pendingMessages.push(message);

    const afterDepth = session.pendingMessages.length;

@@ -306,8 +362,12 @@ export class SessionManager {
  /**
   * Get message iterator for SDKAgent to consume (event-driven, no polling)
   * Auto-initializes session if not in memory but exists in database
+   *
+   * CRITICAL: Uses PendingMessageStore for crash-safe message persistence.
+   * Messages are marked as 'processing' when yielded and must be marked 'processed'
+   * by the SDK agent after successful completion.
   */
-  async *getMessageIterator(sessionDbId: number): AsyncIterableIterator<PendingMessage> {
+  async *getMessageIterator(sessionDbId: number): AsyncIterableIterator<PendingMessageWithId> {
    // Auto-initialize from database if needed (handles worker restarts)
    let session = this.sessions.get(sessionDbId);
    if (!session) {
@@ -319,32 +379,100 @@ export class SessionManager {
      throw new Error(`No emitter for session ${sessionDbId}`);
    }

+    // Linger timeout: how long to wait for new messages before exiting
+    // This keeps the agent alive between messages, reducing "No active agent" windows
+    const LINGER_TIMEOUT_MS = 5000; // 5 seconds
+
    while (!session.abortController.signal.aborted) {
-      // Wait for messages if queue is empty
-      if (session.pendingMessages.length === 0) {
-        await new Promise<void>(resolve => {
-          const handler = () => resolve();
-          emitter.once('message', handler);
+      // Check for pending messages in persistent store
+      const persistentMessage = this.getPendingStore().peekPending(sessionDbId);
+
+      if (!persistentMessage) {
+        // Wait for new messages with timeout
+        const gotMessage = await new Promise<boolean>(resolve => {
+          let resolved = false;
+
+          const messageHandler = () => {
+            if (!resolved) {
+              resolved = true;
+              clearTimeout(timeoutId);
+              resolve(true);
+            }
+          };
+
+          const timeoutHandler = () => {
+            if (!resolved) {
+              resolved = true;
+              emitter.off('message', messageHandler);
+              resolve(false);
+            }
+          };
+
+          const timeoutId = setTimeout(timeoutHandler, LINGER_TIMEOUT_MS);
+
+          emitter.once('message', messageHandler);

          // Also listen for abort
          session.abortController.signal.addEventListener('abort', () => {
-            emitter.off('message', handler);
-            resolve();
+            if (!resolved) {
+              resolved = true;
+              clearTimeout(timeoutId);
+              emitter.off('message', messageHandler);
+              resolve(false);
+            }
          }, { once: true });
        });
-      }

-      // Yield all pending messages
-      while (session.pendingMessages.length > 0) {
-        const message = session.pendingMessages.shift()!;
-        yield message;
+        // Re-check for messages after waking up (handles race condition)
+        const recheckMessage = this.getPendingStore().peekPending(sessionDbId);
+        if (recheckMessage) {
+          // Got a message, continue processing
+          continue;
+        }

-        // If we just yielded a summary, that's the end of this batch - stop the iterator
-        if (message.type === 'summarize') {
-          logger.info('SESSION', `Summary yielded - ending generator`, { sessionId: sessionDbId });
+        if (!gotMessage) {
+          // Timeout or abort - exit the loop
+          logger.info('SESSION', `Generator exiting after linger timeout`, { sessionId: sessionDbId });
          return;
        }
+
+        continue;
+      }
+
+      // Mark as processing BEFORE yielding (status: pending -> processing)
+      this.getPendingStore().markProcessing(persistentMessage.id);
+
+      // Track this message ID for completion marking
+      session.pendingProcessingIds.add(persistentMessage.id);
+
+      // Convert to PendingMessageWithId and yield
+      // Include original timestamp for accurate observation timestamps (survives stuck processing)
+      const message: PendingMessageWithId = {
+        _persistentId: persistentMessage.id,
+        _originalTimestamp: persistentMessage.created_at_epoch,
+        ...this.getPendingStore().toPendingMessage(persistentMessage)
+      };
+
+      // Also add to in-memory queue for backward compatibility (status tracking)
+      session.pendingMessages.push(message);
+
+      yield message;
+
+      // Remove from in-memory queue after yielding
+      session.pendingMessages.shift();
+
+      // If we just yielded a summary, that's the end of this batch - stop the iterator
+      if (message.type === 'summarize') {
+        logger.info('SESSION', `Summary yielded - ending generator`, { sessionId: sessionDbId });
+        return;
      }
    }
  }
+
+  /**
+   * Public accessor for the lazily created PendingMessageStore (SDKAgent uses it to call markProcessed on handled message IDs)
+   */
+  getPendingMessageStore(): PendingMessageStore {
+    return this.getPendingStore();
+  }
 }