backup: Phase 1 agent work (security, persistence, batch endpoint)

This is a backup of all work done by the 3 Phase 1 agents:

Agent A - Command Injection Fix (Issue #354):
- Fixed command injection in BranchManager.ts
- Fixed unnecessary shell usage in bun-path.ts
- Added comprehensive security test suite
- Created SECURITY.md and SECURITY_AUDIT_REPORT.md

Agent B - Observation Persistence Fix (Issue #353):
- Added PendingMessageStore from PR #335
- Integrated persistent queue into SessionManager
- Modified SDKAgent to mark messages complete
- Updated SessionStore with pending_messages migration
- Updated worker-types.ts with new interfaces

Agent C - Batch Endpoint Verification (Issue #348):
- Created batch-observations.test.ts
- Updated worker-service.mdx documentation

Also includes:
- Documentation context files (biomimetic, windows struggles)
- Build artifacts from agent testing

This work will be re-evaluated after v7.3.0 release.
This commit is contained in:
Alex Newman
2025-12-16 15:44:06 -05:00
parent 2e919df2b4
commit 282345f379
41 changed files with 3130 additions and 147 deletions
+84 -26
View File
@@ -5,7 +5,7 @@
* The installed plugin at ~/.claude/plugins/marketplaces/thedotmack/ is a git repo.
*/
import { execSync } from 'child_process';
import { execSync, spawnSync } from 'child_process';
import { existsSync, unlinkSync } from 'fs';
import { homedir } from 'os';
import { join } from 'path';
@@ -13,6 +13,21 @@ import { logger } from '../../utils/logger.js';
const INSTALLED_PLUGIN_PATH = join(homedir(), '.claude', 'plugins', 'marketplaces', 'thedotmack');
/**
* Validate branch name to prevent command injection
* Only allows alphanumeric, hyphens, underscores, forward slashes, and dots
*/
function isValidBranchName(branchName: string): boolean {
if (!branchName || typeof branchName !== 'string') {
return false;
}
// Git branch name validation: alphanumeric, hyphen, underscore, slash, dot
// Must not start with dot, hyphen, or slash
// Must not contain double dots (..)
const validBranchRegex = /^[a-zA-Z0-9][a-zA-Z0-9._/-]*$/;
return validBranchRegex.test(branchName) && !branchName.includes('..');
}
// Timeout constants
const GIT_COMMAND_TIMEOUT_MS = 30_000;
const NPM_INSTALL_TIMEOUT_MS = 120_000;
@@ -35,27 +50,54 @@ export interface SwitchResult {
}
/**
* Execute git command in installed plugin directory
* Execute git command in installed plugin directory using safe array-based arguments
* SECURITY: Uses spawnSync with argument array to prevent command injection
*/
function execGit(command: string): string {
return execSync(`git ${command}`, {
function execGit(args: string[]): string {
const result = spawnSync('git', args, {
cwd: INSTALLED_PLUGIN_PATH,
encoding: 'utf-8',
timeout: GIT_COMMAND_TIMEOUT_MS,
windowsHide: true
}).trim();
windowsHide: true,
shell: false // CRITICAL: Never use shell with user input
});
if (result.error) {
throw result.error;
}
if (result.status !== 0) {
throw new Error(result.stderr || result.stdout || 'Git command failed');
}
return result.stdout.trim();
}
/**
* Execute shell command in installed plugin directory
* Execute npm command in installed plugin directory using safe array-based arguments
* SECURITY: Uses spawnSync with argument array to prevent command injection
*/
function execShell(command: string, timeoutMs: number = DEFAULT_SHELL_TIMEOUT_MS): string {
return execSync(command, {
function execNpm(args: string[], timeoutMs: number = NPM_INSTALL_TIMEOUT_MS): string {
const isWindows = process.platform === 'win32';
const npmCmd = isWindows ? 'npm.cmd' : 'npm';
const result = spawnSync(npmCmd, args, {
cwd: INSTALLED_PLUGIN_PATH,
encoding: 'utf-8',
timeout: timeoutMs,
windowsHide: true
}).trim();
windowsHide: true,
shell: false // CRITICAL: Never use shell with user input
});
if (result.error) {
throw result.error;
}
if (result.status !== 0) {
throw new Error(result.stderr || result.stdout || 'npm command failed');
}
return result.stdout.trim();
}
/**
@@ -77,10 +119,10 @@ export function getBranchInfo(): BranchInfo {
try {
// Get current branch
const branch = execGit('rev-parse --abbrev-ref HEAD');
const branch = execGit(['rev-parse', '--abbrev-ref', 'HEAD']);
// Check if dirty (has uncommitted changes)
const status = execGit('status --porcelain');
const status = execGit(['status', '--porcelain']);
const isDirty = status.length > 0;
// Determine if on beta branch
@@ -118,6 +160,14 @@ export function getBranchInfo(): BranchInfo {
* 6. Restart worker (handled by caller after response)
*/
export async function switchBranch(targetBranch: string): Promise<SwitchResult> {
// SECURITY: Validate branch name to prevent command injection
if (!isValidBranchName(targetBranch)) {
return {
success: false,
error: `Invalid branch name: ${targetBranch}. Branch names must be alphanumeric with hyphens, underscores, slashes, or dots.`
};
}
const info = getBranchInfo();
if (!info.isGitRepo) {
@@ -143,25 +193,25 @@ export async function switchBranch(targetBranch: string): Promise<SwitchResult>
// 1. Discard local changes (safe - user data is at ~/.claude-mem/)
logger.debug('BRANCH', 'Discarding local changes');
execGit('checkout -- .');
execGit('clean -fd'); // Remove untracked files too
execGit(['checkout', '--', '.']);
execGit(['clean', '-fd']); // Remove untracked files too
// 2. Fetch latest
logger.debug('BRANCH', 'Fetching from origin');
execGit('fetch origin');
execGit(['fetch', 'origin']);
// 3. Checkout target branch
logger.debug('BRANCH', 'Checking out branch', { branch: targetBranch });
try {
execGit(`checkout ${targetBranch}`);
execGit(['checkout', targetBranch]);
} catch {
// Branch might not exist locally, try tracking remote
execGit(`checkout -b ${targetBranch} origin/${targetBranch}`);
execGit(['checkout', '-b', targetBranch, `origin/${targetBranch}`]);
}
// 4. Pull latest
logger.debug('BRANCH', 'Pulling latest');
execGit(`pull origin ${targetBranch}`);
execGit(['pull', 'origin', targetBranch]);
// 5. Clear install marker and run npm install
const installMarker = join(INSTALLED_PLUGIN_PATH, '.install-version');
@@ -170,7 +220,7 @@ export async function switchBranch(targetBranch: string): Promise<SwitchResult>
}
logger.debug('BRANCH', 'Running npm install');
execShell('npm install', NPM_INSTALL_TIMEOUT_MS);
execNpm(['install'], NPM_INSTALL_TIMEOUT_MS);
logger.success('BRANCH', 'Branch switch complete', {
branch: targetBranch
@@ -186,8 +236,8 @@ export async function switchBranch(targetBranch: string): Promise<SwitchResult>
// Try to recover by checking out original branch
try {
if (info.branch) {
execGit(`checkout ${info.branch}`);
if (info.branch && isValidBranchName(info.branch)) {
execGit(['checkout', info.branch]);
}
} catch {
// Recovery failed, user needs manual intervention
@@ -214,21 +264,29 @@ export async function pullUpdates(): Promise<SwitchResult> {
}
try {
// SECURITY: Validate branch name before use
if (!isValidBranchName(info.branch)) {
return {
success: false,
error: `Invalid current branch name: ${info.branch}`
};
}
logger.info('BRANCH', 'Pulling updates', { branch: info.branch });
// Discard local changes first
execGit('checkout -- .');
execGit(['checkout', '--', '.']);
// Fetch and pull
execGit('fetch origin');
execGit(`pull origin ${info.branch}`);
execGit(['fetch', 'origin']);
execGit(['pull', 'origin', info.branch]);
// Clear install marker and reinstall
const installMarker = join(INSTALLED_PLUGIN_PATH, '.install-version');
if (existsSync(installMarker)) {
unlinkSync(installMarker);
}
execShell('npm install', NPM_INSTALL_TIMEOUT_MS);
execNpm(['install'], NPM_INSTALL_TIMEOUT_MS);
logger.success('BRANCH', 'Updates pulled', { branch: info.branch });
+15
View File
@@ -396,6 +396,21 @@ export class SDKAgent {
}
}
// CRITICAL: Mark ALL pending messages as successfully processed
// This prevents message loss if worker crashes before SDK finishes
const pendingMessageStore = this.sessionManager.getPendingMessageStore();
if (session.pendingProcessingIds.size > 0) {
for (const messageId of session.pendingProcessingIds) {
pendingMessageStore.markProcessed(messageId);
}
logger.debug('SDK', 'Messages marked as processed', {
sessionId: session.sessionDbId,
messageIds: Array.from(session.pendingProcessingIds),
count: session.pendingProcessingIds.size
});
session.pendingProcessingIds.clear();
}
// Broadcast activity status after processing (queue may have changed)
if (worker && typeof worker.broadcastProcessingStatus === 'function') {
worker.broadcastProcessingStatus();
+150 -22
View File
@@ -11,18 +11,31 @@
import { EventEmitter } from 'events';
import { DatabaseManager } from './DatabaseManager.js';
import { logger } from '../../utils/logger.js';
import type { ActiveSession, PendingMessage, ObservationData } from '../worker-types.js';
import type { ActiveSession, PendingMessage, PendingMessageWithId, ObservationData } from '../worker-types.js';
import { PendingMessageStore } from '../sqlite/PendingMessageStore.js';
export class SessionManager {
private dbManager: DatabaseManager;
private sessions: Map<number, ActiveSession> = new Map();
private sessionQueues: Map<number, EventEmitter> = new Map();
private onSessionDeletedCallback?: () => void;
private pendingStore: PendingMessageStore | null = null;
constructor(dbManager: DatabaseManager) {
this.dbManager = dbManager;
}
/**
* Get or create PendingMessageStore (lazy initialization to avoid circular dependency)
*/
private getPendingStore(): PendingMessageStore {
if (!this.pendingStore) {
const sessionStore = this.dbManager.getSessionStore();
this.pendingStore = new PendingMessageStore(sessionStore.db, 3);
}
return this.pendingStore;
}
/**
* Set callback to be called when a session is deleted (for broadcasting status)
*/
@@ -103,7 +116,8 @@ export class SessionManager {
lastPromptNumber: promptNumber || this.dbManager.getSessionStore().getPromptCounter(sessionDbId),
startTime: Date.now(),
cumulativeInputTokens: 0,
cumulativeOutputTokens: 0
cumulativeOutputTokens: 0,
pendingProcessingIds: new Set()
};
this.sessions.set(sessionDbId, session);
@@ -133,6 +147,9 @@ export class SessionManager {
/**
* Queue an observation for processing (zero-latency notification)
* Auto-initializes session if not in memory but exists in database
*
* CRITICAL: Persists to database FIRST before adding to in-memory queue.
* This ensures observations survive worker crashes.
*/
queueObservation(sessionDbId: number, data: ObservationData): void {
// Auto-initialize from database if needed (handles worker restarts)
@@ -143,14 +160,33 @@ export class SessionManager {
const beforeDepth = session.pendingMessages.length;
session.pendingMessages.push({
// CRITICAL: Persist to database FIRST
const message: PendingMessage = {
type: 'observation',
tool_name: data.tool_name,
tool_input: data.tool_input,
tool_response: data.tool_response,
prompt_number: data.prompt_number,
cwd: data.cwd
});
};
try {
const messageId = this.getPendingStore().enqueue(sessionDbId, session.claudeSessionId, message);
logger.debug('SESSION', `Observation persisted to DB`, {
sessionId: sessionDbId,
messageId,
tool: data.tool_name
});
} catch (error) {
logger.error('SESSION', 'Failed to persist observation to DB', {
sessionId: sessionDbId,
tool: data.tool_name
}, error);
throw error; // Don't continue if we can't persist
}
// Add to in-memory queue (for backward compatibility with existing iterator)
session.pendingMessages.push(message);
const afterDepth = session.pendingMessages.length;
@@ -171,6 +207,9 @@ export class SessionManager {
/**
* Queue a summarize request (zero-latency notification)
* Auto-initializes session if not in memory but exists in database
*
* CRITICAL: Persists to database FIRST before adding to in-memory queue.
* This ensures summarize requests survive worker crashes.
*/
queueSummarize(sessionDbId: number, lastUserMessage: string, lastAssistantMessage?: string): void {
// Auto-initialize from database if needed (handles worker restarts)
@@ -181,11 +220,28 @@ export class SessionManager {
const beforeDepth = session.pendingMessages.length;
session.pendingMessages.push({
// CRITICAL: Persist to database FIRST
const message: PendingMessage = {
type: 'summarize',
last_user_message: lastUserMessage,
last_assistant_message: lastAssistantMessage
});
};
try {
const messageId = this.getPendingStore().enqueue(sessionDbId, session.claudeSessionId, message);
logger.debug('SESSION', `Summarize persisted to DB`, {
sessionId: sessionDbId,
messageId
});
} catch (error) {
logger.error('SESSION', 'Failed to persist summarize to DB', {
sessionId: sessionDbId
}, error);
throw error; // Don't continue if we can't persist
}
// Add to in-memory queue (for backward compatibility with existing iterator)
session.pendingMessages.push(message);
const afterDepth = session.pendingMessages.length;
@@ -306,8 +362,12 @@ export class SessionManager {
/**
* Get message iterator for SDKAgent to consume (event-driven, no polling)
* Auto-initializes session if not in memory but exists in database
*
* CRITICAL: Uses PendingMessageStore for crash-safe message persistence.
* Messages are marked as 'processing' when yielded and must be marked 'processed'
* by the SDK agent after successful completion.
*/
async *getMessageIterator(sessionDbId: number): AsyncIterableIterator<PendingMessage> {
async *getMessageIterator(sessionDbId: number): AsyncIterableIterator<PendingMessageWithId> {
// Auto-initialize from database if needed (handles worker restarts)
let session = this.sessions.get(sessionDbId);
if (!session) {
@@ -319,32 +379,100 @@ export class SessionManager {
throw new Error(`No emitter for session ${sessionDbId}`);
}
// Linger timeout: how long to wait for new messages before exiting
// This keeps the agent alive between messages, reducing "No active agent" windows
const LINGER_TIMEOUT_MS = 5000; // 5 seconds
while (!session.abortController.signal.aborted) {
// Wait for messages if queue is empty
if (session.pendingMessages.length === 0) {
await new Promise<void>(resolve => {
const handler = () => resolve();
emitter.once('message', handler);
// Check for pending messages in persistent store
const persistentMessage = this.getPendingStore().peekPending(sessionDbId);
if (!persistentMessage) {
// Wait for new messages with timeout
const gotMessage = await new Promise<boolean>(resolve => {
let resolved = false;
const messageHandler = () => {
if (!resolved) {
resolved = true;
clearTimeout(timeoutId);
resolve(true);
}
};
const timeoutHandler = () => {
if (!resolved) {
resolved = true;
emitter.off('message', messageHandler);
resolve(false);
}
};
const timeoutId = setTimeout(timeoutHandler, LINGER_TIMEOUT_MS);
emitter.once('message', messageHandler);
// Also listen for abort
session.abortController.signal.addEventListener('abort', () => {
emitter.off('message', handler);
resolve();
if (!resolved) {
resolved = true;
clearTimeout(timeoutId);
emitter.off('message', messageHandler);
resolve(false);
}
}, { once: true });
});
}
// Yield all pending messages
while (session.pendingMessages.length > 0) {
const message = session.pendingMessages.shift()!;
yield message;
// Re-check for messages after waking up (handles race condition)
const recheckMessage = this.getPendingStore().peekPending(sessionDbId);
if (recheckMessage) {
// Got a message, continue processing
continue;
}
// If we just yielded a summary, that's the end of this batch - stop the iterator
if (message.type === 'summarize') {
logger.info('SESSION', `Summary yielded - ending generator`, { sessionId: sessionDbId });
if (!gotMessage) {
// Timeout or abort - exit the loop
logger.info('SESSION', `Generator exiting after linger timeout`, { sessionId: sessionDbId });
return;
}
continue;
}
// Mark as processing BEFORE yielding (status: pending -> processing)
this.getPendingStore().markProcessing(persistentMessage.id);
// Track this message ID for completion marking
session.pendingProcessingIds.add(persistentMessage.id);
// Convert to PendingMessageWithId and yield
// Include original timestamp for accurate observation timestamps (survives stuck processing)
const message: PendingMessageWithId = {
_persistentId: persistentMessage.id,
_originalTimestamp: persistentMessage.created_at_epoch,
...this.getPendingStore().toPendingMessage(persistentMessage)
};
// Also add to in-memory queue for backward compatibility (status tracking)
session.pendingMessages.push(message);
yield message;
// Remove from in-memory queue after yielding
session.pendingMessages.shift();
// If we just yielded a summary, that's the end of this batch - stop the iterator
if (message.type === 'summarize') {
logger.info('SESSION', `Summary yielded - ending generator`, { sessionId: sessionDbId });
return;
}
}
}
/**
* Get the PendingMessageStore (for SDKAgent to mark messages as processed)
*/
getPendingMessageStore(): PendingMessageStore {
return this.getPendingStore();
}
}