MAESTRO: fix(db): prevent FK constraint failures on worker restart
Cherry-picked source changes from PR #889 by @Et9797. Fixes #846. Key changes: - Add ensureMemorySessionIdRegistered() guard in SessionStore.ts - Add ON UPDATE CASCADE migration (schema v21) for observations and session_summaries FK constraints - Change message queue from claim-and-delete to claim-confirm pattern (PendingMessageStore.ts) - Add spawn deduplication and unrecoverable error detection in SessionRoutes.ts and worker-service.ts - Add forceInit flag to SDKAgent for stale session recovery Build artifacts skipped (pre-existing dompurify dep issue). Path fixes (HealthMonitor.ts, worker-utils.ts) already merged via PR #634. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -72,10 +72,21 @@ export class SDKAgent {
|
||||
// CRITICAL: Only resume if:
|
||||
// 1. memorySessionId exists (was captured from a previous SDK response)
|
||||
// 2. lastPromptNumber > 1 (this is a continuation within the same SDK session)
|
||||
// 3. forceInit is NOT set (stale session recovery clears this)
|
||||
// On worker restart or crash recovery, memorySessionId may exist from a previous
|
||||
// SDK session but we must NOT resume because the SDK context was lost.
|
||||
// NEVER use contentSessionId for resume - that would inject messages into the user's transcript!
|
||||
const hasRealMemorySessionId = !!session.memorySessionId;
|
||||
const shouldResume = hasRealMemorySessionId && session.lastPromptNumber > 1 && !session.forceInit;
|
||||
|
||||
// Clear forceInit after using it
|
||||
if (session.forceInit) {
|
||||
logger.info('SDK', 'forceInit flag set, starting fresh SDK session', {
|
||||
sessionDbId: session.sessionDbId,
|
||||
previousMemorySessionId: session.memorySessionId
|
||||
});
|
||||
session.forceInit = false;
|
||||
}
|
||||
|
||||
// Build isolated environment from ~/.claude-mem/.env
|
||||
// This prevents Issue #733: random ANTHROPIC_API_KEY from project .env files
|
||||
@@ -88,15 +99,15 @@ export class SDKAgent {
|
||||
contentSessionId: session.contentSessionId,
|
||||
memorySessionId: session.memorySessionId,
|
||||
hasRealMemorySessionId,
|
||||
resume_parameter: hasRealMemorySessionId ? session.memorySessionId : '(none - fresh start)',
|
||||
shouldResume,
|
||||
resume_parameter: shouldResume ? session.memorySessionId : '(none - fresh start)',
|
||||
lastPromptNumber: session.lastPromptNumber,
|
||||
authMethod
|
||||
});
|
||||
|
||||
// Debug-level alignment logs for detailed tracing
|
||||
if (session.lastPromptNumber > 1) {
|
||||
const willResume = hasRealMemorySessionId;
|
||||
logger.debug('SDK', `[ALIGNMENT] Resume Decision | contentSessionId=${session.contentSessionId} | memorySessionId=${session.memorySessionId} | prompt#=${session.lastPromptNumber} | hasRealMemorySessionId=${hasRealMemorySessionId} | willResume=${willResume} | resumeWith=${willResume ? session.memorySessionId : 'NONE'}`);
|
||||
logger.debug('SDK', `[ALIGNMENT] Resume Decision | contentSessionId=${session.contentSessionId} | memorySessionId=${session.memorySessionId} | prompt#=${session.lastPromptNumber} | hasRealMemorySessionId=${hasRealMemorySessionId} | shouldResume=${shouldResume} | resumeWith=${shouldResume ? session.memorySessionId : 'NONE'}`);
|
||||
} else {
|
||||
// INIT prompt - never resume even if memorySessionId exists (stale from previous session)
|
||||
const hasStaleMemoryId = hasRealMemorySessionId;
|
||||
@@ -119,10 +130,8 @@ export class SDKAgent {
|
||||
// Isolate observer sessions - they'll appear under project "observer-sessions"
|
||||
// instead of polluting user's actual project resume lists
|
||||
cwd: OBSERVER_SESSIONS_DIR,
|
||||
// Only resume if BOTH: (1) we have a memorySessionId AND (2) this isn't the first prompt
|
||||
// On worker restart, memorySessionId may exist from a previous SDK session but we
|
||||
// need to start fresh since the SDK context was lost
|
||||
...(hasRealMemorySessionId && session.lastPromptNumber > 1 && { resume: session.memorySessionId }),
|
||||
// Only resume if shouldResume is true (memorySessionId exists, not first prompt, not forceInit)
|
||||
...(shouldResume && { resume: session.memorySessionId }),
|
||||
disallowedTools,
|
||||
abortController: session.abortController,
|
||||
pathToClaudeCodeExecutable: claudePath,
|
||||
@@ -134,21 +143,35 @@ export class SDKAgent {
|
||||
|
||||
// Process SDK messages
|
||||
for await (const message of queryResult) {
|
||||
// Capture memory session ID from first SDK message (any type has session_id)
|
||||
// This enables resume for subsequent generator starts within the same user session
|
||||
if (!session.memorySessionId && message.session_id) {
|
||||
// Capture or update memory session ID from SDK message
|
||||
// IMPORTANT: The SDK may return a DIFFERENT session_id on resume than what we sent!
|
||||
// We must always sync the DB to match what the SDK actually uses.
|
||||
//
|
||||
// MULTI-TERMINAL COLLISION FIX (FK constraint bug):
|
||||
// Use ensureMemorySessionIdRegistered() instead of updateMemorySessionId() because:
|
||||
// 1. It's idempotent - safe to call multiple times
|
||||
// 2. It verifies the update happened (SELECT before UPDATE)
|
||||
// 3. Consistent with ResponseProcessor's usage pattern
|
||||
// This ensures FK constraint compliance BEFORE any observations are stored.
|
||||
if (message.session_id && message.session_id !== session.memorySessionId) {
|
||||
const previousId = session.memorySessionId;
|
||||
session.memorySessionId = message.session_id;
|
||||
// Persist to database for cross-restart recovery
|
||||
this.dbManager.getSessionStore().updateMemorySessionId(
|
||||
// Persist to database IMMEDIATELY for FK constraint compliance
|
||||
// This must happen BEFORE any observations referencing this ID are stored
|
||||
this.dbManager.getSessionStore().ensureMemorySessionIdRegistered(
|
||||
session.sessionDbId,
|
||||
message.session_id
|
||||
);
|
||||
// Verify the update by reading back from DB
|
||||
const verification = this.dbManager.getSessionStore().getSessionById(session.sessionDbId);
|
||||
const dbVerified = verification?.memory_session_id === message.session_id;
|
||||
logger.info('SESSION', `MEMORY_ID_CAPTURED | sessionDbId=${session.sessionDbId} | memorySessionId=${message.session_id} | dbVerified=${dbVerified}`, {
|
||||
const logMessage = previousId
|
||||
? `MEMORY_ID_CHANGED | sessionDbId=${session.sessionDbId} | from=${previousId} | to=${message.session_id} | dbVerified=${dbVerified}`
|
||||
: `MEMORY_ID_CAPTURED | sessionDbId=${session.sessionDbId} | memorySessionId=${message.session_id} | dbVerified=${dbVerified}`;
|
||||
logger.info('SESSION', logMessage, {
|
||||
sessionId: session.sessionDbId,
|
||||
memorySessionId: message.session_id
|
||||
memorySessionId: message.session_id,
|
||||
previousId
|
||||
});
|
||||
if (!dbVerified) {
|
||||
logger.error('SESSION', `MEMORY_ID_MISMATCH | sessionDbId=${session.sessionDbId} | expected=${message.session_id} | got=${verification?.memory_session_id}`, {
|
||||
@@ -156,7 +179,7 @@ export class SDKAgent {
|
||||
});
|
||||
}
|
||||
// Debug-level alignment log for detailed tracing
|
||||
logger.debug('SDK', `[ALIGNMENT] Captured | contentSessionId=${session.contentSessionId} → memorySessionId=${message.session_id} | Future prompts will resume with this ID`);
|
||||
logger.debug('SDK', `[ALIGNMENT] ${previousId ? 'Updated' : 'Captured'} | contentSessionId=${session.contentSessionId} → memorySessionId=${message.session_id} | Future prompts will resume with this ID`);
|
||||
}
|
||||
|
||||
// Handle assistant messages
|
||||
@@ -166,6 +189,14 @@ export class SDKAgent {
|
||||
? content.filter((c: any) => c.type === 'text').map((c: any) => c.text).join('\n')
|
||||
: typeof content === 'string' ? content : '';
|
||||
|
||||
// Check for context overflow - prevents infinite retry loops
|
||||
if (textContent.includes('prompt is too long') ||
|
||||
textContent.includes('context window')) {
|
||||
logger.error('SDK', 'Context overflow detected - terminating session');
|
||||
session.abortController.abort();
|
||||
return;
|
||||
}
|
||||
|
||||
const responseSize = textContent.length;
|
||||
|
||||
// Capture token state BEFORE updating (for delta calculation)
|
||||
@@ -317,6 +348,10 @@ export class SDKAgent {
|
||||
|
||||
// Consume pending messages from SessionManager (event-driven, no polling)
|
||||
for await (const message of this.sessionManager.getMessageIterator(session.sessionDbId)) {
|
||||
// CLAIM-CONFIRM: Track message ID for confirmProcessed() after successful storage
|
||||
// The message is now in 'processing' status in DB until ResponseProcessor calls confirmProcessed()
|
||||
session.processingMessageIds.push(message._persistentId);
|
||||
|
||||
// Capture cwd from each message for worktree support
|
||||
if (message.cwd) {
|
||||
cwdTracker.lastCwd = message.cwd;
|
||||
|
||||
Reference in New Issue
Block a user