fix: prevent stale memory_session_id resume crash after worker restart (Issue #817) (#839)

When the worker restarts, the SDK context is lost but the database still contains
memory_session_id values from the previous worker instance. The existing guard
(lastPromptNumber > 1) doesn't protect against this because lastPromptNumber is
also loaded from the database.

This fix:
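- Starts any session that is initialized from the DB (i.e. not already in the in-memory cache) with memorySessionId set to null, instead of loading the stored memory_session_id
- Adds a warning log when a stale memory_session_id from the DB is discarded
- Lets the SDK agent capture a fresh memory_session_id on its first response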

The key insight: if a session is not in memory, we're in a new worker instance,
and any database memory_session_id is definitely stale.

Fixes #817
Related to #825

Co-authored-by: bigphoot <bigphoot@gmail.com>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Alexander Knigge
2026-01-27 23:40:19 -08:00
committed by GitHub
parent d3331d1e22
commit 3e6add90de
+18 -4
View File
@@ -106,6 +106,15 @@ export class SessionManager {
memory_session_id: dbSession.memory_session_id
});
// Log warning if we're discarding a stale memory_session_id (Issue #817)
if (dbSession.memory_session_id) {
logger.warn('SESSION', `Discarding stale memory_session_id from previous worker instance (Issue #817)`, {
sessionDbId,
staleMemorySessionId: dbSession.memory_session_id,
reason: 'SDK context lost on worker restart - will capture new ID'
});
}
// Use currentUserPrompt if provided, otherwise fall back to database (first prompt)
const userPrompt = currentUserPrompt || dbSession.user_prompt;
@@ -124,11 +133,15 @@ export class SessionManager {
}
// Create active session
// Load memorySessionId from database if previously captured (enables resume across restarts)
// CRITICAL: Do NOT load memorySessionId from database here (Issue #817)
// When creating a new in-memory session, any database memory_session_id is STALE
// because the SDK context was lost when the worker restarted. The SDK agent will
// capture a new memorySessionId on the first response and persist it.
// Loading stale memory_session_id causes "No conversation found" crashes on resume.
session = {
sessionDbId,
contentSessionId: dbSession.content_session_id,
memorySessionId: dbSession.memory_session_id || null,
memorySessionId: null, // Always start fresh - SDK will capture new ID
project: dbSession.project,
userPrompt,
pendingMessages: [],
@@ -143,10 +156,11 @@ export class SessionManager {
currentProvider: null // Will be set when generator starts
};
logger.debug('SESSION', 'Creating new session object', {
logger.debug('SESSION', 'Creating new session object (memorySessionId cleared to prevent stale resume)', {
sessionDbId,
contentSessionId: dbSession.content_session_id,
memorySessionId: dbSession.memory_session_id || '(none - fresh session)',
dbMemorySessionId: dbSession.memory_session_id || '(none in DB)',
memorySessionId: '(cleared - will capture fresh from SDK)',
lastPromptNumber: promptNumber || this.dbManager.getSessionStore().getPromptNumberFromUserPrompts(dbSession.content_session_id)
});