feat(queue): Simplify queue processing and enhance reliability
- Implemented atomic message claiming in PendingMessageStore with claimNextMessage. - Removed obsolete peekPending method to streamline message retrieval. - Introduced SessionQueueProcessor for robust async message iteration, replacing complex polling logic. - Refactored SessionManager to eliminate in-memory queue state, relying on PendingMessageStore for message tracking. - Cleaned up session handling logic, removing recursive restarts and session deletion on empty queues. - Enhanced error handling and logging for generator failures and session processing. - Updated SessionRoutes to handle crash recovery more effectively without deleting sessions.
This commit is contained in:
@@ -0,0 +1,70 @@
|
||||
import { EventEmitter } from 'events';
|
||||
import { PendingMessageStore, PersistentPendingMessage } from '../sqlite/PendingMessageStore.js';
|
||||
import type { PendingMessageWithId } from '../worker-types.js';
|
||||
import { logger } from '../../utils/logger.js';
|
||||
|
||||
export class SessionQueueProcessor {
|
||||
constructor(
|
||||
private store: PendingMessageStore,
|
||||
private events: EventEmitter
|
||||
) {}
|
||||
|
||||
/**
|
||||
* Create an async iterator that yields messages as they become available.
|
||||
* Uses atomic database claiming to prevent race conditions.
|
||||
* Waits for 'message' event when queue is empty.
|
||||
*/
|
||||
async *createIterator(sessionDbId: number, signal: AbortSignal): AsyncIterableIterator<PendingMessageWithId> {
|
||||
while (!signal.aborted) {
|
||||
try {
|
||||
// 1. Atomically claim next message from DB
|
||||
const persistentMessage = this.store.claimNextMessage(sessionDbId);
|
||||
|
||||
if (persistentMessage) {
|
||||
// Yield the message for processing
|
||||
yield this.toPendingMessageWithId(persistentMessage);
|
||||
} else {
|
||||
// 2. Queue empty - wait for wake-up event
|
||||
// We use a promise that resolves on 'message' event or abort
|
||||
await this.waitForMessage(signal);
|
||||
}
|
||||
} catch (error) {
|
||||
if (signal.aborted) return;
|
||||
logger.error('SESSION', 'Error in queue processor loop', { sessionDbId }, error as Error);
|
||||
// Small backoff to prevent tight loop on DB error
|
||||
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private toPendingMessageWithId(msg: PersistentPendingMessage): PendingMessageWithId {
|
||||
const pending = this.store.toPendingMessage(msg);
|
||||
return {
|
||||
...pending,
|
||||
_persistentId: msg.id,
|
||||
_originalTimestamp: msg.created_at_epoch
|
||||
};
|
||||
}
|
||||
|
||||
private waitForMessage(signal: AbortSignal): Promise<void> {
|
||||
return new Promise<void>((resolve) => {
|
||||
const onMessage = () => {
|
||||
cleanup();
|
||||
resolve();
|
||||
};
|
||||
|
||||
const onAbort = () => {
|
||||
cleanup();
|
||||
resolve(); // Resolve to let the loop check signal.aborted and exit
|
||||
};
|
||||
|
||||
const cleanup = () => {
|
||||
this.events.off('message', onMessage);
|
||||
signal.removeEventListener('abort', onAbort);
|
||||
};
|
||||
|
||||
this.events.once('message', onMessage);
|
||||
signal.addEventListener('abort', onAbort, { once: true });
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -82,17 +82,41 @@ export class PendingMessageStore {
|
||||
}
|
||||
|
||||
/**
|
||||
* Peek at oldest pending message for session (does NOT change status)
|
||||
* @returns The oldest pending message or null if none
|
||||
* Atomically claim the next pending message for processing
|
||||
* Finds oldest pending -> marks processing -> returns it
|
||||
* Uses a transaction to prevent race conditions
|
||||
*/
|
||||
peekPending(sessionDbId: number): PersistentPendingMessage | null {
|
||||
const stmt = this.db.prepare(`
|
||||
SELECT * FROM pending_messages
|
||||
WHERE session_db_id = ? AND status = 'pending'
|
||||
ORDER BY id ASC
|
||||
LIMIT 1
|
||||
`);
|
||||
return stmt.get(sessionDbId) as PersistentPendingMessage | null;
|
||||
claimNextMessage(sessionDbId: number): PersistentPendingMessage | null {
|
||||
const now = Date.now();
|
||||
|
||||
const claimTx = this.db.transaction((sessionId: number, timestamp: number) => {
|
||||
const peekStmt = this.db.prepare(`
|
||||
SELECT * FROM pending_messages
|
||||
WHERE session_db_id = ? AND status = 'pending'
|
||||
ORDER BY id ASC
|
||||
LIMIT 1
|
||||
`);
|
||||
const msg = peekStmt.get(sessionId) as PersistentPendingMessage | null;
|
||||
|
||||
if (msg) {
|
||||
const updateStmt = this.db.prepare(`
|
||||
UPDATE pending_messages
|
||||
SET status = 'processing', started_processing_at_epoch = ?
|
||||
WHERE id = ?
|
||||
`);
|
||||
updateStmt.run(timestamp, msg.id);
|
||||
|
||||
// Return updated object
|
||||
return {
|
||||
...msg,
|
||||
status: 'processing',
|
||||
started_processing_at_epoch: timestamp
|
||||
} as PersistentPendingMessage;
|
||||
}
|
||||
return null;
|
||||
});
|
||||
|
||||
return claimTx(sessionDbId, now) as PersistentPendingMessage | null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -690,24 +690,25 @@ export class WorkerService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Start a session with auto-restart and cleanup logic
|
||||
* Recursively restarts the generator if there's pending work
|
||||
* Start a session processor
|
||||
* It will run continuously until the session is deleted/aborted
|
||||
*/
|
||||
private startSessionWithAutoRestart(
|
||||
private startSessionProcessor(
|
||||
session: ReturnType<typeof this.sessionManager.getSession>,
|
||||
getPendingCount: (sid: number) => number,
|
||||
source: string
|
||||
): void {
|
||||
if (!session) return;
|
||||
|
||||
const sid = session.sessionDbId;
|
||||
logger.info('SYSTEM', `Starting generator (${source})`, {
|
||||
sessionId: sid,
|
||||
pendingCount: getPendingCount(sid)
|
||||
sessionId: sid
|
||||
});
|
||||
|
||||
session.generatorPromise = this.sdkAgent.startSession(session, this)
|
||||
.catch(error => {
|
||||
// Only log if not aborted
|
||||
if (session.abortController.signal.aborted) return;
|
||||
|
||||
logger.error('SYSTEM', `Generator failed (${source})`, {
|
||||
sessionId: sid,
|
||||
error: error.message
|
||||
@@ -716,24 +717,13 @@ export class WorkerService {
|
||||
.finally(() => {
|
||||
session.generatorPromise = null;
|
||||
this.broadcastProcessingStatus();
|
||||
|
||||
// Check if there's more work pending
|
||||
const stillPending = getPendingCount(sid);
|
||||
if (stillPending > 0) {
|
||||
logger.info('SYSTEM', `Auto-restarting generator for pending work`, {
|
||||
sessionId: sid,
|
||||
pendingCount: stillPending
|
||||
});
|
||||
setTimeout(() => {
|
||||
const stillExists = this.sessionManager.getSession(sid);
|
||||
if (stillExists && !stillExists.generatorPromise) {
|
||||
// Recursive call for continuous processing
|
||||
this.startSessionWithAutoRestart(stillExists, getPendingCount, 'auto-restart');
|
||||
}
|
||||
}, 0);
|
||||
} else {
|
||||
// No more work - clean up session
|
||||
this.sessionManager.deleteSession(sid).catch(() => {});
|
||||
|
||||
// Crash recovery: if not aborted, check if we should restart
|
||||
if (!session.abortController.signal.aborted) {
|
||||
// We can check if there are pending messages to decide if restart is urgent
|
||||
// But generally, if it crashed, we might want to restart?
|
||||
// For now, let's just log. The user/system can trigger restart if needed.
|
||||
logger.warn('SYSTEM', `Session processor exited unexpectedly`, { sessionId: sid });
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -789,12 +779,8 @@ export class WorkerService {
|
||||
pendingCount: pendingStore.getPendingCount(sessionDbId)
|
||||
});
|
||||
|
||||
// Start SDK agent (non-blocking) using shared helper
|
||||
this.startSessionWithAutoRestart(
|
||||
session,
|
||||
(sid) => pendingStore.getPendingCount(sid),
|
||||
'startup-recovery'
|
||||
);
|
||||
// Start SDK agent (non-blocking)
|
||||
this.startSessionProcessor(session, 'startup-recovery');
|
||||
|
||||
result.sessionsStarted++;
|
||||
result.startedSessionIds.push(sessionDbId);
|
||||
|
||||
@@ -13,6 +13,7 @@ import { DatabaseManager } from './DatabaseManager.js';
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import type { ActiveSession, PendingMessage, PendingMessageWithId, ObservationData } from '../worker-types.js';
|
||||
import { PendingMessageStore } from '../sqlite/PendingMessageStore.js';
|
||||
import { SessionQueueProcessor } from '../queue/SessionQueueProcessor.js';
|
||||
|
||||
export class SessionManager {
|
||||
private dbManager: DatabaseManager;
|
||||
@@ -185,8 +186,6 @@ export class SessionManager {
|
||||
session = this.initializeSession(sessionDbId);
|
||||
}
|
||||
|
||||
const beforeDepth = session.pendingMessages.length;
|
||||
|
||||
// CRITICAL: Persist to database FIRST
|
||||
const message: PendingMessage = {
|
||||
type: 'observation',
|
||||
@@ -212,11 +211,6 @@ export class SessionManager {
|
||||
throw error; // Don't continue if we can't persist
|
||||
}
|
||||
|
||||
// Add to in-memory queue (for backward compatibility with existing iterator)
|
||||
session.pendingMessages.push(message);
|
||||
|
||||
const afterDepth = session.pendingMessages.length;
|
||||
|
||||
// Notify generator immediately (zero latency)
|
||||
const emitter = this.sessionQueues.get(sessionDbId);
|
||||
emitter?.emit('message');
|
||||
@@ -224,7 +218,7 @@ export class SessionManager {
|
||||
// Format tool name for logging
|
||||
const toolSummary = logger.formatTool(data.tool_name, data.tool_input);
|
||||
|
||||
logger.info('SESSION', `Observation queued (${beforeDepth}→${afterDepth})`, {
|
||||
logger.info('SESSION', `Observation queued`, {
|
||||
sessionId: sessionDbId,
|
||||
tool: toolSummary,
|
||||
hasGenerator: !!session.generatorPromise
|
||||
@@ -245,8 +239,6 @@ export class SessionManager {
|
||||
session = this.initializeSession(sessionDbId);
|
||||
}
|
||||
|
||||
const beforeDepth = session.pendingMessages.length;
|
||||
|
||||
// CRITICAL: Persist to database FIRST
|
||||
const message: PendingMessage = {
|
||||
type: 'summarize',
|
||||
@@ -267,15 +259,10 @@ export class SessionManager {
|
||||
throw error; // Don't continue if we can't persist
|
||||
}
|
||||
|
||||
// Add to in-memory queue (for backward compatibility with existing iterator)
|
||||
session.pendingMessages.push(message);
|
||||
|
||||
const afterDepth = session.pendingMessages.length;
|
||||
|
||||
const emitter = this.sessionQueues.get(sessionDbId);
|
||||
emitter?.emit('message');
|
||||
|
||||
logger.info('SESSION', `Summarize queued (${beforeDepth}→${afterDepth})`, {
|
||||
logger.info('SESSION', `Summarize queued`, {
|
||||
sessionId: sessionDbId,
|
||||
hasGenerator: !!session.generatorPromise
|
||||
});
|
||||
@@ -328,9 +315,7 @@ export class SessionManager {
|
||||
* Check if any session has pending messages (for spinner tracking)
|
||||
*/
|
||||
hasPendingMessages(): boolean {
|
||||
return Array.from(this.sessions.values()).some(
|
||||
session => session.pendingMessages.length > 0
|
||||
);
|
||||
return this.getPendingStore().hasAnyPendingWork();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -345,8 +330,9 @@ export class SessionManager {
|
||||
*/
|
||||
getTotalQueueDepth(): number {
|
||||
let total = 0;
|
||||
// We can iterate over active sessions to get their pending count
|
||||
for (const session of this.sessions.values()) {
|
||||
total += session.pendingMessages.length;
|
||||
total += this.getPendingStore().getPendingCount(session.sessionDbId);
|
||||
}
|
||||
return total;
|
||||
}
|
||||
@@ -356,16 +342,8 @@ export class SessionManager {
|
||||
* Counts both pending messages and items actively being processed by SDK agents
|
||||
*/
|
||||
getTotalActiveWork(): number {
|
||||
let total = 0;
|
||||
for (const session of this.sessions.values()) {
|
||||
// Count queued messages
|
||||
total += session.pendingMessages.length;
|
||||
// Count currently processing item (1 per active generator)
|
||||
if (session.generatorPromise !== null) {
|
||||
total += 1;
|
||||
}
|
||||
}
|
||||
return total;
|
||||
// getPendingCount includes 'processing' status, so this IS the total active work
|
||||
return this.getTotalQueueDepth();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -373,17 +351,8 @@ export class SessionManager {
|
||||
* Used for activity indicator to prevent spinner from stopping while SDK is processing
|
||||
*/
|
||||
isAnySessionProcessing(): boolean {
|
||||
for (const session of this.sessions.values()) {
|
||||
// Has queued messages waiting to be processed
|
||||
if (session.pendingMessages.length > 0) {
|
||||
return true;
|
||||
}
|
||||
// Has active SDK generator running (processing dequeued messages)
|
||||
if (session.generatorPromise !== null) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
// hasAnyPendingWork checks for 'pending' OR 'processing'
|
||||
return this.getPendingStore().hasAnyPendingWork();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -406,73 +375,22 @@ export class SessionManager {
|
||||
throw new Error(`No emitter for session ${sessionDbId}`);
|
||||
}
|
||||
|
||||
while (!session.abortController.signal.aborted) {
|
||||
// Check for pending messages in persistent store
|
||||
const persistentMessage = this.getPendingStore().peekPending(sessionDbId);
|
||||
|
||||
if (!persistentMessage) {
|
||||
// Wait for new message event
|
||||
await new Promise<void>(resolve => {
|
||||
const messageHandler = () => {
|
||||
emitter.off('message', messageHandler);
|
||||
resolve();
|
||||
};
|
||||
|
||||
const abortHandler = () => {
|
||||
emitter.off('message', messageHandler);
|
||||
resolve();
|
||||
};
|
||||
|
||||
emitter.once('message', messageHandler);
|
||||
session.abortController.signal.addEventListener('abort', abortHandler, { once: true });
|
||||
});
|
||||
|
||||
// Re-check for messages after waking up (handles race condition)
|
||||
const recheckMessage = this.getPendingStore().peekPending(sessionDbId);
|
||||
if (recheckMessage) {
|
||||
continue; // Got a message, process it
|
||||
}
|
||||
|
||||
// Woke up due to abort
|
||||
if (session.abortController.signal.aborted) {
|
||||
logger.info('SESSION', 'Generator exiting due to abort', { sessionId: sessionDbId });
|
||||
return;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// Mark as processing BEFORE yielding (status: pending -> processing)
|
||||
this.getPendingStore().markProcessing(persistentMessage.id);
|
||||
|
||||
const processor = new SessionQueueProcessor(this.getPendingStore(), emitter);
|
||||
|
||||
// Use the robust Pump iterator
|
||||
for await (const message of processor.createIterator(sessionDbId, session.abortController.signal)) {
|
||||
// Track this message ID for completion marking
|
||||
session.pendingProcessingIds.add(persistentMessage.id);
|
||||
session.pendingProcessingIds.add(message._persistentId);
|
||||
|
||||
// Track earliest timestamp for accurate observation timestamps
|
||||
// This ensures backlog messages get their original timestamps, not current time
|
||||
if (session.earliestPendingTimestamp === null) {
|
||||
session.earliestPendingTimestamp = persistentMessage.created_at_epoch;
|
||||
session.earliestPendingTimestamp = message._originalTimestamp;
|
||||
} else {
|
||||
session.earliestPendingTimestamp = Math.min(session.earliestPendingTimestamp, persistentMessage.created_at_epoch);
|
||||
session.earliestPendingTimestamp = Math.min(session.earliestPendingTimestamp, message._originalTimestamp);
|
||||
}
|
||||
|
||||
// Convert to PendingMessageWithId and yield
|
||||
// Include original timestamp for accurate observation timestamps (survives stuck processing)
|
||||
const message: PendingMessageWithId = {
|
||||
_persistentId: persistentMessage.id,
|
||||
_originalTimestamp: persistentMessage.created_at_epoch,
|
||||
...this.getPendingStore().toPendingMessage(persistentMessage)
|
||||
};
|
||||
|
||||
// Also add to in-memory queue for backward compatibility (status tracking)
|
||||
session.pendingMessages.push(message);
|
||||
|
||||
yield message;
|
||||
|
||||
// Remove from in-memory queue after yielding
|
||||
session.pendingMessages.shift();
|
||||
|
||||
// Continue processing - don't stop after summary, let the queue drain completely
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -136,6 +136,9 @@ export class SessionRoutes extends BaseRouteHandler {
|
||||
|
||||
session.generatorPromise = agent.startSession(session, this.workerService)
|
||||
.catch(error => {
|
||||
// Only log non-abort errors
|
||||
if (session.abortController.signal.aborted) return;
|
||||
|
||||
logger.error('SESSION', `Generator failed`, {
|
||||
sessionId: session.sessionDbId,
|
||||
provider: provider,
|
||||
@@ -144,47 +147,64 @@ export class SessionRoutes extends BaseRouteHandler {
|
||||
|
||||
// Mark all processing messages as failed so they can be retried or abandoned
|
||||
const pendingStore = this.sessionManager.getPendingMessageStore();
|
||||
const db = this.dbManager.getDatabase();
|
||||
const stmt = db.prepare(`
|
||||
SELECT id FROM pending_messages
|
||||
WHERE session_db_id = ? AND status = 'processing'
|
||||
`);
|
||||
const processingMessages = stmt.all(session.sessionDbId) as { id: number }[];
|
||||
const db = this.dbManager.getSessionStore().db;
|
||||
try {
|
||||
const stmt = db.prepare(`
|
||||
SELECT id FROM pending_messages
|
||||
WHERE session_db_id = ? AND status = 'processing'
|
||||
`);
|
||||
const processingMessages = stmt.all(session.sessionDbId) as { id: number }[];
|
||||
|
||||
for (const msg of processingMessages) {
|
||||
pendingStore.markFailed(msg.id);
|
||||
logger.warn('SESSION', `Marked message as failed after generator error`, {
|
||||
sessionId: session.sessionDbId,
|
||||
messageId: msg.id
|
||||
});
|
||||
for (const msg of processingMessages) {
|
||||
pendingStore.markFailed(msg.id);
|
||||
logger.warn('SESSION', `Marked message as failed after generator error`, {
|
||||
sessionId: session.sessionDbId,
|
||||
messageId: msg.id
|
||||
});
|
||||
}
|
||||
} catch (dbError) {
|
||||
logger.error('SESSION', 'Failed to mark messages as failed', { sessionId: session.sessionDbId }, dbError as Error);
|
||||
}
|
||||
})
|
||||
.finally(() => {
|
||||
const sessionDbId = session.sessionDbId;
|
||||
logger.info('SESSION', `Generator finished`, { sessionId: sessionDbId });
|
||||
|
||||
if (session.abortController.signal.aborted) {
|
||||
logger.info('SESSION', `Generator aborted`, { sessionId: sessionDbId });
|
||||
} else {
|
||||
logger.warn('SESSION', `Generator exited unexpectedly`, { sessionId: sessionDbId });
|
||||
}
|
||||
|
||||
session.generatorPromise = null;
|
||||
session.currentProvider = null;
|
||||
this.workerService.broadcastProcessingStatus();
|
||||
|
||||
// Check if there's more work pending
|
||||
const pendingStore = this.sessionManager.getPendingMessageStore();
|
||||
const pendingCount = pendingStore.getPendingCount(sessionDbId);
|
||||
if (pendingCount > 0) {
|
||||
// Auto-restart for pending work
|
||||
logger.info('SESSION', `Auto-restarting generator for pending work`, {
|
||||
sessionId: sessionDbId,
|
||||
pendingCount
|
||||
});
|
||||
setTimeout(() => {
|
||||
const stillExists = this.sessionManager.getSession(sessionDbId);
|
||||
if (stillExists && !stillExists.generatorPromise) {
|
||||
this.startGeneratorWithProvider(stillExists, this.getSelectedProvider(), 'auto-restart');
|
||||
// Crash recovery: If not aborted and still has work, restart
|
||||
if (!session.abortController.signal.aborted) {
|
||||
try {
|
||||
const pendingStore = this.sessionManager.getPendingMessageStore();
|
||||
const pendingCount = pendingStore.getPendingCount(sessionDbId);
|
||||
|
||||
if (pendingCount > 0) {
|
||||
logger.info('SESSION', `Restarting generator after crash/exit with pending work`, {
|
||||
sessionId: sessionDbId,
|
||||
pendingCount
|
||||
});
|
||||
// Small delay before restart
|
||||
setTimeout(() => {
|
||||
const stillExists = this.sessionManager.getSession(sessionDbId);
|
||||
if (stillExists && !stillExists.generatorPromise) {
|
||||
this.startGeneratorWithProvider(stillExists, this.getSelectedProvider(), 'crash-recovery');
|
||||
}
|
||||
}, 1000);
|
||||
}
|
||||
}, 0);
|
||||
} else {
|
||||
// No more work - clean up session
|
||||
this.sessionManager.deleteSession(sessionDbId).catch(() => {});
|
||||
} catch (e) {
|
||||
// Ignore errors during recovery check
|
||||
}
|
||||
}
|
||||
// NOTE: We do NOT delete the session here anymore.
|
||||
// The generator waits for events, so if it exited, it's either aborted or crashed.
|
||||
// Idle sessions stay in memory (ActiveSession is small) to listen for future events.
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user