From ccc3b01609018018abae7cc5893805691ab5c08f Mon Sep 17 00:00:00 2001
From: profit
Date: Sat, 24 Jan 2026 17:01:45 -0500
Subject: [PATCH] Fix orchestrator process hang after cleanup

The orchestrator process was hanging after completing its work because:
1. Fire-and-forget Redis operations in MessageBus.handleMessage() left unhandled promises that kept the event loop alive
2. No explicit process.exit() call after cleanup

Changes:
- coordination.ts: Add .catch(() => {}) to fire-and-forget Redis ops
- orchestrator.ts: Add explicit process.exit(exitCode) after cleanup
- orchestrator.ts: Improve error handling in main() with proper exit codes

Tested: Pipeline mksup1wq completed full flow and exited cleanly.

Co-Authored-By: Claude Opus 4.5
---
 agents/multi-agent/coordination.ts | 8 ++++----
 agents/multi-agent/orchestrator.ts | 9 ++++++++-
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/agents/multi-agent/coordination.ts b/agents/multi-agent/coordination.ts
index fbb99c5..56973aa 100644
--- a/agents/multi-agent/coordination.ts
+++ b/agents/multi-agent/coordination.ts
@@ -188,10 +188,10 @@ export class MessageBus {
   }
 
   private handleMessage(msg: AgentMessage): void {
-    // Store in message log
-    this.redis.rPush(`msg:${this.taskId}:log`, JSON.stringify(msg));
-    this.redis.hIncrBy(`metrics:${this.taskId}`, "total_messages", 1);
-    this.redis.hIncrBy(`metrics:${this.taskId}`, "direct_messages", 1);
+    // Store in message log (fire-and-forget, errors ignored)
+    this.redis.rPush(`msg:${this.taskId}:log`, JSON.stringify(msg)).catch(() => {});
+    this.redis.hIncrBy(`metrics:${this.taskId}`, "total_messages", 1).catch(() => {});
+    this.redis.hIncrBy(`metrics:${this.taskId}`, "direct_messages", 1).catch(() => {});
 
     // Call registered handlers
     for (const handler of this.messageHandlers.values()) {
diff --git a/agents/multi-agent/orchestrator.ts b/agents/multi-agent/orchestrator.ts
index 04611db..88b754b 100644
--- a/agents/multi-agent/orchestrator.ts
+++ b/agents/multi-agent/orchestrator.ts
@@ -389,6 +389,7 @@ The solution should consider fault tolerance, data consistency, and cost optimiz
 
   const orchestrator = new MultiAgentOrchestrator(model);
 
+  let exitCode = 0;
   try {
     await orchestrator.initialize();
     const metrics = await orchestrator.runTask(task);
@@ -402,9 +403,15 @@ The solution should consider fault tolerance, data consistency, and cost optimiz
 
   } catch (e: any) {
     console.error("Orchestrator error:", e.message);
+    exitCode = 1;
   } finally {
     await orchestrator.cleanup();
+    // Explicitly exit to ensure all connections are closed
+    process.exit(exitCode);
   }
 }
 
-main().catch(console.error);
+main().catch((e) => {
+  console.error("Fatal error:", e);
+  process.exit(1);
+});