/**
 * Multi-Agent Coordination System - Orchestrator
 * Manages parallel agent execution, spawn conditions, and metrics
 *
 * Environment variables:
 * - PIPELINE_ID: Parent pipeline ID for error reporting
 * - TASK_ID: Task ID override
 */

import type { TaskDefinition, CoordinationMetrics, SpawnCondition, AgentRole } from "./types";
import {
  Blackboard,
  MessageBus,
  AgentStateManager,
  SpawnController,
  MetricsCollector,
} from "./coordination";
import { AgentAlpha, AgentBeta, AgentGamma } from "./agents";
import { createClient, RedisClientType } from "redis";

/** Severity levels accepted by the pipeline error-tracking API call below. */
type Severity = "low" | "medium" | "high" | "critical";

/** Reasons the monitor loop can abort a run. */
type AbortReason = "stuck" | "iteration_limit";

/** Every reason for which a structured failure context is recorded. */
type FailureReason = AbortReason | "consensus_failed";

/** Metrics returned by `runTask`; `abort_reason` is present only on aborted runs. */
export type OrchestrationMetrics = CoordinationMetrics & { abort_reason?: string };

/** Mutable state shared between `runTask` and its periodic monitor tick. */
interface MonitorState {
  abortReason?: AbortReason;
  busy: boolean;
}

const DEFAULT_MODEL = "anthropic/claude-sonnet-4";
const DEFAULT_TIMEOUT_SECONDS = 120;
const MONITOR_INTERVAL_MS = 2000;

/** Extracts a printable message from an arbitrary thrown value. */
function errorMessage(e: unknown): string {
  return e instanceof Error ? e.message : String(e);
}

// Redis client for direct handoff writes
let handoffRedis: RedisClientType | null = null;

/**
 * Returns the cached Redis client used for handoff writes, connecting on first use.
 *
 * Credentials are read from Vault at 127.0.0.1:8200 using the root token stored in
 * /opt/vault/init-keys.json.
 *
 * @throws Error when the Vault request fails or returns no credentials, or when the
 *   Redis connection cannot be established. All callers in this file catch and log.
 */
async function getHandoffRedis(): Promise<RedisClientType> {
  if (handoffRedis && handoffRedis.isOpen) return handoffRedis;

  const initKeys = await Bun.file("/opt/vault/init-keys.json").json();
  const token = initKeys.root_token;

  // NOTE(review): certificate verification is disabled for this request. That is only
  // defensible because the target is loopback; confirm this is intentional.
  const response = await fetch(`https://127.0.0.1:8200/v1/secret/data/services/dragonfly`, {
    headers: { "X-Vault-Token": token },
    // @ts-ignore - Bun supports this
    tls: { rejectUnauthorized: false },
  });
  if (!response.ok) {
    throw new Error(`Vault credential lookup failed: HTTP ${response.status}`);
  }

  const result = (await response.json()) as {
    data?: { data?: { host: string; port: string | number; password: string } };
  };
  const creds = result?.data?.data;
  if (!creds) {
    throw new Error("Vault response did not contain dragonfly credentials");
  }

  const client: RedisClientType = createClient({
    url: `redis://${creds.host}:${creds.port}`,
    password: creds.password,
  });
  // Without an "error" listener a dropped connection surfaces as an unhandled
  // "error" event and terminates the process.
  client.on("error", (e: unknown) => {
    console.error(`[HANDOFF_REDIS] ${errorMessage(e)}`);
  });

  await client.connect();
  // Cache only after a successful connect so a failed attempt is retried cleanly.
  handoffRedis = client;
  return client;
}

/** Current time as an ISO-8601 string. */
function now(): string {
  return new Date().toISOString();
}

/** Generates a task identifier of the form `task-<random>-<timestamp base36>`. */
function generateId(): string {
  return "task-" + Math.random().toString(36).slice(2, 8) + "-" + Date.now().toString(36);
}

/**
 * Error reporting to parent pipeline's observability system.
 *
 * Best effort: failures are logged to stderr and never thrown, so error reporting
 * cannot itself cause more errors.
 */
async function reportErrorToObservability(
  pipelineId: string,
  errorType: string,
  severity: Severity,
  details: string
): Promise<void> {
  try {
    // Report to the UI server's error tracking API
    const response = await fetch("http://localhost:3000/api/pipeline/errors/record", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        pipeline_id: pipelineId,
        error_type: errorType,
        severity,
        details,
      }),
    });
    if (!response.ok) {
      console.error(`[ERROR_REPORT] Failed to report error: ${response.status}`);
    }
  } catch (e: unknown) {
    console.error(`[ERROR_REPORT] Error reporting failed: ${errorMessage(e)}`);
  }
}

// =============================================================================
// Multi-Agent Orchestrator
// =============================================================================

/**
 * Runs ALPHA and BETA agents in parallel on a task, monitors spawn conditions for the
 * GAMMA agent, and collects coordination metrics. When PIPELINE_ID is set it also
 * reports errors, failure context and an agent handoff dump for recovery pipelines.
 */
export class MultiAgentOrchestrator {
  private taskId: string;
  private pipelineId?: string;
  private blackboard!: Blackboard;
  private stateManager!: AgentStateManager;
  private spawnController!: SpawnController;
  private metrics!: MetricsCollector;
  private alphaAgent!: AgentAlpha;
  private betaAgent!: AgentBeta;
  private gammaAgent?: AgentGamma;
  private alphaBus!: MessageBus;
  private betaBus!: MessageBus;
  private gammaBus?: MessageBus;
  private model: string;
  private startTime!: number;
  private monitorInterval?: ReturnType<typeof setInterval>;
  private errorCount: number = 0;
  private iterationCount: number = 0;
  private maxIterations: number = 10;
  private lastProgressTime: number = 0;
  private progressTimeout: number = 60000; // 60 seconds without progress = stuck

  // Recovery context from prior runs
  private isRecoveryRun: boolean = false;
  private recoveryAttempt: number = 1;
  // Shape is defined by whatever a prior run stored in Redis; not validated here.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private inheritedContext: any = null;
  private forceGamma: boolean = false;

  /**
   * @param model Model identifier passed through to every agent.
   */
  constructor(model: string = "anthropic/claude-sonnet-4") {
    // Use environment variable for task ID if provided
    this.taskId = process.env.TASK_ID || generateId();
    this.pipelineId = process.env.PIPELINE_ID;
    this.model = model;
  }

  /** Counts the error, forwards it to observability when a pipeline is set, and logs it. */
  private async reportError(errorType: string, severity: Severity, details: string): Promise<void> {
    this.errorCount++;
    if (this.pipelineId) {
      await reportErrorToObservability(this.pipelineId, errorType, severity, details);
    }
    this.log(`ERROR [${severity}] ${errorType}: ${details}`);
  }

  /** Stamps the progress clock and counts one monitor iteration. */
  private updateProgress(): void {
    this.lastProgressTime = Date.now();
    this.iterationCount++;
  }

  /** True when more than `progressTimeout` ms have passed since the last progress stamp. */
  private isStuck(): boolean {
    if (this.lastProgressTime === 0) return false;
    return Date.now() - this.lastProgressTime > this.progressTimeout;
  }

  /** True once the monitor has run `maxIterations` times. */
  private isIterationLimitExceeded(): boolean {
    return this.iterationCount >= this.maxIterations;
  }

  /** Clears the monitor interval if one is running. */
  private stopMonitoring(): void {
    if (this.monitorInterval) {
      clearInterval(this.monitorInterval);
      this.monitorInterval = undefined;
    }
  }

  /** Request token revocation for stuck agents. Best effort; failures are logged. */
  private async revokeStuckAgentTokens(): Promise<void> {
    if (!this.pipelineId) return;
    try {
      const response = await fetch("http://localhost:3000/api/pipeline/revoke", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          pipeline_id: this.pipelineId,
          reason: "iteration_timeout",
          details: `Agents stuck after ${this.iterationCount} iterations`,
        }),
      });
      if (!response.ok) {
        this.log(`Failed to revoke tokens: HTTP ${response.status}`);
        return;
      }
      this.log("Token revocation requested for stuck agents");
    } catch (e: unknown) {
      this.log(`Failed to revoke tokens: ${errorMessage(e)}`);
    }
  }

  /** Record structured failure context for auto-recovery. Best effort; failures are logged. */
  private async recordFailureContext(
    reason: FailureReason,
    metrics: CoordinationMetrics
  ): Promise<void> {
    if (!this.pipelineId) return;

    const failureContext = {
      pipeline_id: this.pipelineId,
      task_id: this.taskId,
      failure_reason: reason,
      failure_time: now(),
      iteration_count: this.iterationCount,
      elapsed_ms: Date.now() - this.startTime,
      metrics: metrics,
      gamma_spawned: this.gammaAgent !== undefined,
      error_count: this.errorCount,
      recovery_hint: this.getRecoveryHint(reason),
    };

    try {
      const response = await fetch("http://localhost:3000/api/pipeline/failure-context", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(failureContext),
      });
      if (!response.ok) {
        this.log(`Failed to record failure context: HTTP ${response.status}`);
        return;
      }
      this.log(`Failure context recorded: ${reason}`);
    } catch (e: unknown) {
      this.log(`Failed to record failure context: ${errorMessage(e)}`);
    }
  }

  /** Maps a failure reason to the human-readable hint stored with the failure context. */
  private getRecoveryHint(reason: string): string {
    switch (reason) {
      case "stuck":
        return "Agents became unresponsive. Try with fresh agents or GAMMA mediator.";
      case "iteration_limit":
        return "Max iterations reached without consensus. Consider simplifying objective.";
      case "consensus_failed":
        return "Agents completed but disagreed. GAMMA mediator may help resolve conflicts.";
      default:
        return "Unknown failure. Review logs for details.";
    }
  }

  /**
   * Load recovery context from prior run if this is a recovery pipeline.
   *
   * Reads the `pipeline:<id>` hash from Redis; when `is_recovery` is "true" it sets the
   * recovery flags and loads the inherited handoff JSON. Any failure is logged as a
   * warning and leaves the orchestrator usable.
   */
  private async loadRecoveryContext(): Promise<void> {
    if (!this.pipelineId) return;

    try {
      const redis = await getHandoffRedis();
      const pipelineData = await redis.hGetAll(`pipeline:${this.pipelineId}`);
      if (pipelineData.is_recovery !== "true") return;

      this.isRecoveryRun = true;
      // A missing or non-numeric attempt counter falls back to 1 instead of NaN.
      const attempt = Number.parseInt(pipelineData.recovery_attempt || "1", 10);
      this.recoveryAttempt = Number.isNaN(attempt) ? 1 : attempt;
      this.forceGamma = pipelineData.force_gamma === "true";

      console.log("\n" + "=".repeat(70));
      console.log(`RECOVERY RUN - Attempt ${this.recoveryAttempt}`);
      console.log("=".repeat(70));

      // Load inherited handoff
      const inheritedKey = pipelineData.inherited_handoff;
      if (inheritedKey) {
        const inheritedData = await redis.get(inheritedKey);
        if (inheritedData) {
          this.inheritedContext = JSON.parse(inheritedData);
          console.log(`Loaded inherited context from: ${inheritedKey}`);
          console.log(`- Prior proposals: ${this.inheritedContext.proposals?.length || 0}`);
          console.log(`- Prior synthesis attempts: ${this.inheritedContext.synthesis_attempts?.length || 0}`);
          console.log(`- Recovery hints:`);
          this.inheritedContext.recovery_hints?.forEach((hint: string, i: number) => {
            console.log(` ${i + 1}. ${hint}`);
          });
        }
      }

      // Also try loading prior_context (JSON string)
      if (pipelineData.prior_context) {
        const priorContext = JSON.parse(pipelineData.prior_context);
        console.log(`Prior run: ${priorContext.prior_pipeline}`);
        console.log(`Prior failure reason: ${priorContext.failure_reason}`);
        console.log(`Prior iteration count: ${priorContext.iteration_count}`);
      }

      if (this.forceGamma) {
        console.log("\nFORCE GAMMA MODE: GAMMA mediator will be spawned immediately");
      }
      console.log("=".repeat(70) + "\n");
    } catch (e: unknown) {
      this.log(`Warning: Could not load recovery context: ${errorMessage(e)}`);
    }
  }

  /** Get inherited context for agents to use (null when none was loaded). */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  getInheritedContext(): any {
    return this.inheritedContext;
  }

  /** Check if GAMMA should be force-spawned. */
  shouldForceGamma(): boolean {
    return this.forceGamma;
  }

  /**
   * Pre-seed the blackboard with inherited proposals and synthesis attempts from the
   * prior run, plus a `recovery_context` entry in the "problem" section.
   * Failures are logged as warnings.
   */
  private async preseedBlackboard(): Promise<void> {
    if (!this.inheritedContext) return;

    const inherited = this.inheritedContext;
    const fromRun = this.recoveryAttempt - 1;

    try {
      // Seed prior proposals into the blackboard
      if (inherited.proposals?.length > 0) {
        for (const proposal of inherited.proposals) {
          await this.blackboard.write(
            "solutions",
            `inherited_${proposal.agent}_${proposal.key || 'proposal'}`,
            { ...proposal.value, _inherited: true, _from_run: fromRun },
            // eslint-disable-next-line @typescript-eslint/no-explicit-any
            proposal.agent as any
          );
        }
        this.log(`Seeded ${inherited.proposals.length} proposals from prior run`);
      }

      // Seed prior synthesis attempts
      if (inherited.synthesis_attempts?.length > 0) {
        for (const synthesis of inherited.synthesis_attempts) {
          await this.blackboard.write(
            "synthesis",
            `inherited_${synthesis.agent}_${synthesis.key || 'synthesis'}`,
            { ...synthesis.value, _inherited: true, _from_run: fromRun },
            // eslint-disable-next-line @typescript-eslint/no-explicit-any
            synthesis.agent as any
          );
        }
        this.log(`Seeded ${inherited.synthesis_attempts.length} synthesis attempts from prior run`);
      }

      // Write recovery metadata to blackboard
      await this.blackboard.write(
        "problem",
        "recovery_context",
        {
          is_recovery: true,
          recovery_attempt: this.recoveryAttempt,
          prior_pipeline: inherited.from_pipeline,
          prior_proposals_count: inherited.proposals?.length || 0,
          recovery_hints: inherited.recovery_hints || [],
          instructions: [
            "This is a RECOVERY run - prior agents failed to reach consensus",
            "Review the inherited proposals in the 'solutions' section",
            "Look for common ground between prior proposals",
            "GAMMA mediator will help resolve conflicts",
            "Try to synthesize a solution that incorporates the best ideas from prior attempts",
          ],
        },
        "ALPHA"
      );
    } catch (e: unknown) {
      this.log(`Warning: Failed to pre-seed blackboard: ${errorMessage(e)}`);
    }
  }

  /**
   * Collect and dump all agent proposals/analysis to handoff JSON.
   *
   * The document is stored under `handoff:<pipelineId>:agents` with a 24 hour TTL and
   * its key and timestamp are recorded on the `pipeline:<pipelineId>` hash.
   * Failures are logged, never thrown.
   */
  private async dumpAgentHandoff(): Promise<void> {
    if (!this.pipelineId) return;

    try {
      const redis = await getHandoffRedis();
      const handoffKey = `handoff:${this.pipelineId}:agents`;

      // Collect all solutions from blackboard
      const solutions = await this.blackboard.readSection("solutions");
      const synthesis = await this.blackboard.readSection("synthesis");
      const consensus = await this.blackboard.readSection("consensus");
      const problem = await this.blackboard.readSection("problem");

      // Get agent states
      const agentStates = await this.stateManager.getAllStates();

      // Get message history
      const alphaMessages = await this.alphaBus.getMessageLog(50);
      const betaMessages = await this.betaBus.getMessageLog(50);
      const gammaMessages = this.gammaBus ? await this.gammaBus.getMessageLog(50) : [];

      // Build structured handoff
      const handoff = {
        pipeline_id: this.pipelineId,
        task_id: this.taskId,
        dump_time: now(),
        iteration_count: this.iterationCount,
        max_iterations: this.maxIterations,
        gamma_active: this.gammaAgent !== undefined,

        // Agent proposals and analysis
        proposals: solutions.map(s => ({
          agent: s.author,
          key: s.key,
          value: s.value,
          version: s.version,
          timestamp: s.timestamp,
        })),

        // Synthesis attempts
        synthesis_attempts: synthesis.map(s => ({
          agent: s.author,
          key: s.key,
          value: s.value,
          timestamp: s.timestamp,
        })),

        // Consensus votes and discussions
        consensus_state: consensus.map(c => ({
          key: c.key,
          value: c.value,
          author: c.author,
          timestamp: c.timestamp,
        })),

        // Problem analysis
        problem_analysis: problem.map(p => ({
          key: p.key,
          value: p.value,
          author: p.author,
        })),

        // Agent states at abort time
        agent_states: agentStates.map(s => ({
          role: s.role,
          status: s.status,
          phase: s.phase,
          progress: s.progress,
          blocked_reason: s.blocked_reason,
          last_activity: s.last_activity,
        })),

        // Recent message history for context
        message_summary: {
          alpha_last_messages: alphaMessages.slice(-10),
          beta_last_messages: betaMessages.slice(-10),
          gamma_last_messages: gammaMessages.slice(-10),
        },

        // Recovery hints
        recovery_hints: [
          `Iteration limit (${this.maxIterations}) exceeded after ${this.iterationCount} iterations`,
          this.gammaAgent ? "GAMMA was active but could not resolve conflicts" : "GAMMA was not spawned",
          `${solutions.length} proposals generated, ${synthesis.length} synthesis attempts`,
          "Consider: simplifying objective, forcing GAMMA earlier, or increasing iteration limit",
        ],
      };

      // Store handoff JSON
      await redis.set(handoffKey, JSON.stringify(handoff), { EX: 86400 }); // 24hr TTL
      await redis.hSet(`pipeline:${this.pipelineId}`, "handoff_key", handoffKey);
      await redis.hSet(`pipeline:${this.pipelineId}`, "handoff_time", handoff.dump_time);

      this.log(`Agent handoff dumped: ${solutions.length} proposals, ${synthesis.length} synthesis attempts`);
    } catch (e: unknown) {
      this.log(`Failed to dump agent handoff: ${errorMessage(e)}`);
    }
  }

  /** Logs a message prefixed with seconds elapsed since `initialize()` started. */
  private log(msg: string): void {
    const elapsed = this.startTime ? ((Date.now() - this.startTime) / 1000).toFixed(1) : "0.0";
    console.log(`[${elapsed}s] [ORCHESTRATOR] ${msg}`);
  }

  /**
   * Connects the shared coordination infrastructure, loads any recovery context, and
   * creates and initializes the ALPHA and BETA agents. Must complete before `runTask`.
   */
  async initialize(): Promise<void> {
    this.startTime = Date.now();

    console.log("\n" + "=".repeat(70));
    console.log("MULTI-AGENT COORDINATION SYSTEM");
    console.log("Task ID: " + this.taskId);
    if (this.pipelineId) {
      console.log("Pipeline ID: " + this.pipelineId);
    }
    console.log("Model: " + this.model);
    console.log("=".repeat(70) + "\n");

    // Check if this is a recovery run and load inherited context
    await this.loadRecoveryContext();

    this.log("Initializing coordination infrastructure...");

    // Initialize shared infrastructure
    this.blackboard = new Blackboard(this.taskId);
    this.stateManager = new AgentStateManager(this.taskId);
    this.spawnController = new SpawnController(this.taskId);
    this.metrics = new MetricsCollector(this.taskId);

    await Promise.all([
      this.blackboard.connect(),
      this.stateManager.connect(),
      this.spawnController.connect(),
      this.metrics.connect(),
    ]);
    this.log("Infrastructure connected");

    // Pre-seed blackboard with inherited context if this is a recovery run
    if (this.isRecoveryRun && this.inheritedContext) {
      this.log("Pre-seeding blackboard with inherited context...");
      await this.preseedBlackboard();
    }

    // Initialize message buses for ALPHA and BETA
    this.alphaBus = new MessageBus(this.taskId, "ALPHA");
    this.betaBus = new MessageBus(this.taskId, "BETA");
    await Promise.all([this.alphaBus.connect(), this.betaBus.connect()]);
    this.log("Message buses connected");

    // Create initial agents
    this.alphaAgent = new AgentAlpha(
      this.taskId,
      this.blackboard,
      this.alphaBus,
      this.stateManager,
      this.metrics,
      this.model
    );
    this.betaAgent = new AgentBeta(
      this.taskId,
      this.blackboard,
      this.betaBus,
      this.stateManager,
      this.metrics,
      this.model
    );
    await Promise.all([this.alphaAgent.init(), this.betaAgent.init()]);
    this.log("Agents ALPHA and BETA initialized");
  }

  /**
   * Creates, connects and initializes the GAMMA agent and marks it spawned on the
   * spawn controller. Does nothing (besides logging) if GAMMA already exists.
   *
   * @param reason The spawn condition that triggered the spawn.
   */
  async spawnGamma(reason: SpawnCondition): Promise<void> {
    if (this.gammaAgent) {
      this.log("GAMMA already spawned, skipping");
      return;
    }

    this.log(`SPAWNING GAMMA - Reason: ${reason.type} (threshold: ${reason.threshold}, current: ${reason.current_value})`);

    // Create message bus for GAMMA
    this.gammaBus = new MessageBus(this.taskId, "GAMMA");
    await this.gammaBus.connect();

    // Create and initialize GAMMA agent
    this.gammaAgent = new AgentGamma(
      this.taskId,
      this.blackboard,
      this.gammaBus,
      this.stateManager,
      this.metrics,
      reason.type,
      this.model
    );
    await this.gammaAgent.init();
    await this.spawnController.markGammaSpawned(reason);
    this.log("GAMMA agent spawned and initialized");
  }

  /** Asks the spawn controller whether GAMMA should spawn and spawns it if so. */
  private async spawnGammaIfWarranted(): Promise<void> {
    const { shouldSpawn, reason } = await this.spawnController.checkSpawnConditions();
    if (shouldSpawn && reason) {
      await this.spawnGamma(reason);
    }
  }

  /**
   * One monitoring pass: counts an iteration, checks abort conditions, evaluates the
   * STUCK / CONFLICT / COMPLEXITY spawn conditions, and logs a status line.
   *
   * @returns `{ abort: true, reason }` when the run should be aborted.
   */
  private async monitorConditions(): Promise<{ abort: boolean; reason?: AbortReason }> {
    // Track progress
    // NOTE(review): this stamps lastProgressTime immediately before isStuck() reads
    // it, so the "stuck" branch below cannot fire as written. With the current
    // defaults the iteration limit (10 ticks at 2s) is what ends long runs. Confirm
    // what should count as progress before changing this.
    this.updateProgress();

    // Check for iteration timeout (stuck without progress)
    if (this.isStuck()) {
      this.log("TIMEOUT: No progress detected - triggering auto-recovery");
      await this.reportError("iteration_timeout", "high", "Agents stuck without progress");
      return { abort: true, reason: "stuck" };
    }

    // Check for iteration limit exceeded
    if (this.isIterationLimitExceeded()) {
      this.log("LIMIT: Maximum iterations exceeded - triggering auto-recovery");
      await this.reportError("iteration_limit", "high", `Max iterations (${this.maxIterations}) exceeded`);
      return { abort: true, reason: "iteration_limit" };
    }

    // Check stuck condition
    const stuckAgents = await this.stateManager.detectStuckAgents(30);
    if (stuckAgents.length > 0) {
      this.log(`Stuck agents detected: ${stuckAgents.join(", ")}`);
      const condition = await this.spawnController.updateCondition("STUCK", stuckAgents.length);
      if (condition?.triggered) {
        await this.spawnGammaIfWarranted();
      }
    }

    // Check conflict condition
    const metricsData = await this.metrics.getMetrics();
    const unresolvedConflicts = metricsData.conflicts_detected - metricsData.conflicts_resolved;
    const conflictCondition = await this.spawnController.updateCondition("CONFLICT", unresolvedConflicts);
    if (conflictCondition?.triggered && !this.spawnController.isGammaSpawned()) {
      await this.spawnGammaIfWarranted();
    }

    // Check complexity condition (from blackboard)
    const analysis = await this.blackboard.read("problem", "analysis");
    if (analysis?.value?.complexity_score) {
      const complexityCondition = await this.spawnController.updateCondition(
        "COMPLEXITY",
        analysis.value.complexity_score
      );
      if (complexityCondition?.triggered && !this.spawnController.isGammaSpawned()) {
        await this.spawnGammaIfWarranted();
      }
    }

    // Log current state
    const states = await this.stateManager.getAllStates();
    const statesSummary = states
      .map(s => `${s.role}:${s.status}(${(s.progress * 100).toFixed(0)}%)`)
      .join(", ");
    this.log(`Status: ${statesSummary} | Messages: ${metricsData.total_messages} | Conflicts: ${unresolvedConflicts} | Iter: ${this.iterationCount}`);

    return { abort: false };
  }

  /**
   * Interval callback wrapper around `monitorConditions`.
   *
   * Skips the tick if the previous one is still running (overlapping ticks could both
   * pass the `gammaAgent` check in `spawnGamma` and spawn GAMMA twice) and converts
   * any rejection into a log line so it never becomes an unhandled rejection.
   */
  private async monitorTick(state: MonitorState): Promise<void> {
    if (state.busy || state.abortReason) return;
    state.busy = true;
    try {
      const result = await this.monitorConditions();
      if (result.abort && result.reason) {
        state.abortReason = result.reason;
        this.stopMonitoring();
      }
    } catch (e: unknown) {
      this.log(`Monitor tick failed: ${errorMessage(e)}`);
    } finally {
      state.busy = false;
    }
  }

  /**
   * Runs the task: launches ALPHA and BETA in parallel, monitors conditions every two
   * seconds, optionally runs GAMMA, checks consensus and finalizes metrics.
   *
   * @param task Task definition; `timeout_seconds` bounds the wait for ALPHA/BETA.
   * @returns Final metrics. On an aborted run the partial metrics carry `abort_reason`.
   */
  async runTask(task: TaskDefinition): Promise<OrchestrationMetrics> {
    this.log(`Starting task: ${task.objective.slice(0, 60)}...`);
    this.lastProgressTime = Date.now();

    // Write task to blackboard
    await this.blackboard.write("problem", "task_definition", task, "ALPHA");

    // FORCE GAMMA: If this is a recovery run, spawn GAMMA immediately
    if (this.forceGamma && !this.gammaAgent) {
      this.log("FORCE GAMMA MODE: Spawning GAMMA mediator immediately for recovery");
      const forceReason: SpawnCondition = {
        type: "STUCK",
        threshold: 0,
        current_value: 1,
        triggered: true,
        description: "Force-spawned for recovery run",
      };
      await this.spawnGamma(forceReason);
    }

    // Track if we need to abort due to timeout/iteration limit
    const monitor: MonitorState = { busy: false };

    // Start monitoring with abort detection
    this.monitorInterval = setInterval(() => {
      void this.monitorTick(monitor);
    }, MONITOR_INTERVAL_MS);

    // Run agents in parallel
    this.log("Launching ALPHA and BETA in parallel...");
    const alphaPromise = this.alphaAgent.run(task).catch(async (e: unknown) => {
      await this.reportError("agent_failure", "high", `ALPHA error: ${errorMessage(e)}`);
    });
    const betaPromise = this.betaAgent.run(task).catch(async (e: unknown) => {
      await this.reportError("agent_failure", "high", `BETA error: ${errorMessage(e)}`);
    });

    // Wait for initial agents to complete (or timeout)
    // NOTE(review): an abort flagged by the monitor is only acted on after this wait
    // finishes; it does not interrupt the wait itself.
    const timeoutMs = task.timeout_seconds * 1000;
    let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
    const timeoutPromise = new Promise<void>(resolve => {
      timeoutHandle = setTimeout(resolve, timeoutMs);
    });
    try {
      await Promise.race([Promise.all([alphaPromise, betaPromise]), timeoutPromise]);
    } finally {
      // Do not leave the timer pending once the race has settled.
      clearTimeout(timeoutHandle);
    }

    // Check if we were aborted during execution
    if (monitor.abortReason) {
      const abortReason = monitor.abortReason;
      this.log(`Task aborted: ${abortReason}`);
      this.stopMonitoring();

      // CRITICAL: Dump all agent proposals/analysis to handoff JSON BEFORE revoking tokens
      this.log("Dumping agent handoff data for recovery pipeline...");
      await this.dumpAgentHandoff();

      // Now revoke tokens for stuck agents
      await this.revokeStuckAgentTokens();

      // Get partial metrics and mark as failed
      const partialMetrics = await this.metrics.finalize(false);
      await this.recordFailureContext(abortReason, partialMetrics);
      return { ...partialMetrics, abort_reason: abortReason };
    }

    this.log("Initial agents completed or timeout reached");

    // Check if GAMMA needs to be spawned for success validation
    const states = await this.stateManager.getAllStates();
    const bothComplete = states.every(s => s.status === "WAITING" || s.status === "COMPLETED");
    if (bothComplete && !this.spawnController.isGammaSpawned()) {
      await this.spawnController.updateCondition("SUCCESS", 1.0);
      await this.spawnGammaIfWarranted();
    }

    // If GAMMA was spawned, run it
    if (this.gammaAgent) {
      this.log("Running GAMMA for resolution...");
      await this.gammaAgent.run(task).catch(async (e: unknown) => {
        await this.reportError("agent_failure", "high", `GAMMA error: ${errorMessage(e)}`);
      });
    }

    // Stop monitoring
    this.stopMonitoring();

    // Check consensus
    const consensus = await this.blackboard.checkConsensus("synthesis", ["ALPHA", "BETA"]);
    const consensusAchieved =
      consensus.reached ||
      (await this.blackboard.read("consensus", "final"))?.value?.achieved === true;
    this.log(`Consensus achieved: ${consensusAchieved}`);

    // Finalize metrics
    const finalMetrics = await this.metrics.finalize(consensusAchieved);

    // If consensus failed, record structured failure context
    if (!consensusAchieved) {
      await this.recordFailureContext("consensus_failed", finalMetrics);
    }

    return finalMetrics;
  }

  /**
   * Stops monitoring and disconnects every bus and infrastructure component that was
   * created. Best effort: a failing disconnect is logged and does not prevent the
   * others from being attempted.
   */
  async cleanup(): Promise<void> {
    this.log("Cleaning up...");
    this.stopMonitoring();

    // Optional chaining: initialize() may have failed before these were assigned.
    const pending = [
      this.alphaBus?.disconnect(),
      this.betaBus?.disconnect(),
      this.gammaBus?.disconnect(),
      this.blackboard?.disconnect(),
      this.stateManager?.disconnect(),
      this.spawnController?.disconnect(),
      this.metrics?.disconnect(),
    ].filter(Boolean);

    const outcomes = await Promise.allSettled(pending);
    for (const outcome of outcomes) {
      if (outcome.status === "rejected") {
        this.log(`Warning: disconnect failed: ${errorMessage(outcome.reason)}`);
      }
    }

    this.log("Cleanup complete");
  }

  /** The task ID this orchestrator runs under (TASK_ID env override or generated). */
  getTaskId(): string {
    return this.taskId;
  }
}

// =============================================================================
// Performance Analysis
// =============================================================================

/**
 * Prints a human-readable performance report for a finished run to stdout.
 *
 * @param metrics Finalized coordination metrics. When `end_time` is missing the
 *   duration is reported as 0 and the ops/sec figure as "n/a".
 */
export function analyzePerformance(metrics: CoordinationMetrics): void {
  console.log("\n" + "=".repeat(70));
  console.log("PERFORMANCE ANALYSIS");
  console.log("=".repeat(70));

  const duration = metrics.end_time
    ? (new Date(metrics.end_time).getTime() - new Date(metrics.start_time).getTime()) / 1000
    : 0;

  // Guard the rate: dividing by a zero duration would print Infinity or NaN.
  const overhead =
    duration > 0
      ? ((metrics.total_messages + metrics.blackboard_writes) / duration).toFixed(2)
      : "n/a";

  console.log("\nTiming:");
  console.log(` Duration: ${duration.toFixed(1)}s`);

  console.log("\nCommunication:");
  console.log(` Total messages: ${metrics.total_messages}`);
  console.log(` Direct messages: ${metrics.direct_messages}`);
  console.log(` Blackboard writes: ${metrics.blackboard_writes}`);
  console.log(` Blackboard reads: ${metrics.blackboard_reads}`);
  console.log(` Communication overhead: ${overhead} ops/sec`);

  console.log("\nCoordination:");
  console.log(` Conflicts detected: ${metrics.conflicts_detected}`);
  console.log(` Conflicts resolved: ${metrics.conflicts_resolved}`);
  console.log(` Conflict resolution rate: ${metrics.conflicts_detected > 0 ? ((metrics.conflicts_resolved / metrics.conflicts_detected) * 100).toFixed(1) : 100}%`);

  console.log("\nGamma Agent:");
  console.log(` Spawned: ${metrics.gamma_spawned ? "Yes" : "No"}`);
  if (metrics.gamma_spawned) {
    console.log(` Spawn reason: ${metrics.gamma_spawn_reason}`);
    console.log(` Spawn time: ${metrics.gamma_spawn_time}`);
  }

  console.log("\nOutcome:");
  console.log(` Consensus achieved: ${metrics.final_consensus ? "Yes" : "No"}`);
  console.log(` Performance score: ${(metrics.performance_score * 100).toFixed(1)}%`);

  // Threshold analysis
  console.log("\nThreshold Effects:");
  const messageThreshold = 50;
  const conflictThreshold = 3;

  if (metrics.total_messages > messageThreshold) {
    console.log(` ! High message volume (${metrics.total_messages} > ${messageThreshold}) - potential coordination overhead`);
  } else {
    console.log(` + Message volume within threshold (${metrics.total_messages} <= ${messageThreshold})`);
  }

  if (metrics.conflicts_detected > conflictThreshold) {
    console.log(` ! High conflict rate (${metrics.conflicts_detected} > ${conflictThreshold}) - agents may have divergent strategies`);
  } else {
    console.log(` + Conflict rate within threshold (${metrics.conflicts_detected} <= ${conflictThreshold})`);
  }

  if (metrics.gamma_spawned && metrics.gamma_spawn_reason === "STUCK") {
    console.log(` ! Gamma spawned due to stuck condition - consider adjusting agent strategies`);
  }

  console.log("\n" + "=".repeat(70));
}

// =============================================================================
// CLI Entry Point
// =============================================================================

/** CLI flags that consume the argument following them. */
const VALUE_FLAGS: readonly string[] = ["--model", "--timeout"];

/** Returns the argument following `flag`, or undefined when the flag or its value is absent. */
function flagValue(args: readonly string[], flag: string): string | undefined {
  const idx = args.indexOf(flag);
  return idx !== -1 ? args[idx + 1] : undefined;
}

/** Returns the first argument that is neither a known value flag nor that flag's value. */
function firstPositional(args: readonly string[]): string | undefined {
  let skipNext = false;
  for (const arg of args) {
    if (skipNext) {
      skipNext = false;
      continue;
    }
    if (VALUE_FLAGS.includes(arg)) {
      skipNext = true;
      continue;
    }
    return arg;
  }
  return undefined;
}

/**
 * CLI entry point.
 *
 * Usage: `[objective] [--model <id>] [--timeout <seconds>]`
 *
 * Exit codes:
 *   0 = Success (consensus achieved)
 *   1 = Error (crash or exception)
 *   2 = Consensus failure (agents completed but no agreement)
 *   3 = Aborted (timeout/stuck/iteration limit - auto-recovery needed)
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);
  const pipelineId = process.env.PIPELINE_ID;

  // Default complex task
  const defaultObjective = [
    "Design a distributed event-driven architecture for a real-time analytics platform that handles:",
    "1) High-throughput data ingestion from multiple sources",
    "2) Stream processing with exactly-once semantics",
    "3) Real-time aggregations and windowed computations",
    "4) Low-latency query serving for dashboards",
    "5) Horizontal scalability to handle 1M events/second",
    "The solution should consider fault tolerance, data consistency, and cost optimization.",
  ].join(" ");

  // The objective is the first non-flag argument, so invoking with only flags
  // (e.g. `--model x`) no longer uses the flag itself as the objective.
  const objective = firstPositional(args) || defaultObjective;
  const model = flagValue(args, "--model") || DEFAULT_MODEL;

  // Parse timeout
  let timeout = DEFAULT_TIMEOUT_SECONDS;
  const rawTimeout = flagValue(args, "--timeout");
  if (rawTimeout) {
    const parsed = Number.parseInt(rawTimeout, 10);
    if (Number.isFinite(parsed) && parsed > 0) {
      timeout = parsed;
    } else {
      // A NaN timeout would make the wait for ALPHA/BETA end immediately.
      console.error(`Invalid --timeout value "${rawTimeout}"; using ${DEFAULT_TIMEOUT_SECONDS}s`);
    }
  }

  const task: TaskDefinition = {
    task_id: generateId(),
    objective,
    complexity: "high",
    subtasks: [
      { id: "s1", description: "Analyze data ingestion requirements", status: "pending", dependencies: [] },
      { id: "s2", description: "Design stream processing pipeline", status: "pending", dependencies: ["s1"] },
      { id: "s3", description: "Plan storage and query layer", status: "pending", dependencies: ["s1"] },
      { id: "s4", description: "Define scalability strategy", status: "pending", dependencies: ["s2", "s3"] },
      { id: "s5", description: "Integrate fault tolerance mechanisms", status: "pending", dependencies: ["s4"] },
    ],
    constraints: [
      "Must use open-source technologies where possible",
      "Latency < 100ms for query responses",
      "Support for multiple data formats (JSON, Avro, Protobuf)",
      "Cost-effective for variable workloads",
    ],
    success_criteria: [
      "Complete architecture design with component diagrams",
      "Data flow specifications",
      "Scalability analysis",
      "Fault tolerance mechanisms documented",
      "Cost estimation provided",
    ],
    timeout_seconds: timeout,
  };

  const orchestrator = new MultiAgentOrchestrator(model);
  let exitCode = 0;

  try {
    await orchestrator.initialize();
    const metrics = await orchestrator.runTask(task);

    console.log("\n" + "=".repeat(70));
    console.log("FINAL METRICS");
    console.log("=".repeat(70));
    console.log(JSON.stringify(metrics, null, 2));

    analyzePerformance(metrics);

    // Output special marker for server to parse consensus status
    // Format: ORCHESTRATION_RESULT:{"consensus":true/false,"metrics":{...},"abort_reason":...}
    console.log("\nORCHESTRATION_RESULT:" + JSON.stringify({
      consensus: metrics.final_consensus,
      task_id: metrics.task_id,
      metrics: metrics,
      abort_reason: metrics.abort_reason || null,
      requires_auto_recovery: !metrics.final_consensus || !!metrics.abort_reason,
    }));

    if (metrics.abort_reason) {
      console.log(`\n[ORCHESTRATOR] Task aborted: ${metrics.abort_reason} - exiting with code 3 (auto-recovery needed)`);
      exitCode = 3;
      if (pipelineId) {
        await reportErrorToObservability(
          pipelineId,
          "orchestrator_aborted",
          "high",
          `Orchestration aborted: ${metrics.abort_reason}. Auto-recovery required.`
        );
      }
    } else if (!metrics.final_consensus) {
      console.log("\n[ORCHESTRATOR] Consensus NOT achieved - exiting with code 2 (auto-recovery needed)");
      exitCode = 2;
      // Report consensus failure to observability
      if (pipelineId) {
        await reportErrorToObservability(
          pipelineId,
          "consensus_failure",
          "high",
          `Agents failed to reach consensus. Conflicts: ${metrics.conflicts_detected}, Resolved: ${metrics.conflicts_resolved}`
        );
      }
    }
  } catch (e: unknown) {
    const message = errorMessage(e);
    console.error("Orchestrator error:", message);
    exitCode = 1;
    // Report critical error to observability if pipeline ID is set
    if (pipelineId) {
      await reportErrorToObservability(pipelineId, "orchestrator_failure", "critical", message);
    }
  } finally {
    await orchestrator.cleanup();
    // Explicitly exit to ensure all connections are closed
    process.exit(exitCode);
  }
}

main().catch((e) => {
  console.error("Fatal error:", e);
  process.exit(1);
});