/**
 * Multi-Agent Coordination System - Orchestrator
 * Manages parallel agent execution, spawn conditions, and metrics
 *
 * Environment variables:
 * - PIPELINE_ID: Parent pipeline ID for error reporting
 * - TASK_ID: Task ID override
 */

import type { TaskDefinition, CoordinationMetrics, SpawnCondition, AgentRole } from "./types";
import {
  Blackboard,
  MessageBus,
  AgentStateManager,
  SpawnController,
  MetricsCollector,
} from "./coordination";
import { AgentAlpha, AgentBeta, AgentGamma } from "./agents";

/** Severity levels accepted by the pipeline error-tracking API. */
type ErrorSeverity = "low" | "medium" | "high" | "critical";

/** Model used when none is supplied to the constructor or on the command line. */
const DEFAULT_MODEL = "anthropic/claude-sonnet-4";

/** Task timeout (seconds) used when `--timeout` is absent or invalid. */
const DEFAULT_TIMEOUT_SECONDS = 120;

/** Endpoint of the UI server's error tracking API. */
const ERROR_REPORT_URL = "http://localhost:3000/api/pipeline/errors/record";

/** Delay between two spawn-condition monitoring ticks. */
const MONITOR_INTERVAL_MS = 2000;

/** Value handed to `detectStuckAgents`; NOTE(review): unit is presumably seconds - confirm. */
const STUCK_DETECTION_THRESHOLD = 30;

/** Width of the `=` rules printed around console sections. */
const BANNER_WIDTH = 70;

/** Objective used when no positional objective is given on the command line. */
const DEFAULT_OBJECTIVE = [
  "Design a distributed event-driven architecture for a real-time analytics platform that handles:",
  "1) High-throughput data ingestion from multiple sources",
  "2) Stream processing with exactly-once semantics",
  "3) Real-time aggregations and windowed computations",
  "4) Low-latency query serving for dashboards",
  "5) Horizontal scalability to handle 1M events/second",
  "The solution should consider fault tolerance, data consistency, and cost optimization.",
].join(" ");

/** Returns the current time as an ISO-8601 string. */
function now(): string {
  return new Date().toISOString();
}

/** Builds a task identifier of the form `task-<random>-<timestamp>` (both parts base 36). */
function generateId(): string {
  return "task-" + Math.random().toString(36).slice(2, 8) + "-" + Date.now().toString(36);
}

/** Extracts a printable message from any thrown value (not only `Error` instances). */
function errorMessage(e: unknown): string {
  return e instanceof Error ? e.message : String(e);
}

/**
 * Creates a timer-backed promise that resolves after `ms` milliseconds and
 * can be cancelled so the timer does not outlive the operation it guards.
 */
function createTimeout(ms: number): { promise: Promise<void>; cancel: () => void } {
  let handle: ReturnType<typeof setTimeout>;
  // The executor runs synchronously, so `handle` is set before `cancel` can be called.
  const promise = new Promise<void>((resolve) => {
    handle = setTimeout(resolve, ms);
  });
  return { promise, cancel: () => clearTimeout(handle) };
}

/**
 * Error reporting to parent pipeline's observability system.
 *
 * Best effort: a non-OK response or a network failure is logged to stderr
 * and never thrown, so reporting cannot cause further errors.
 *
 * @param pipelineId Parent pipeline the error belongs to.
 * @param errorType  Short machine-readable error category.
 * @param severity   Severity level of the error.
 * @param details    Human-readable description.
 */
async function reportErrorToObservability(
  pipelineId: string,
  errorType: string,
  severity: ErrorSeverity,
  details: string
): Promise<void> {
  try {
    // Report to the UI server's error tracking API
    const response = await fetch(ERROR_REPORT_URL, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        pipeline_id: pipelineId,
        error_type: errorType,
        severity,
        details,
      }),
    });
    if (!response.ok) {
      console.error(`[ERROR_REPORT] Failed to report error: ${response.status}`);
    }
  } catch (e: unknown) {
    // Don't let error reporting cause more errors
    console.error(`[ERROR_REPORT] Error reporting failed: ${errorMessage(e)}`);
  }
}

// =============================================================================
// Multi-Agent Orchestrator
// =============================================================================

/**
 * Coordinates the ALPHA and BETA agents on a shared task, watches spawn
 * conditions while they run, and brings in a GAMMA agent when the spawn
 * controller asks for it.
 *
 * Typical use: `initialize()`, then `runTask(task)`, then `cleanup()`.
 */
export class MultiAgentOrchestrator {
  private taskId: string;
  private pipelineId?: string;
  private blackboard!: Blackboard;
  private stateManager!: AgentStateManager;
  private spawnController!: SpawnController;
  private metrics!: MetricsCollector;
  private alphaAgent!: AgentAlpha;
  private betaAgent!: AgentBeta;
  private gammaAgent?: AgentGamma;
  private alphaBus!: MessageBus;
  private betaBus!: MessageBus;
  private gammaBus?: MessageBus;
  private model: string;
  private startTime!: number;
  private monitorInterval?: ReturnType<typeof setInterval>;
  private errorCount: number = 0;
  /** True while a monitoring tick is running; prevents overlapping ticks. */
  private monitorTickRunning = false;
  /** Pending GAMMA spawn, if any; prevents two concurrent spawns. */
  private gammaSpawnInFlight?: Promise<void>;

  /**
   * @param model Model identifier handed to every agent.
   */
  constructor(model: string = DEFAULT_MODEL) {
    // Use environment variable for task ID if provided
    this.taskId = process.env.TASK_ID || generateId();
    this.pipelineId = process.env.PIPELINE_ID;
    this.model = model;
  }

  /**
   * Counts an error, forwards it to the observability API when a pipeline
   * ID is configured, and logs it.
   */
  private async reportError(
    errorType: string,
    severity: ErrorSeverity,
    details: string
  ): Promise<void> {
    this.errorCount++;
    if (this.pipelineId) {
      await reportErrorToObservability(this.pipelineId, errorType, severity, details);
    }
    this.log(`ERROR [${severity}] ${errorType}: ${details}`);
  }

  /** Logs a message prefixed with the seconds elapsed since `initialize()` started. */
  private log(msg: string): void {
    const elapsed = this.startTime
      ? ((Date.now() - this.startTime) / 1000).toFixed(1)
      : "0.0";
    console.log(`[${elapsed}s] [ORCHESTRATOR] ${msg}`);
  }

  /**
   * Prints the run banner, connects the shared coordination infrastructure
   * and the ALPHA/BETA message buses, then creates and initializes both agents.
   *
   * Rejects if any connect or init call rejects.
   */
  async initialize(): Promise<void> {
    this.startTime = Date.now();
    const rule = "=".repeat(BANNER_WIDTH);

    console.log("\n" + rule);
    console.log("MULTI-AGENT COORDINATION SYSTEM");
    console.log("Task ID: " + this.taskId);
    if (this.pipelineId) {
      console.log("Pipeline ID: " + this.pipelineId);
    }
    console.log("Model: " + this.model);
    console.log(rule + "\n");

    this.log("Initializing coordination infrastructure...");

    // Initialize shared infrastructure
    this.blackboard = new Blackboard(this.taskId);
    this.stateManager = new AgentStateManager(this.taskId);
    this.spawnController = new SpawnController(this.taskId);
    this.metrics = new MetricsCollector(this.taskId);

    await Promise.all([
      this.blackboard.connect(),
      this.stateManager.connect(),
      this.spawnController.connect(),
      this.metrics.connect(),
    ]);
    this.log("Infrastructure connected");

    // Initialize message buses for ALPHA and BETA
    this.alphaBus = new MessageBus(this.taskId, "ALPHA");
    this.betaBus = new MessageBus(this.taskId, "BETA");

    await Promise.all([this.alphaBus.connect(), this.betaBus.connect()]);
    this.log("Message buses connected");

    // Create initial agents
    this.alphaAgent = new AgentAlpha(
      this.taskId,
      this.blackboard,
      this.alphaBus,
      this.stateManager,
      this.metrics,
      this.model
    );
    this.betaAgent = new AgentBeta(
      this.taskId,
      this.blackboard,
      this.betaBus,
      this.stateManager,
      this.metrics,
      this.model
    );

    await Promise.all([this.alphaAgent.init(), this.betaAgent.init()]);
    this.log("Agents ALPHA and BETA initialized");
  }

  /**
   * Spawns the GAMMA agent unless it already exists.
   *
   * Monitoring ticks and `runTask` can both call this while an earlier call
   * is still awaiting; such callers wait for the pending spawn instead of
   * starting a second one.
   *
   * @param reason Spawn condition that triggered the spawn.
   */
  async spawnGamma(reason: SpawnCondition): Promise<void> {
    if (this.gammaAgent) {
      this.log("GAMMA already spawned, skipping");
      return;
    }
    if (this.gammaSpawnInFlight) {
      this.log("GAMMA spawn already in progress, waiting");
      await this.gammaSpawnInFlight;
      return;
    }

    const spawn = this.createGamma(reason);
    this.gammaSpawnInFlight = spawn;
    try {
      await spawn;
    } finally {
      this.gammaSpawnInFlight = undefined;
    }
  }

  /** Connects GAMMA's message bus, builds and initializes the agent, and records the spawn. */
  private async createGamma(reason: SpawnCondition): Promise<void> {
    this.log(
      `SPAWNING GAMMA - Reason: ${reason.type} (threshold: ${reason.threshold}, current: ${reason.current_value})`
    );

    // Create message bus for GAMMA
    this.gammaBus = new MessageBus(this.taskId, "GAMMA");
    await this.gammaBus.connect();

    // Create and initialize GAMMA agent
    this.gammaAgent = new AgentGamma(
      this.taskId,
      this.blackboard,
      this.gammaBus,
      this.stateManager,
      this.metrics,
      reason.type,
      this.model
    );
    await this.gammaAgent.init();

    await this.spawnController.markGammaSpawned(reason);
    this.log("GAMMA agent spawned and initialized");
  }

  /** Asks the spawn controller whether GAMMA should be spawned and spawns it if so. */
  private async spawnGammaIfWarranted(): Promise<void> {
    const { shouldSpawn, reason } = await this.spawnController.checkSpawnConditions();
    if (shouldSpawn && reason) {
      await this.spawnGamma(reason);
    }
  }

  /**
   * One monitoring pass: updates the STUCK, CONFLICT and COMPLEXITY spawn
   * conditions, spawns GAMMA when one of them triggers, and logs a status line.
   */
  private async monitorConditions(): Promise<void> {
    // Check stuck condition
    const stuckAgents = await this.stateManager.detectStuckAgents(STUCK_DETECTION_THRESHOLD);
    if (stuckAgents.length > 0) {
      this.log(`Stuck agents detected: ${stuckAgents.join(", ")}`);
      const condition = await this.spawnController.updateCondition("STUCK", stuckAgents.length);
      if (condition?.triggered) {
        await this.spawnGammaIfWarranted();
      }
    }

    // Check conflict condition
    const metricsData = await this.metrics.getMetrics();
    const unresolvedConflicts = metricsData.conflicts_detected - metricsData.conflicts_resolved;
    const conflictCondition = await this.spawnController.updateCondition(
      "CONFLICT",
      unresolvedConflicts
    );
    if (conflictCondition?.triggered && !this.spawnController.isGammaSpawned()) {
      await this.spawnGammaIfWarranted();
    }

    // Check complexity condition (from blackboard)
    const analysis = await this.blackboard.read("problem", "analysis");
    if (analysis?.value?.complexity_score) {
      const complexityCondition = await this.spawnController.updateCondition(
        "COMPLEXITY",
        analysis.value.complexity_score
      );
      if (complexityCondition?.triggered && !this.spawnController.isGammaSpawned()) {
        await this.spawnGammaIfWarranted();
      }
    }

    // Log current state
    const states = await this.stateManager.getAllStates();
    const statesSummary = states
      .map((s) => `${s.role}:${s.status}(${(s.progress * 100).toFixed(0)}%)`)
      .join(", ");
    this.log(
      `Status: ${statesSummary} | Messages: ${metricsData.total_messages} | Conflicts: ${unresolvedConflicts}`
    );
  }

  /**
   * Timer callback wrapper around `monitorConditions`.
   *
   * Skips the tick when the previous one is still running, and reports
   * failures instead of leaving an unhandled promise rejection.
   */
  private async runMonitorTick(): Promise<void> {
    if (this.monitorTickRunning) {
      return;
    }
    this.monitorTickRunning = true;
    try {
      await this.monitorConditions();
    } catch (e: unknown) {
      await this.reportError("monitor_failure", "medium", `Monitoring tick failed: ${errorMessage(e)}`);
    } finally {
      this.monitorTickRunning = false;
    }
  }

  /** Stops the periodic monitoring timer if it is running. */
  private stopMonitoring(): void {
    if (this.monitorInterval) {
      clearInterval(this.monitorInterval);
      this.monitorInterval = undefined;
    }
  }

  /**
   * Runs `task` with ALPHA and BETA in parallel, bounded by
   * `task.timeout_seconds`, runs GAMMA if it was spawned, and finalizes the
   * metrics with the consensus outcome.
   *
   * Agent failures are reported and do not reject this method. The
   * monitoring timer and the timeout timer are released even when a step throws.
   *
   * @param task Task definition; `initialize()` must have completed first.
   * @returns The finalized coordination metrics.
   */
  async runTask(task: TaskDefinition): Promise<CoordinationMetrics> {
    this.log(`Starting task: ${task.objective.slice(0, 60)}...`);

    // Write task to blackboard
    await this.blackboard.write("problem", "task_definition", task, "ALPHA");

    // Start monitoring
    this.monitorInterval = setInterval(() => {
      void this.runMonitorTick();
    }, MONITOR_INTERVAL_MS);

    try {
      // Run agents in parallel
      this.log("Launching ALPHA and BETA in parallel...");

      const alphaPromise = this.alphaAgent.run(task).catch(async (e: unknown) => {
        await this.reportError("agent_failure", "high", `ALPHA error: ${errorMessage(e)}`);
      });
      const betaPromise = this.betaAgent.run(task).catch(async (e: unknown) => {
        await this.reportError("agent_failure", "high", `BETA error: ${errorMessage(e)}`);
      });

      // Wait for initial agents to complete (or timeout)
      const timeout = createTimeout(task.timeout_seconds * 1000);
      try {
        await Promise.race([Promise.all([alphaPromise, betaPromise]), timeout.promise]);
      } finally {
        // Release the timer so it does not stay pending after the agents finish.
        timeout.cancel();
      }

      this.log("Initial agents completed or timeout reached");

      // Check if GAMMA needs to be spawned for success validation
      const states = await this.stateManager.getAllStates();
      const bothComplete = states.every(
        (s) => s.status === "WAITING" || s.status === "COMPLETED"
      );

      if (bothComplete && !this.spawnController.isGammaSpawned()) {
        await this.spawnController.updateCondition("SUCCESS", 1.0);
        await this.spawnGammaIfWarranted();
      }

      // A monitoring tick may still be spawning GAMMA; wait so the agent is
      // not run before its init() has finished.
      if (this.gammaSpawnInFlight) {
        try {
          await this.gammaSpawnInFlight;
        } catch {
          // The failure is reported by the monitoring tick that started the spawn.
        }
      }

      // If GAMMA was spawned, run it
      if (this.gammaAgent) {
        this.log("Running GAMMA for resolution...");
        try {
          await this.gammaAgent.run(task);
        } catch (e: unknown) {
          await this.reportError("agent_failure", "high", `GAMMA error: ${errorMessage(e)}`);
        }
      }
    } finally {
      // Stop monitoring
      this.stopMonitoring();
    }

    // Check consensus
    const consensus = await this.blackboard.checkConsensus("synthesis", ["ALPHA", "BETA"]);
    const consensusAchieved =
      consensus.reached ||
      (await this.blackboard.read("consensus", "final"))?.value?.achieved === true;

    this.log(`Consensus achieved: ${consensusAchieved}`);

    // Finalize metrics
    const finalMetrics = await this.metrics.finalize(consensusAchieved);

    return finalMetrics;
  }

  /**
   * Stops monitoring and disconnects every bus and infrastructure component
   * that was created.
   *
   * Best effort: every disconnect is attempted, and failures are logged
   * rather than thrown so a single bad connection cannot block shutdown.
   */
  async cleanup(): Promise<void> {
    this.log("Cleaning up...");

    this.stopMonitoring();

    // Entries are undefined when initialize() did not get far enough to create them.
    const connections: ReadonlyArray<{ disconnect(): unknown } | undefined> = [
      this.alphaBus,
      this.betaBus,
      this.gammaBus,
      this.blackboard,
      this.stateManager,
      this.spawnController,
      this.metrics,
    ];

    const results = await Promise.allSettled(
      connections.map(async (connection) => {
        await connection?.disconnect();
      })
    );
    for (const result of results) {
      if (result.status === "rejected") {
        this.log(`Disconnect failed during cleanup: ${errorMessage(result.reason)}`);
      }
    }

    this.log("Cleanup complete");
  }

  /** Returns the task ID used for this orchestration run. */
  getTaskId(): string {
    return this.taskId;
  }
}

// =============================================================================
// Performance Analysis
// =============================================================================

/**
 * Seconds between `start_time` and `end_time`, or 0 when `end_time` is
 * missing or either timestamp cannot be parsed.
 */
function durationSeconds(metrics: CoordinationMetrics): number {
  if (!metrics.end_time) {
    return 0;
  }
  const seconds =
    (new Date(metrics.end_time).getTime() - new Date(metrics.start_time).getTime()) / 1000;
  return Number.isFinite(seconds) ? seconds : 0;
}

/**
 * Prints a human-readable performance report for a finished run: timing,
 * communication volume, conflict handling, GAMMA usage, outcome, and
 * threshold warnings.
 *
 * @param metrics Metrics of the run to report on.
 */
export function analyzePerformance(metrics: CoordinationMetrics): void {
  const rule = "=".repeat(BANNER_WIDTH);

  console.log("\n" + rule);
  console.log("PERFORMANCE ANALYSIS");
  console.log(rule);

  const duration = durationSeconds(metrics);

  console.log("\nTiming:");
  console.log(` Duration: ${duration.toFixed(1)}s`);

  console.log("\nCommunication:");
  console.log(` Total messages: ${metrics.total_messages}`);
  console.log(` Direct messages: ${metrics.direct_messages}`);
  console.log(` Blackboard writes: ${metrics.blackboard_writes}`);
  console.log(` Blackboard reads: ${metrics.blackboard_reads}`);
  // A rate is undefined for a zero-length run; avoid printing Infinity/NaN.
  const overhead =
    duration > 0
      ? `${((metrics.total_messages + metrics.blackboard_writes) / duration).toFixed(2)} ops/sec`
      : "n/a (no measurable duration)";
  console.log(` Communication overhead: ${overhead}`);

  console.log("\nCoordination:");
  console.log(` Conflicts detected: ${metrics.conflicts_detected}`);
  console.log(` Conflicts resolved: ${metrics.conflicts_resolved}`);
  const resolutionRate =
    metrics.conflicts_detected > 0
      ? ((metrics.conflicts_resolved / metrics.conflicts_detected) * 100).toFixed(1)
      : 100;
  console.log(` Conflict resolution rate: ${resolutionRate}%`);

  console.log("\nGamma Agent:");
  console.log(` Spawned: ${metrics.gamma_spawned ? "Yes" : "No"}`);
  if (metrics.gamma_spawned) {
    console.log(` Spawn reason: ${metrics.gamma_spawn_reason}`);
    console.log(` Spawn time: ${metrics.gamma_spawn_time}`);
  }

  console.log("\nOutcome:");
  console.log(` Consensus achieved: ${metrics.final_consensus ? "Yes" : "No"}`);
  console.log(` Performance score: ${(metrics.performance_score * 100).toFixed(1)}%`);

  // Threshold analysis
  console.log("\nThreshold Effects:");
  const messageThreshold = 50;
  const conflictThreshold = 3;

  if (metrics.total_messages > messageThreshold) {
    console.log(
      ` ! High message volume (${metrics.total_messages} > ${messageThreshold}) - potential coordination overhead`
    );
  } else {
    console.log(
      ` + Message volume within threshold (${metrics.total_messages} <= ${messageThreshold})`
    );
  }

  if (metrics.conflicts_detected > conflictThreshold) {
    console.log(
      ` ! High conflict rate (${metrics.conflicts_detected} > ${conflictThreshold}) - agents may have divergent strategies`
    );
  } else {
    console.log(
      ` + Conflict rate within threshold (${metrics.conflicts_detected} <= ${conflictThreshold})`
    );
  }

  if (metrics.gamma_spawned && metrics.gamma_spawn_reason === "STUCK") {
    console.log(` ! Gamma spawned due to stuck condition - consider adjusting agent strategies`);
  }

  console.log("\n" + rule);
}

// =============================================================================
// CLI Entry Point
// =============================================================================

/** Options understood by the command-line entry point. */
interface CliOptions {
  objective: string;
  model: string;
  timeoutSeconds: number;
}

/**
 * Parses `[objective] [--model <name>] [--timeout <seconds>]`.
 *
 * The objective is the first argument that is neither a known flag nor a
 * flag's value, so flags may come before it. A missing or non-positive
 * `--timeout` value falls back to the default with a warning, because a NaN
 * timeout would make the task time out immediately.
 */
function parseCliArgs(args: readonly string[]): CliOptions {
  let objective: string | undefined;
  let model: string | undefined;
  let timeoutSeconds: number | undefined;

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === undefined) {
      continue;
    }

    if (arg === "--model" || arg === "--timeout") {
      const value = args[i + 1];
      if (value === undefined || value === "" || value.startsWith("--")) {
        console.error(`[ORCHESTRATOR] Ignoring ${arg}: missing value`);
        continue;
      }
      i++; // the value belongs to the flag, not to the positional arguments

      if (arg === "--model") {
        if (model === undefined) {
          model = value;
        }
      } else if (timeoutSeconds === undefined) {
        const parsed = Number.parseInt(value, 10);
        if (Number.isFinite(parsed) && parsed > 0) {
          timeoutSeconds = parsed;
        } else {
          console.error(
            `[ORCHESTRATOR] Ignoring invalid --timeout "${value}", using ${DEFAULT_TIMEOUT_SECONDS}s`
          );
        }
      }
      continue;
    }

    if (objective === undefined && arg !== "") {
      objective = arg;
    }
  }

  return {
    objective: objective ?? DEFAULT_OBJECTIVE,
    model: model ?? DEFAULT_MODEL,
    timeoutSeconds: timeoutSeconds ?? DEFAULT_TIMEOUT_SECONDS,
  };
}

/**
 * CLI entry point: builds the task, runs the orchestrator, prints metrics
 * and the `ORCHESTRATION_RESULT:` marker line, then exits.
 *
 * Exit codes: 0 = consensus reached, 2 = consensus not reached,
 * 1 = orchestrator error.
 */
async function main(): Promise<void> {
  const { objective, model, timeoutSeconds } = parseCliArgs(process.argv.slice(2));

  const task: TaskDefinition = {
    task_id: generateId(),
    objective,
    complexity: "high",
    subtasks: [
      { id: "s1", description: "Analyze data ingestion requirements", status: "pending", dependencies: [] },
      { id: "s2", description: "Design stream processing pipeline", status: "pending", dependencies: ["s1"] },
      { id: "s3", description: "Plan storage and query layer", status: "pending", dependencies: ["s1"] },
      { id: "s4", description: "Define scalability strategy", status: "pending", dependencies: ["s2", "s3"] },
      { id: "s5", description: "Integrate fault tolerance mechanisms", status: "pending", dependencies: ["s4"] },
    ],
    constraints: [
      "Must use open-source technologies where possible",
      "Latency < 100ms for query responses",
      "Support for multiple data formats (JSON, Avro, Protobuf)",
      "Cost-effective for variable workloads",
    ],
    success_criteria: [
      "Complete architecture design with component diagrams",
      "Data flow specifications",
      "Scalability analysis",
      "Fault tolerance mechanisms documented",
      "Cost estimation provided",
    ],
    timeout_seconds: timeoutSeconds,
  };

  const orchestrator = new MultiAgentOrchestrator(model);
  const rule = "=".repeat(BANNER_WIDTH);
  let exitCode = 0;

  try {
    await orchestrator.initialize();
    const metrics = await orchestrator.runTask(task);

    console.log("\n" + rule);
    console.log("FINAL METRICS");
    console.log(rule);
    console.log(JSON.stringify(metrics, null, 2));

    analyzePerformance(metrics);

    // Output special marker for server to parse consensus status
    // Format: ORCHESTRATION_RESULT:{"consensus":true/false,"metrics":{...}}
    console.log(
      "\nORCHESTRATION_RESULT:" +
        JSON.stringify({
          consensus: metrics.final_consensus,
          task_id: metrics.task_id,
          metrics: metrics,
        })
    );

    // Exit with code 2 for consensus failure (distinct from error=1, success=0)
    if (!metrics.final_consensus) {
      console.log("\n[ORCHESTRATOR] Consensus NOT achieved - exiting with code 2");
      exitCode = 2;

      // Report consensus failure to observability
      const pipelineId = process.env.PIPELINE_ID;
      if (pipelineId) {
        await reportErrorToObservability(
          pipelineId,
          "consensus_failure",
          "high",
          `Agents failed to reach consensus. Conflicts: ${metrics.conflicts_detected}, Resolved: ${metrics.conflicts_resolved}`
        );
      }
    }
  } catch (e: unknown) {
    const message = errorMessage(e);
    console.error("Orchestrator error:", message);
    exitCode = 1;

    // Report critical error to observability if pipeline ID is set
    const pipelineId = process.env.PIPELINE_ID;
    if (pipelineId) {
      await reportErrorToObservability(pipelineId, "orchestrator_failure", "critical", message);
    }
  } finally {
    await orchestrator.cleanup();
    // Explicitly exit to ensure all connections are closed
    process.exit(exitCode);
  }
}

main().catch((e) => {
  console.error("Fatal error:", e);
  process.exit(1);
});