// agent-governance/agents/multi-agent/orchestrator.ts

/**
 * Multi-Agent Coordination System - Orchestrator
 * Manages parallel agent execution, spawn conditions, and metrics
 *
 * Environment variables:
 * - PIPELINE_ID: Parent pipeline ID for error reporting
 * - TASK_ID: Task ID override
 */

import type { TaskDefinition, CoordinationMetrics, SpawnCondition, AgentRole } from "./types";
import {
  Blackboard,
  MessageBus,
  AgentStateManager,
  SpawnController,
  MetricsCollector,
} from "./coordination";
import { AgentAlpha, AgentBeta, AgentGamma } from "./agents";
import { createClient, RedisClientType } from "redis";

// Redis client for direct handoff writes
let handoffRedis: RedisClientType | null = null;

async function getHandoffRedis(): Promise<RedisClientType> {
  if (handoffRedis && handoffRedis.isOpen) return handoffRedis;

  const initKeys = await Bun.file("/opt/vault/init-keys.json").json();
  const token = initKeys.root_token;
  const result = await fetch(`https://127.0.0.1:8200/v1/secret/data/services/dragonfly`, {
    headers: { "X-Vault-Token": token },
    // @ts-ignore - Bun supports this
    tls: { rejectUnauthorized: false }
  }).then(r => r.json());

  const creds = result.data.data;
  handoffRedis = createClient({
    url: `redis://${creds.host}:${creds.port}`,
    password: creds.password,
  });
  await handoffRedis.connect();
  return handoffRedis;
}

/** Current wall-clock time formatted as an ISO-8601 UTC timestamp. */
function now(): string {
  const current = new Date();
  return current.toISOString();
}

/**
 * Builds a task identifier of the form `task-<random>-<time>`, where both
 * suffixes are base-36: up to six random characters followed by the current
 * epoch milliseconds.
 */
function generateId(): string {
  const randomPart = Math.random().toString(36).slice(2, 8);
  const timePart = Date.now().toString(36);
  return `task-${randomPart}-${timePart}`;
}

/**
 * Reports an error to the parent pipeline's observability system by POSTing
 * it to the UI server's error tracking API.
 *
 * This is best-effort: a non-2xx response or a failed request is written to
 * stderr and never thrown, so error reporting cannot cause further errors.
 *
 * @param pipelineId - Pipeline the error belongs to.
 * @param errorType - Short machine-readable error category.
 * @param severity - Severity level forwarded to the API.
 * @param details - Human-readable description.
 */
async function reportErrorToObservability(
  pipelineId: string,
  errorType: string,
  severity: "low" | "medium" | "high" | "critical",
  details: string
): Promise<void> {
  try {
    const response = await fetch("http://localhost:3000/api/pipeline/errors/record", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        pipeline_id: pipelineId,
        error_type: errorType,
        severity,
        details
      })
    });
    if (!response.ok) {
      console.error(`[ERROR_REPORT] Failed to report error: ${response.status}`);
    }
  } catch (e: unknown) {
    // Anything can be thrown; only Error instances are guaranteed a message.
    const reason = e instanceof Error ? e.message : String(e);
    console.error(`[ERROR_REPORT] Error reporting failed: ${reason}`);
  }
}

// =============================================================================
// Multi-Agent Orchestrator
// =============================================================================

/**
 * Coordinates one multi-agent task run.
 *
 * The orchestrator wires up the shared coordination infrastructure
 * (blackboard, state manager, spawn controller, metrics), runs the ALPHA and
 * BETA agents in parallel, optionally spawns a GAMMA agent when a spawn
 * condition triggers, and reports metrics, failures and recovery handoff data.
 *
 * Typical usage: `initialize()` -> `runTask(task)` -> `cleanup()`.
 *
 * Reads `TASK_ID` and `PIPELINE_ID` from the environment at construction
 * time. All pipeline-level reporting is skipped when `PIPELINE_ID` is unset.
 */
export class MultiAgentOrchestrator {
  private taskId: string;
  private pipelineId?: string;
  private blackboard!: Blackboard;
  private stateManager!: AgentStateManager;
  private spawnController!: SpawnController;
  private metrics!: MetricsCollector;

  private alphaAgent!: AgentAlpha;
  private betaAgent!: AgentBeta;
  private gammaAgent?: AgentGamma;

  private alphaBus!: MessageBus;
  private betaBus!: MessageBus;
  private gammaBus?: MessageBus;

  private model: string;
  private startTime!: number;
  private monitorInterval?: ReturnType<typeof setInterval>;
  private errorCount: number = 0;
  private iterationCount: number = 0;
  private maxIterations: number = 10;
  private lastProgressTime: number = 0;
  private progressTimeout: number = 60000; // 60 seconds without progress = stuck

  // Recovery context from prior runs
  private isRecoveryRun: boolean = false;
  private recoveryAttempt: number = 1;
  private inheritedContext: any = null;
  private forceGamma: boolean = false;

  /**
   * @param model - Model identifier handed to every agent this orchestrator creates.
   */
  constructor(model: string = "anthropic/claude-sonnet-4") {
    // Use environment variable for task ID if provided
    this.taskId = process.env.TASK_ID || generateId();
    this.pipelineId = process.env.PIPELINE_ID;
    this.model = model;
  }

  /** Extracts a printable message from an arbitrary thrown value. */
  private static errorMessage(e: unknown): string {
    if (e instanceof Error) return e.message;
    if (typeof e === "object" && e !== null && "message" in e) {
      return String((e as { message: unknown }).message);
    }
    return String(e);
  }

  /**
   * Counts an error, forwards it to observability when a pipeline ID is set,
   * and writes it to the orchestrator log.
   */
  private async reportError(errorType: string, severity: "low" | "medium" | "high" | "critical", details: string): Promise<void> {
    this.errorCount++;
    if (this.pipelineId) {
      await reportErrorToObservability(this.pipelineId, errorType, severity, details);
    }
    this.log(`ERROR [${severity}] ${errorType}: ${details}`);
  }

  /** Stamps "now" as the last progress time and counts one more iteration. */
  private updateProgress(): void {
    this.lastProgressTime = Date.now();
    this.iterationCount++;
  }

  /** True when more than `progressTimeout` ms have passed since the last progress stamp. */
  private isStuck(): boolean {
    if (this.lastProgressTime === 0) return false;
    return (Date.now() - this.lastProgressTime) > this.progressTimeout;
  }

  /** True once `iterationCount` has reached `maxIterations`. */
  private isIterationLimitExceeded(): boolean {
    return this.iterationCount >= this.maxIterations;
  }

  /**
   * Requests token revocation for stuck agents from the UI server.
   * Best-effort: failures are logged, never thrown. No-op without a pipeline ID.
   */
  private async revokeStuckAgentTokens(): Promise<void> {
    if (!this.pipelineId) return;

    try {
      const response = await fetch("http://localhost:3000/api/pipeline/revoke", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          pipeline_id: this.pipelineId,
          reason: "iteration_timeout",
          details: `Agents stuck after ${this.iterationCount} iterations`
        })
      });
      // fetch only rejects on network failure; an HTTP error must be checked explicitly.
      if (response.ok) {
        this.log("Token revocation requested for stuck agents");
      } else {
        this.log(`Failed to revoke tokens: HTTP ${response.status}`);
      }
    } catch (e: unknown) {
      this.log(`Failed to revoke tokens: ${MultiAgentOrchestrator.errorMessage(e)}`);
    }
  }

  /**
   * Records structured failure context for auto-recovery by POSTing it to the
   * UI server. Best-effort: failures are logged, never thrown. No-op without a
   * pipeline ID.
   *
   * @param reason - Why the run failed.
   * @param metrics - Metrics snapshot to attach to the failure record.
   */
  private async recordFailureContext(
    reason: "stuck" | "iteration_limit" | "consensus_failed",
    metrics: CoordinationMetrics
  ): Promise<void> {
    if (!this.pipelineId) return;

    const failureContext = {
      pipeline_id: this.pipelineId,
      task_id: this.taskId,
      failure_reason: reason,
      failure_time: new Date().toISOString(),
      iteration_count: this.iterationCount,
      elapsed_ms: Date.now() - this.startTime,
      metrics: metrics,
      gamma_spawned: this.gammaAgent !== undefined,
      error_count: this.errorCount,
      recovery_hint: this.getRecoveryHint(reason)
    };

    try {
      const response = await fetch("http://localhost:3000/api/pipeline/failure-context", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(failureContext)
      });
      // Do not claim success when the server rejected the record.
      if (response.ok) {
        this.log(`Failure context recorded: ${reason}`);
      } else {
        this.log(`Failed to record failure context: HTTP ${response.status}`);
      }
    } catch (e: unknown) {
      this.log(`Failed to record failure context: ${MultiAgentOrchestrator.errorMessage(e)}`);
    }
  }

  /** Maps a failure reason to a human-readable hint for the recovery run. */
  private getRecoveryHint(reason: string): string {
    switch (reason) {
      case "stuck":
        return "Agents became unresponsive. Try with fresh agents or GAMMA mediator.";
      case "iteration_limit":
        return "Max iterations reached without consensus. Consider simplifying objective.";
      case "consensus_failed":
        return "Agents completed but disagreed. GAMMA mediator may help resolve conflicts.";
      default:
        return "Unknown failure. Review logs for details.";
    }
  }

  /**
   * Loads recovery context from a prior run if this is a recovery pipeline.
   *
   * Reads the `pipeline:<id>` hash from Redis. When `is_recovery` is "true" it
   * sets the recovery flags, loads the inherited handoff JSON referenced by
   * `inherited_handoff`, and prints a summary. Any failure is logged as a
   * warning and otherwise ignored. No-op without a pipeline ID.
   */
  private async loadRecoveryContext(): Promise<void> {
    if (!this.pipelineId) return;

    try {
      const redis = await getHandoffRedis();
      const pipelineData = await redis.hGetAll(`pipeline:${this.pipelineId}`);

      if (pipelineData.is_recovery === "true") {
        this.isRecoveryRun = true;
        // Explicit radix, and fall back to attempt 1 when the stored value is
        // missing or not a number instead of propagating NaN.
        const parsedAttempt = Number.parseInt(pipelineData.recovery_attempt ?? "", 10);
        this.recoveryAttempt = Number.isNaN(parsedAttempt) ? 1 : parsedAttempt;
        this.forceGamma = pipelineData.force_gamma === "true";

        console.log("\n" + "=".repeat(70));
        console.log(`RECOVERY RUN - Attempt ${this.recoveryAttempt}`);
        console.log("=".repeat(70));

        // Load inherited handoff
        const inheritedKey = pipelineData.inherited_handoff;
        if (inheritedKey) {
          const inheritedData = await redis.get(inheritedKey);
          if (inheritedData) {
            this.inheritedContext = JSON.parse(inheritedData);
            console.log(`Loaded inherited context from: ${inheritedKey}`);
            console.log(`- Prior proposals: ${this.inheritedContext.proposals?.length || 0}`);
            console.log(`- Prior synthesis attempts: ${this.inheritedContext.synthesis_attempts?.length || 0}`);
            console.log(`- Recovery hints:`);
            this.inheritedContext.recovery_hints?.forEach((hint: string, i: number) => {
              console.log(`  ${i + 1}. ${hint}`);
            });
          }
        }

        // Also try loading prior_context (JSON string)
        if (pipelineData.prior_context) {
          const priorContext = JSON.parse(pipelineData.prior_context);
          console.log(`Prior run: ${priorContext.prior_pipeline}`);
          console.log(`Prior failure reason: ${priorContext.failure_reason}`);
          console.log(`Prior iteration count: ${priorContext.iteration_count}`);
        }

        if (this.forceGamma) {
          console.log("\nFORCE GAMMA MODE: GAMMA mediator will be spawned immediately");
        }

        console.log("=".repeat(70) + "\n");
      }
    } catch (e: unknown) {
      this.log(`Warning: Could not load recovery context: ${MultiAgentOrchestrator.errorMessage(e)}`);
    }
  }

  /** Returns the inherited context loaded for a recovery run, or null if none was loaded. */
  getInheritedContext(): any {
    return this.inheritedContext;
  }

  /** Whether the pipeline record asked for GAMMA to be force-spawned. */
  shouldForceGamma(): boolean {
    return this.forceGamma;
  }

  /**
   * Pre-seeds the blackboard with inherited proposals and synthesis attempts
   * from the prior run, plus a `recovery_context` entry in the "problem"
   * section. Failures are logged as warnings. No-op without inherited context.
   */
  private async preseedBlackboard(): Promise<void> {
    if (!this.inheritedContext) return;

    try {
      // Seed prior proposals into the blackboard
      if (this.inheritedContext.proposals?.length > 0) {
        for (const proposal of this.inheritedContext.proposals) {
          await this.blackboard.write(
            "solutions",
            `inherited_${proposal.agent}_${proposal.key || 'proposal'}`,
            {
              ...proposal.value,
              _inherited: true,
              _from_run: this.recoveryAttempt - 1
            },
            proposal.agent as any
          );
        }
        this.log(`Seeded ${this.inheritedContext.proposals.length} proposals from prior run`);
      }

      // Seed prior synthesis attempts
      if (this.inheritedContext.synthesis_attempts?.length > 0) {
        for (const synthesis of this.inheritedContext.synthesis_attempts) {
          await this.blackboard.write(
            "synthesis",
            `inherited_${synthesis.agent}_${synthesis.key || 'synthesis'}`,
            {
              ...synthesis.value,
              _inherited: true,
              _from_run: this.recoveryAttempt - 1
            },
            synthesis.agent as any
          );
        }
        this.log(`Seeded ${this.inheritedContext.synthesis_attempts.length} synthesis attempts from prior run`);
      }

      // Write recovery metadata to blackboard
      await this.blackboard.write("problem", "recovery_context", {
        is_recovery: true,
        recovery_attempt: this.recoveryAttempt,
        prior_pipeline: this.inheritedContext.from_pipeline,
        prior_proposals_count: this.inheritedContext.proposals?.length || 0,
        recovery_hints: this.inheritedContext.recovery_hints || [],
        instructions: [
          "This is a RECOVERY run - prior agents failed to reach consensus",
          "Review the inherited proposals in the 'solutions' section",
          "Look for common ground between prior proposals",
          "GAMMA mediator will help resolve conflicts",
          "Try to synthesize a solution that incorporates the best ideas from prior attempts"
        ]
      }, "ALPHA");

    } catch (e: unknown) {
      this.log(`Warning: Failed to pre-seed blackboard: ${MultiAgentOrchestrator.errorMessage(e)}`);
    }
  }

  /**
   * Collects all agent proposals/analysis, agent states and recent messages
   * and stores them as handoff JSON in Redis under
   * `handoff:<pipelineId>:agents` (24 hour TTL), then records the key and dump
   * time on the pipeline hash. Failures are logged, never thrown. No-op
   * without a pipeline ID.
   */
  private async dumpAgentHandoff(): Promise<void> {
    if (!this.pipelineId) return;

    try {
      const redis = await getHandoffRedis();
      const handoffKey = `handoff:${this.pipelineId}:agents`;

      // Collect all solutions from blackboard
      const solutions = await this.blackboard.readSection("solutions");
      const synthesis = await this.blackboard.readSection("synthesis");
      const consensus = await this.blackboard.readSection("consensus");
      const problem = await this.blackboard.readSection("problem");

      // Get agent states
      const agentStates = await this.stateManager.getAllStates();

      // Get message history
      const alphaMessages = await this.alphaBus.getMessageLog(50);
      const betaMessages = await this.betaBus.getMessageLog(50);
      const gammaMessages = this.gammaBus ? await this.gammaBus.getMessageLog(50) : [];

      // Build structured handoff
      const handoff = {
        pipeline_id: this.pipelineId,
        task_id: this.taskId,
        dump_time: new Date().toISOString(),
        iteration_count: this.iterationCount,
        max_iterations: this.maxIterations,
        gamma_active: this.gammaAgent !== undefined,

        // Agent proposals and analysis
        proposals: solutions.map(s => ({
          agent: s.author,
          key: s.key,
          value: s.value,
          version: s.version,
          timestamp: s.timestamp
        })),

        // Synthesis attempts
        synthesis_attempts: synthesis.map(s => ({
          agent: s.author,
          key: s.key,
          value: s.value,
          timestamp: s.timestamp
        })),

        // Consensus votes and discussions
        consensus_state: consensus.map(c => ({
          key: c.key,
          value: c.value,
          author: c.author,
          timestamp: c.timestamp
        })),

        // Problem analysis
        problem_analysis: problem.map(p => ({
          key: p.key,
          value: p.value,
          author: p.author
        })),

        // Agent states at abort time
        agent_states: agentStates.map(s => ({
          role: s.role,
          status: s.status,
          phase: s.phase,
          progress: s.progress,
          blocked_reason: s.blocked_reason,
          last_activity: s.last_activity
        })),

        // Recent message history for context
        message_summary: {
          alpha_last_messages: alphaMessages.slice(-10),
          beta_last_messages: betaMessages.slice(-10),
          gamma_last_messages: gammaMessages.slice(-10)
        },

        // Recovery hints
        recovery_hints: [
          `Iteration limit (${this.maxIterations}) exceeded after ${this.iterationCount} iterations`,
          this.gammaAgent ? "GAMMA was active but could not resolve conflicts" : "GAMMA was not spawned",
          `${solutions.length} proposals generated, ${synthesis.length} synthesis attempts`,
          "Consider: simplifying objective, forcing GAMMA earlier, or increasing iteration limit"
        ]
      };

      // Store handoff JSON
      await redis.set(handoffKey, JSON.stringify(handoff), { EX: 86400 }); // 24hr TTL
      await redis.hSet(`pipeline:${this.pipelineId}`, "handoff_key", handoffKey);
      await redis.hSet(`pipeline:${this.pipelineId}`, "handoff_time", handoff.dump_time);

      this.log(`Agent handoff dumped: ${solutions.length} proposals, ${synthesis.length} synthesis attempts`);

    } catch (e: unknown) {
      this.log(`Failed to dump agent handoff: ${MultiAgentOrchestrator.errorMessage(e)}`);
    }
  }

  /** Logs a message prefixed with seconds elapsed since `initialize()` ("0.0" before it ran). */
  private log(msg: string) {
    const elapsed = this.startTime ? ((Date.now() - this.startTime) / 1000).toFixed(1) : "0.0";
    console.log(`[${elapsed}s] [ORCHESTRATOR] ${msg}`);
  }

  /**
   * Prepares the orchestrator for a run: prints the banner, loads recovery
   * context, connects the shared infrastructure and the ALPHA/BETA message
   * buses, pre-seeds the blackboard on recovery runs, and creates and
   * initializes the ALPHA and BETA agents.
   *
   * Must be called before `runTask()`. Connection and agent init errors propagate.
   */
  async initialize(): Promise<void> {
    this.startTime = Date.now();

    console.log("\n" + "=".repeat(70));
    console.log("MULTI-AGENT COORDINATION SYSTEM");
    console.log("Task ID: " + this.taskId);
    if (this.pipelineId) {
      console.log("Pipeline ID: " + this.pipelineId);
    }
    console.log("Model: " + this.model);
    console.log("=".repeat(70) + "\n");

    // Check if this is a recovery run and load inherited context
    await this.loadRecoveryContext();

    this.log("Initializing coordination infrastructure...");

    // Initialize shared infrastructure
    this.blackboard = new Blackboard(this.taskId);
    this.stateManager = new AgentStateManager(this.taskId);
    this.spawnController = new SpawnController(this.taskId);
    this.metrics = new MetricsCollector(this.taskId);

    await Promise.all([
      this.blackboard.connect(),
      this.stateManager.connect(),
      this.spawnController.connect(),
      this.metrics.connect(),
    ]);

    this.log("Infrastructure connected");

    // Pre-seed blackboard with inherited context if this is a recovery run
    if (this.isRecoveryRun && this.inheritedContext) {
      this.log("Pre-seeding blackboard with inherited context...");
      await this.preseedBlackboard();
    }

    // Initialize message buses for ALPHA and BETA
    this.alphaBus = new MessageBus(this.taskId, "ALPHA");
    this.betaBus = new MessageBus(this.taskId, "BETA");

    await Promise.all([
      this.alphaBus.connect(),
      this.betaBus.connect(),
    ]);

    this.log("Message buses connected");

    // Create initial agents
    this.alphaAgent = new AgentAlpha(
      this.taskId, this.blackboard, this.alphaBus, this.stateManager, this.metrics, this.model
    );
    this.betaAgent = new AgentBeta(
      this.taskId, this.blackboard, this.betaBus, this.stateManager, this.metrics, this.model
    );

    await Promise.all([
      this.alphaAgent.init(),
      this.betaAgent.init(),
    ]);

    this.log("Agents ALPHA and BETA initialized");
  }

  /**
   * Creates, connects and initializes the GAMMA agent and marks it as spawned
   * on the spawn controller. Does nothing if GAMMA already exists.
   *
   * @param reason - The spawn condition that triggered GAMMA; its `type` is passed to the agent.
   */
  async spawnGamma(reason: SpawnCondition): Promise<void> {
    if (this.gammaAgent) {
      this.log("GAMMA already spawned, skipping");
      return;
    }

    this.log(`SPAWNING GAMMA - Reason: ${reason.type} (threshold: ${reason.threshold}, current: ${reason.current_value})`);

    // Create message bus for GAMMA
    this.gammaBus = new MessageBus(this.taskId, "GAMMA");
    await this.gammaBus.connect();

    // Create and initialize GAMMA agent
    this.gammaAgent = new AgentGamma(
      this.taskId, this.blackboard, this.gammaBus, this.stateManager, this.metrics,
      reason.type, this.model
    );
    await this.gammaAgent.init();

    await this.spawnController.markGammaSpawned(reason);

    this.log("GAMMA agent spawned and initialized");
  }

  /**
   * One monitoring tick: counts an iteration, checks the abort conditions,
   * evaluates the STUCK / CONFLICT / COMPLEXITY spawn conditions (spawning
   * GAMMA when the spawn controller says so), and logs a status line.
   *
   * @returns `{ abort: true, reason }` when the run should be aborted, otherwise `{ abort: false }`.
   */
  private async monitorConditions(): Promise<{ abort: boolean; reason?: "stuck" | "iteration_limit" }> {
    // Track progress
    this.updateProgress();

    // Check for iteration timeout (stuck without progress)
    // NOTE(review): updateProgress() above has just reset lastProgressTime, so
    // this branch cannot currently trigger. Deciding what counts as "progress"
    // is a design choice, so it is flagged here rather than changed.
    if (this.isStuck()) {
      this.log("TIMEOUT: No progress detected - triggering auto-recovery");
      await this.reportError("iteration_timeout", "high", "Agents stuck without progress");
      return { abort: true, reason: "stuck" };
    }

    // Check for iteration limit exceeded
    if (this.isIterationLimitExceeded()) {
      this.log("LIMIT: Maximum iterations exceeded - triggering auto-recovery");
      await this.reportError("iteration_limit", "high", `Max iterations (${this.maxIterations}) exceeded`);
      return { abort: true, reason: "iteration_limit" };
    }

    // Check stuck condition
    const stuckAgents = await this.stateManager.detectStuckAgents(30);
    if (stuckAgents.length > 0) {
      this.log(`Stuck agents detected: ${stuckAgents.join(", ")}`);
      const condition = await this.spawnController.updateCondition("STUCK", stuckAgents.length);
      if (condition?.triggered) {
        const { shouldSpawn, reason } = await this.spawnController.checkSpawnConditions();
        if (shouldSpawn && reason) {
          await this.spawnGamma(reason);
        }
      }
    }

    // Check conflict condition
    const metricsData = await this.metrics.getMetrics();
    const unresolvedConflicts = metricsData.conflicts_detected - metricsData.conflicts_resolved;
    const conflictCondition = await this.spawnController.updateCondition("CONFLICT", unresolvedConflicts);
    if (conflictCondition?.triggered && !this.spawnController.isGammaSpawned()) {
      const { shouldSpawn, reason } = await this.spawnController.checkSpawnConditions();
      if (shouldSpawn && reason) {
        await this.spawnGamma(reason);
      }
    }

    // Check complexity condition (from blackboard)
    const analysis = await this.blackboard.read("problem", "analysis");
    if (analysis?.value?.complexity_score) {
      const complexityCondition = await this.spawnController.updateCondition("COMPLEXITY", analysis.value.complexity_score);
      if (complexityCondition?.triggered && !this.spawnController.isGammaSpawned()) {
        const { shouldSpawn, reason } = await this.spawnController.checkSpawnConditions();
        if (shouldSpawn && reason) {
          await this.spawnGamma(reason);
        }
      }
    }

    // Log current state
    const states = await this.stateManager.getAllStates();
    const statesSummary = states.map(s => `${s.role}:${s.status}(${(s.progress * 100).toFixed(0)}%)`).join(", ");
    this.log(`Status: ${statesSummary} | Messages: ${metricsData.total_messages} | Conflicts: ${unresolvedConflicts} | Iter: ${this.iterationCount}`);

    return { abort: false };
  }

  /**
   * Runs a task end to end: writes it to the blackboard, starts the 2-second
   * monitor, runs ALPHA and BETA in parallel (bounded by
   * `task.timeout_seconds`), optionally runs GAMMA, then checks consensus and
   * finalizes metrics.
   *
   * Requires `initialize()` to have completed.
   *
   * @param task - The task to execute.
   * @returns Final metrics. `abort_reason` is set when the monitor aborted the
   *   run ("stuck" or "iteration_limit"); in that case handoff data is dumped,
   *   token revocation is requested and failure context is recorded first.
   */
  async runTask(task: TaskDefinition): Promise<CoordinationMetrics & { abort_reason?: string }> {
    this.log(`Starting task: ${task.objective.slice(0, 60)}...`);
    this.lastProgressTime = Date.now();

    // Write task to blackboard
    await this.blackboard.write("problem", "task_definition", task, "ALPHA");

    // FORCE GAMMA: If this is a recovery run, spawn GAMMA immediately
    if (this.forceGamma && !this.gammaAgent) {
      this.log("FORCE GAMMA MODE: Spawning GAMMA mediator immediately for recovery");
      const forceReason: SpawnCondition = {
        type: "STUCK",
        threshold: 0,
        current_value: 1,
        triggered: true,
        description: "Force-spawned for recovery run"
      };
      await this.spawnGamma(forceReason);
    }

    // Shared with the monitor callback. Kept on an object so reads after the
    // awaits below are not narrowed away by control-flow analysis.
    const monitorState: { abortReason?: "stuck" | "iteration_limit"; tickInFlight: boolean } = {
      tickInFlight: false
    };

    // Start monitoring with abort detection
    this.monitorInterval = setInterval(async () => {
      // A tick awaits several network round trips; skip this one if the
      // previous tick has not finished so ticks never overlap.
      if (monitorState.tickInFlight) return;
      monitorState.tickInFlight = true;
      try {
        const result = await this.monitorConditions();
        if (result.abort && result.reason) {
          monitorState.abortReason = result.reason;
          if (this.monitorInterval) {
            clearInterval(this.monitorInterval);
          }
        }
      } catch (e: unknown) {
        // Nothing awaits this callback, so an escaping rejection would be
        // unhandled. Log it and let the next tick try again.
        this.log(`Monitor tick failed: ${MultiAgentOrchestrator.errorMessage(e)}`);
      } finally {
        monitorState.tickInFlight = false;
      }
    }, 2000);

    // Run agents in parallel
    this.log("Launching ALPHA and BETA in parallel...");

    const alphaPromise = this.alphaAgent.run(task).catch(async (e: unknown) => {
      await this.reportError("agent_failure", "high", `ALPHA error: ${MultiAgentOrchestrator.errorMessage(e)}`);
    });

    const betaPromise = this.betaAgent.run(task).catch(async (e: unknown) => {
      await this.reportError("agent_failure", "high", `BETA error: ${MultiAgentOrchestrator.errorMessage(e)}`);
    });

    // Wait for initial agents to complete (or timeout)
    const timeout = task.timeout_seconds * 1000;
    let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
    const timeoutPromise = new Promise<void>(resolve => {
      timeoutHandle = setTimeout(resolve, timeout);
    });

    // NOTE(review): an abort flagged by the monitor is only observed once this
    // race settles (agents finished or timeout reached); it does not interrupt
    // the wait. Confirm whether aborts should end the wait early.
    try {
      await Promise.race([
        Promise.all([alphaPromise, betaPromise]),
        timeoutPromise,
      ]);
    } finally {
      // Release the timer so it does not outlive the run when agents finish first.
      clearTimeout(timeoutHandle);
    }

    // Check if we were aborted during execution
    const abortReason = monitorState.abortReason;
    if (abortReason) {
      this.log(`Task aborted: ${abortReason}`);
      if (this.monitorInterval) {
        clearInterval(this.monitorInterval);
      }

      // CRITICAL: Dump all agent proposals/analysis to handoff JSON BEFORE revoking tokens
      this.log("Dumping agent handoff data for recovery pipeline...");
      await this.dumpAgentHandoff();

      // Now revoke tokens for stuck agents
      await this.revokeStuckAgentTokens();

      // Get partial metrics and mark as failed
      const partialMetrics = await this.metrics.finalize(false);
      await this.recordFailureContext(abortReason, partialMetrics);

      return { ...partialMetrics, abort_reason: abortReason };
    }

    this.log("Initial agents completed or timeout reached");

    // Check if GAMMA needs to be spawned for success validation
    const states = await this.stateManager.getAllStates();
    const bothComplete = states.every(s => s.status === "WAITING" || s.status === "COMPLETED");

    if (bothComplete && !this.spawnController.isGammaSpawned()) {
      await this.spawnController.updateCondition("SUCCESS", 1.0);
      const { shouldSpawn, reason } = await this.spawnController.checkSpawnConditions();
      if (shouldSpawn && reason) {
        await this.spawnGamma(reason);
      }
    }

    // If GAMMA was spawned, run it
    if (this.gammaAgent) {
      this.log("Running GAMMA for resolution...");
      await this.gammaAgent.run(task).catch(async (e: unknown) => {
        await this.reportError("agent_failure", "high", `GAMMA error: ${MultiAgentOrchestrator.errorMessage(e)}`);
      });
    }

    // Stop monitoring
    if (this.monitorInterval) {
      clearInterval(this.monitorInterval);
    }

    // Check consensus
    const consensus = await this.blackboard.checkConsensus("synthesis", ["ALPHA", "BETA"]);
    const consensusAchieved = consensus.reached ||
      (await this.blackboard.read("consensus", "final"))?.value?.achieved === true;

    this.log(`Consensus achieved: ${consensusAchieved}`);

    // Finalize metrics
    const finalMetrics = await this.metrics.finalize(consensusAchieved);

    // If consensus failed, record structured failure context
    if (!consensusAchieved) {
      await this.recordFailureContext("consensus_failed", finalMetrics);
    }

    return finalMetrics;
  }

  /**
   * Stops the monitor and disconnects every message bus and infrastructure
   * component that was created.
   *
   * Best-effort: each disconnect is attempted independently and a failing one
   * is logged instead of thrown, so callers can rely on this method resolving.
   */
  async cleanup(): Promise<void> {
    this.log("Cleaning up...");

    if (this.monitorInterval) {
      clearInterval(this.monitorInterval);
    }

    const components: Array<{ disconnect(): unknown } | undefined> = [
      this.alphaBus,
      this.betaBus,
      this.gammaBus,
      this.blackboard,
      this.stateManager,
      this.spawnController,
      this.metrics,
    ];

    // allSettled (not all): one failing disconnect must not prevent the
    // others from being awaited or make cleanup itself reject.
    const outcomes = await Promise.allSettled(
      components.map(async component => {
        await component?.disconnect();
      })
    );

    for (const outcome of outcomes) {
      if (outcome.status === "rejected") {
        this.log(`Warning: disconnect failed: ${MultiAgentOrchestrator.errorMessage(outcome.reason)}`);
      }
    }

    this.log("Cleanup complete");
  }

  /** Returns the task ID used for this run (from `TASK_ID` or generated). */
  getTaskId(): string {
    return this.taskId;
  }
}

// =============================================================================
// Performance Analysis
// =============================================================================

/**
 * Prints a human-readable performance report for a finished run to stdout:
 * timing, communication volume, conflict handling, GAMMA usage, outcome, and
 * a comparison against fixed message/conflict thresholds.
 *
 * The duration is 0 when `end_time` is missing or the timestamps cannot be
 * parsed. The communication overhead rate is reported as "n/a" when the
 * duration is not positive, rather than dividing by zero.
 *
 * @param metrics - Metrics collected for the run.
 */
export function analyzePerformance(metrics: CoordinationMetrics): void {
  const rule = "=".repeat(70);

  console.log("\n" + rule);
  console.log("PERFORMANCE ANALYSIS");
  console.log(rule);

  const rawDuration = metrics.end_time
    ? (new Date(metrics.end_time).getTime() - new Date(metrics.start_time).getTime()) / 1000
    : 0;
  // Unparseable timestamps yield NaN; treat that like a missing end time.
  const duration = Number.isFinite(rawDuration) ? rawDuration : 0;

  console.log("\nTiming:");
  console.log(`  Duration: ${duration.toFixed(1)}s`);

  // A rate is only meaningful over a positive duration.
  const totalOps = metrics.total_messages + metrics.blackboard_writes;
  const overhead = duration > 0 ? `${(totalOps / duration).toFixed(2)} ops/sec` : "n/a";

  console.log("\nCommunication:");
  console.log(`  Total messages: ${metrics.total_messages}`);
  console.log(`  Direct messages: ${metrics.direct_messages}`);
  console.log(`  Blackboard writes: ${metrics.blackboard_writes}`);
  console.log(`  Blackboard reads: ${metrics.blackboard_reads}`);
  console.log(`  Communication overhead: ${overhead}`);

  console.log("\nCoordination:");
  console.log(`  Conflicts detected: ${metrics.conflicts_detected}`);
  console.log(`  Conflicts resolved: ${metrics.conflicts_resolved}`);
  console.log(`  Conflict resolution rate: ${metrics.conflicts_detected > 0 ? ((metrics.conflicts_resolved / metrics.conflicts_detected) * 100).toFixed(1) : 100}%`);

  console.log("\nGamma Agent:");
  console.log(`  Spawned: ${metrics.gamma_spawned ? "Yes" : "No"}`);
  if (metrics.gamma_spawned) {
    console.log(`  Spawn reason: ${metrics.gamma_spawn_reason}`);
    console.log(`  Spawn time: ${metrics.gamma_spawn_time}`);
  }

  console.log("\nOutcome:");
  console.log(`  Consensus achieved: ${metrics.final_consensus ? "Yes" : "No"}`);
  console.log(`  Performance score: ${(metrics.performance_score * 100).toFixed(1)}%`);

  // Threshold analysis
  console.log("\nThreshold Effects:");
  const messageThreshold = 50;
  const conflictThreshold = 3;

  if (metrics.total_messages > messageThreshold) {
    console.log(`  ! High message volume (${metrics.total_messages} > ${messageThreshold}) - potential coordination overhead`);
  } else {
    console.log(`  + Message volume within threshold (${metrics.total_messages} <= ${messageThreshold})`);
  }

  if (metrics.conflicts_detected > conflictThreshold) {
    console.log(`  ! High conflict rate (${metrics.conflicts_detected} > ${conflictThreshold}) - agents may have divergent strategies`);
  } else {
    console.log(`  + Conflict rate within threshold (${metrics.conflicts_detected} <= ${conflictThreshold})`);
  }

  if (metrics.gamma_spawned && metrics.gamma_spawn_reason === "STUCK") {
    console.log(`  ! Gamma spawned due to stuck condition - consider adjusting agent strategies`);
  }

  console.log("\n" + rule);
}

// =============================================================================
// CLI Entry Point
// =============================================================================

/**
 * CLI entry point.
 *
 * Usage: `[objective] [--model <id>] [--timeout <seconds>]`.
 * The first argument is the objective unless it is itself a flag, in which
 * case the built-in default objective is used. An invalid `--timeout` value
 * is ignored with a warning and the default of 120 seconds is kept.
 *
 * Runs one orchestration, prints metrics plus the `ORCHESTRATION_RESULT:`
 * marker line, and always terminates the process with one of:
 *   0 = consensus achieved, 1 = crash/exception, 2 = consensus failure,
 *   3 = aborted (timeout/stuck/iteration limit).
 */
async function main() {
  const args = process.argv.slice(2);
  const pipelineId = process.env.PIPELINE_ID;

  // Default complex task
  const defaultObjective = `Design a distributed event-driven architecture for a real-time analytics platform that handles:
1) High-throughput data ingestion from multiple sources
2) Stream processing with exactly-once semantics
3) Real-time aggregations and windowed computations
4) Low-latency query serving for dashboards
5) Horizontal scalability to handle 1M events/second
The solution should consider fault tolerance, data consistency, and cost optimization.`;

  // Do not mistake a leading flag (e.g. "--timeout") for the objective.
  const firstArg = args[0];
  const objective = firstArg && !firstArg.startsWith("--") ? firstArg : defaultObjective;

  let model = "anthropic/claude-sonnet-4";
  const modelIdx = args.indexOf("--model");
  if (modelIdx !== -1 && args[modelIdx + 1]) {
    model = args[modelIdx + 1];
  }

  // Parse timeout
  let timeout = 120;
  const timeoutIdx = args.indexOf("--timeout");
  if (timeoutIdx !== -1 && args[timeoutIdx + 1]) {
    const requested = Number.parseInt(args[timeoutIdx + 1], 10);
    // A NaN or non-positive timeout would make the run time out immediately.
    if (Number.isFinite(requested) && requested > 0) {
      timeout = requested;
    } else {
      console.error(`[ORCHESTRATOR] Ignoring invalid --timeout value "${args[timeoutIdx + 1]}"; using ${timeout}s`);
    }
  }

  const task: TaskDefinition = {
    task_id: generateId(),
    objective,
    complexity: "high",
    subtasks: [
      { id: "s1", description: "Analyze data ingestion requirements", status: "pending", dependencies: [] },
      { id: "s2", description: "Design stream processing pipeline", status: "pending", dependencies: ["s1"] },
      { id: "s3", description: "Plan storage and query layer", status: "pending", dependencies: ["s1"] },
      { id: "s4", description: "Define scalability strategy", status: "pending", dependencies: ["s2", "s3"] },
      { id: "s5", description: "Integrate fault tolerance mechanisms", status: "pending", dependencies: ["s4"] },
    ],
    constraints: [
      "Must use open-source technologies where possible",
      "Latency < 100ms for query responses",
      "Support for multiple data formats (JSON, Avro, Protobuf)",
      "Cost-effective for variable workloads",
    ],
    success_criteria: [
      "Complete architecture design with component diagrams",
      "Data flow specifications",
      "Scalability analysis",
      "Fault tolerance mechanisms documented",
      "Cost estimation provided",
    ],
    timeout_seconds: timeout,
  };

  const orchestrator = new MultiAgentOrchestrator(model);

  let exitCode = 0;
  try {
    await orchestrator.initialize();
    const metrics = await orchestrator.runTask(task);
    const abortReason = metrics.abort_reason;

    console.log("\n" + "=".repeat(70));
    console.log("FINAL METRICS");
    console.log("=".repeat(70));
    console.log(JSON.stringify(metrics, null, 2));

    analyzePerformance(metrics);

    // Output special marker for server to parse consensus status
    // Format: ORCHESTRATION_RESULT:{"consensus":true/false,"metrics":{...},"abort_reason":...}
    console.log("\nORCHESTRATION_RESULT:" + JSON.stringify({
      consensus: metrics.final_consensus,
      task_id: metrics.task_id,
      metrics: metrics,
      abort_reason: abortReason || null,
      requires_auto_recovery: !metrics.final_consensus || !!abortReason
    }));

    // Exit codes:
    // 0 = Success (consensus achieved)
    // 1 = Error (crash or exception)
    // 2 = Consensus failure (agents completed but no agreement)
    // 3 = Aborted (timeout/stuck/iteration limit - auto-recovery needed)
    if (abortReason) {
      console.log(`\n[ORCHESTRATOR] Task aborted: ${abortReason} - exiting with code 3 (auto-recovery needed)`);
      exitCode = 3;

      if (pipelineId) {
        await reportErrorToObservability(pipelineId, "orchestrator_aborted", "high",
          `Orchestration aborted: ${abortReason}. Auto-recovery required.`);
      }
    } else if (!metrics.final_consensus) {
      console.log("\n[ORCHESTRATOR] Consensus NOT achieved - exiting with code 2 (auto-recovery needed)");
      exitCode = 2;

      // Report consensus failure to observability
      if (pipelineId) {
        await reportErrorToObservability(pipelineId, "consensus_failure", "high",
          `Agents failed to reach consensus. Conflicts: ${metrics.conflicts_detected}, Resolved: ${metrics.conflicts_resolved}`);
      }
    }

  } catch (e: unknown) {
    const message = e instanceof Error ? e.message : String(e);
    console.error("Orchestrator error:", message);
    exitCode = 1;

    // Report critical error to observability if pipeline ID is set
    if (pipelineId) {
      await reportErrorToObservability(pipelineId, "orchestrator_failure", "critical", message);
    }
  } finally {
    // A cleanup failure must not replace the exit code computed above.
    try {
      await orchestrator.cleanup();
    } catch (e: unknown) {
      console.error("Cleanup error:", e instanceof Error ? e.message : String(e));
    }
    // Explicitly exit to ensure all connections are closed
    process.exit(exitCode);
  }
}

// Last-resort handler: anything that escapes main() is printed and the
// process exits with the generic error code.
main().catch((fatal) => {
  console.error("Fatal error:", fatal);
  process.exit(1);
});