// agent-governance/testing/framework.ts

/**
 * Agent Testing Framework
 * =======================
 * Provides mocks, utilities, and test harnesses for agent development.
 *
 * IMPORTANT: By default, tests require real services (Vault, DragonflyDB).
 * Use --use-mocks flag to explicitly enable mock mode.
 *
 * Usage:
 *   bun run framework.ts                    # Requires real services
 *   bun run framework.ts --use-mocks        # Explicitly use mocks
 *   bun run framework.ts --validate-only    # Check services without running tests
 */

import { describe, it, expect, beforeAll, afterAll, beforeEach, mock } from "bun:test";
import { existsSync } from "fs";
import { createClient, type RedisClientType } from "redis";

// =============================================================================
// Test Mode Configuration
// =============================================================================

/**
 * Selects whether the framework talks to real backing services or to the
 * in-memory mocks defined in this file.
 */
export enum TestMode {
  REAL = "real",       // Requires real services, fails if unavailable
  MOCK = "mock",       // Uses mocks, clearly logged
  HYBRID = "hybrid",   // Uses real where available, mocks otherwise (logged)
}

/**
 * Settings that control service validation and test execution.
 */
export interface TestConfig {
  // Real/mock/hybrid selection (see TestMode).
  mode: TestMode;
  // Base URL of Vault; the health probe appends "/v1/sys/health" to it.
  vaultAddr: string;
  // Connection URL handed to the redis client as `url`.
  dragonflyAddr: string;
  // Paths that must exist on disk for validation to pass.
  requiredAgents: string[];
  // Additional paths that must exist on disk for validation to pass.
  requiredFiles: string[];
  // NOTE(review): not read anywhere in the visible part of this file — confirm usage.
  verbose: boolean;
}

/**
 * Baseline configuration. Service addresses come from VAULT_ADDR and
 * DRAGONFLY_ADDR when those are set to a non-empty value, otherwise from the
 * local defaults below.
 */
const DEFAULT_CONFIG: TestConfig = {
  mode: TestMode.REAL,  // Default: require real services
  vaultAddr: process.env.VAULT_ADDR || "https://127.0.0.1:8200",
  dragonflyAddr: process.env.DRAGONFLY_ADDR || "redis://127.0.0.1:6379",
  // Existence of each path is checked with existsSync during validation.
  requiredAgents: [
    "/opt/agent-governance/agents/llm-planner-ts/index.ts",
    "/opt/agent-governance/agents/multi-agent/orchestrator.ts",
  ],
  requiredFiles: [
    "/opt/agent-governance/runtime",
    "/opt/agent-governance/pipeline",
  ],
  verbose: false,
};

// Global config, set by CLI
// Starts as a shallow copy of DEFAULT_CONFIG and is the default used by
// validateServices(), createTestContext() and TestHarness.
// NOTE(review): the CLI code that reassigns it is not visible in this part of the file.
let globalConfig: TestConfig = { ...DEFAULT_CONFIG };

// =============================================================================
// Service Validation
// =============================================================================

/**
 * Outcome of probing a single backing service.
 */
export interface ValidationResult {
  // Display name of the probed service ("Vault" or "DragonflyDB").
  service: string;
  // Whether the probe considered the service reachable.
  available: boolean;
  // Message of the error that made the probe fail, when there was one.
  error?: string;
  // Wall-clock time the probe took, in milliseconds.
  latencyMs?: number;
}

/**
 * Aggregated result of validateServices().
 */
export interface ValidationReport {
  // ISO-8601 time at which validation started.
  timestamp: string;
  // Mode the validation was evaluated for.
  mode: TestMode;
  // True only when every probe succeeded AND no required path is missing.
  allServicesAvailable: boolean;
  // One entry per probed service.
  results: ValidationResult[];
  // Required agent/file paths that do not exist on disk.
  missingFiles: string[];
  // Names of unavailable services recorded as mock fallbacks (only filled when mode is not REAL).
  mocksFallback: string[];
}

/**
 * Probe Vault and DragonflyDB (in that order) and verify that every required
 * agent/file path exists on disk.
 *
 * @param config - Configuration to validate against; defaults to the global config.
 * @returns A report listing each probe result, the missing paths, and - for
 *          non-REAL modes - the names of unavailable services recorded as
 *          mock fallbacks.
 */
export async function validateServices(config: TestConfig = globalConfig): Promise<ValidationReport> {
  const timestamp = new Date().toISOString();

  // Probes run sequentially: Vault first, then DragonflyDB.
  const vaultProbe = await checkVault(config.vaultAddr);
  const dragonflyProbe = await checkDragonfly(config.dragonflyAddr);
  const results: ValidationResult[] = [vaultProbe, dragonflyProbe];

  // Only modes other than REAL record a mock fallback for an unavailable service.
  const fallbackAllowed = config.mode !== TestMode.REAL;
  const labelledProbes: Array<[string, ValidationResult]> = [
    ["Vault", vaultProbe],
    ["DragonflyDB", dragonflyProbe],
  ];
  const mocksFallback: string[] = labelledProbes
    .filter(([, probe]) => fallbackAllowed && !probe.available)
    .map(([label]) => label);

  const requiredPaths = [...config.requiredAgents, ...config.requiredFiles];
  const missingFiles: string[] = requiredPaths.filter(path => !existsSync(path));

  const everyProbeOk = results.every(probe => probe.available);

  return {
    timestamp,
    mode: config.mode,
    allServicesAvailable: everyProbeOk && missingFiles.length === 0,
    results,
    missingFiles,
    mocksFallback,
  };
}

/**
 * Probe the Vault health endpoint (`/v1/sys/health`).
 *
 * The first attempt passes Bun's per-request `tls.rejectUnauthorized: false`
 * so local self-signed certificates are accepted. If that request throws, a
 * second attempt is made with NODE_TLS_REJECT_UNAUTHORIZED temporarily set to
 * "0". The variable is always put back to its previous state afterwards,
 * including when the second attempt fails and when it was originally unset.
 *
 * @param addr - Base URL of the Vault server.
 * @returns The probe result. `available` is true for HTTP 200, 429, 472 and
 *          473. When both attempts throw, `error` carries the message of the
 *          first failure.
 */
async function checkVault(addr: string): Promise<ValidationResult> {
  const start = Date.now();
  const healthUrl = `${addr}/v1/sys/health`;
  const isHealthyStatus = (status: number): boolean =>
    status === 200 || status === 429 || status === 472 || status === 473;

  try {
    // Try to reach Vault health endpoint
    const response = await fetch(healthUrl, {
      method: "GET",
      signal: AbortSignal.timeout(5000),
      // @ts-ignore - Bun supports this for self-signed certs
      tls: { rejectUnauthorized: false },
    });
    return {
      service: "Vault",
      available: isHealthyStatus(response.status),
      latencyMs: Date.now() - start,
    };
  } catch (error: unknown) {
    // Fallback: try with NODE_TLS_REJECT_UNAUTHORIZED workaround
    const previousTls = process.env.NODE_TLS_REJECT_UNAUTHORIZED;
    process.env.NODE_TLS_REJECT_UNAUTHORIZED = "0";
    try {
      const response = await fetch(healthUrl, {
        method: "GET",
        signal: AbortSignal.timeout(5000),
      });
      return {
        service: "Vault",
        available: isHealthyStatus(response.status),
        latencyMs: Date.now() - start,
      };
    } catch {
      return {
        service: "Vault",
        available: false,
        error: error instanceof Error ? error.message : String(error),
        latencyMs: Date.now() - start,
      };
    } finally {
      // Assigning `undefined` to an env var stores the string "undefined",
      // so an originally-unset variable has to be deleted instead.
      if (previousTls === undefined) {
        delete process.env.NODE_TLS_REJECT_UNAUTHORIZED;
      } else {
        process.env.NODE_TLS_REJECT_UNAUTHORIZED = previousTls;
      }
    }
  }
}

/**
 * Probe DragonflyDB by connecting with the redis client and issuing PING.
 *
 * The probe client is always torn down afterwards, including when connect()
 * or ping() throws, so a failed probe does not leave a socket behind. Errors
 * raised while disconnecting are ignored and do not change the result.
 *
 * @param addr - Connection URL passed to the redis client.
 * @returns The probe result; on failure `error` holds the failure message.
 */
async function checkDragonfly(addr: string): Promise<ValidationResult> {
  const start = Date.now();
  let client: ReturnType<typeof createClient> | undefined;
  try {
    client = createClient({
      url: addr,
      password: process.env.DRAGONFLY_PASSWORD || "governance2026",
    });
    await client.connect();
    await client.ping();
    return {
      service: "DragonflyDB",
      available: true,
      latencyMs: Date.now() - start,
    };
  } catch (error: unknown) {
    return {
      service: "DragonflyDB",
      available: false,
      error: error instanceof Error ? error.message : String(error),
      latencyMs: Date.now() - start,
    };
  } finally {
    if (client) {
      try {
        await client.disconnect();
      } catch {
        // Best effort: the client may never have opened or is already closed.
      }
    }
  }
}

// =============================================================================
// Type Definitions
// =============================================================================

/**
 * Per-test bundle of identifiers and service handles that is passed to
 * scenario callbacks.
 */
export interface TestContext {
  // Identifier of the task under test.
  taskId: string;
  // Identifier of the agent under test.
  agentId: string;
  // Creation time of the context (Date.now(), milliseconds).
  startTime: number;
  // Mode the context was created for.
  mode: TestMode;
  // Vault handle; real or mock — check isMock().
  vault: IVault;
  // DragonflyDB handle; real or mock — check isMock().
  dragonfly: IDragonfly;
  // LLM handle; check isMock().
  llm: ILLM;
  _usingMocks: string[];  // Track which services are mocked
}

/**
 * A named test case run by TestHarness. The harness invokes setup, execute,
 * assertions and cleanup in that order.
 */
export interface TestScenario {
  // Display name; also the key under which the harness stores the result.
  name: string;
  // Human-readable summary of what the scenario checks.
  description: string;
  requiresReal?: string[];  // List of services that must be real
  // Runs before execute; receives no context.
  setup: () => Promise<void>;
  // Performs the scenario's actions against the context's services.
  execute: (ctx: TestContext) => Promise<void>;
  // Verifies the outcome; a thrown error marks the scenario as failed.
  assertions: (ctx: TestContext) => Promise<void>;
  // Runs after assertions; also invoked by the harness when the scenario fails.
  cleanup: () => Promise<void>;
}

/**
 * Summary returned by TestHarness.runAll().
 */
export interface TestMetrics {
  // Number of scenarios that completed without throwing.
  passed: number;
  // Number of scenarios that threw.
  failed: number;
  // Number of scenarios that were not run.
  skipped: number;
  // Total wall-clock time of the run, in milliseconds.
  duration: number;
  // Percentage of non-skipped scenarios that passed (not code coverage).
  coverage: number;
  // Distinct service names that were mocked in at least one scenario.
  mocksUsed: string[];
}

// =============================================================================
// Service Interfaces (shared by real and mock implementations)
// =============================================================================

/**
 * Vault operations used by tests; implemented by both MockVault and RealVault.
 */
export interface IVault {
  // Resolves to the secret stored at `path`, or null when it cannot be read.
  getSecret(path: string): Promise<any>;
  // Optional: seeds a secret. Of the implementations in this file only MockVault provides it.
  setSecret?(path: string, value: any): void;
  // Creates a token bound to `policy`; `ttl` is in seconds.
  createToken(policy: string, ttl?: number): Promise<string>;
  // Resolves to true when the token is currently accepted.
  validateToken(token: string): Promise<boolean>;
  // Invalidates the token.
  revokeToken(token: string): Promise<void>;
  // True for the in-memory mock, false for the real client.
  isMock(): boolean;
}

/**
 * Subset of Redis-style commands used by tests; implemented by both
 * MockDragonfly and RealDragonfly.
 */
export interface IDragonfly {
  // Stores a string value. EX is a TTL in seconds; NX writes only when the key is absent.
  set(key: string, value: any, options?: { EX?: number; NX?: boolean }): Promise<string | null>;
  // Reads a string value.
  get(key: string): Promise<any>;
  // Deletes a key; resolves to the number of keys removed.
  del(key: string): Promise<number>;
  // Resolves to 1 when the key exists, 0 otherwise.
  exists(key: string): Promise<number>;
  // Sets one hash field.
  hSet(key: string, field: string, value: any): Promise<number>;
  // Reads one hash field.
  hGet(key: string, field: string): Promise<any>;
  // Reads every field of a hash as a plain object.
  hGetAll(key: string): Promise<Record<string, any>>;
  // Adds `increment` to a numeric hash field and resolves to the new value.
  hIncrBy(key: string, field: string, increment: number): Promise<number>;
  // Appends values to a list; resolves to the new list length.
  rPush(key: string, ...values: any[]): Promise<number>;
  // Reads a range of list elements; a stop of -1 means "through the last element".
  lRange(key: string, start: number, stop: number): Promise<any[]>;
  // True for the in-memory mock, false for the real client.
  isMock(): boolean;
  // Optional: closes the underlying connection (provided by RealDragonfly).
  disconnect?(): Promise<void>;
}

/**
 * Minimal LLM completion interface used by tests.
 */
export interface ILLM {
  // Resolves to the completion text for `prompt`.
  complete(prompt: string, options?: { maxTokens?: number; temperature?: number }): Promise<string>;
  // True when the implementation is a mock.
  isMock(): boolean;
}

// =============================================================================
// Mock Vault
// =============================================================================

/**
 * In-memory stand-in for Vault.
 *
 * Holds secrets, a fixed set of policies, and issued tokens in Maps, and
 * records every secret read in an access log. The constructor seeds two test
 * secrets and two default policies.
 */
export class MockVault implements IVault {
  private secrets: Map<string, any> = new Map();
  private policies: Map<string, string[]> = new Map();
  private tokens: Map<string, { policy: string; ttl: number; created: number }> = new Map();
  private accessLog: Array<{ path: string; action: string; timestamp: number }> = [];

  constructor() {
    // Initialize with default test secrets
    this.secrets.set("api-keys/openrouter", { api_key: "test-key" });
    this.secrets.set("services/dragonfly", {
      host: "127.0.0.1",
      port: 6379,
      password: "test-password",
    });

    // Default policies
    this.policies.set("t0-observer", ["read:secret/data/docs/*", "read:secret/data/inventory/*"]);
    this.policies.set("t1-operator", ["read:ssh/creds/sandbox-*", "read:proxmox/creds/sandbox"]);
  }

  isMock(): boolean { return true; }

  /**
   * Read a secret and log the access.
   * @returns The stored value (falsy values such as 0 or "" are returned
   *          as-is), or null when nothing is stored at `path`.
   */
  async getSecret(path: string): Promise<any> {
    this.accessLog.push({ path, action: "read", timestamp: Date.now() });
    // `??` rather than `||`: a stored 0 / "" / false must not be reported as missing.
    return this.secrets.get(path) ?? null;
  }

  /** Store (or overwrite) a secret. Not recorded in the access log. */
  setSecret(path: string, value: any): void {
    this.secrets.set(path, value);
  }

  /**
   * Issue a fake token of the form "hvs.test-<random>".
   * @param policy - Policy name the token is bound to.
   * @param ttl - Lifetime in seconds (default 3600).
   */
  async createToken(policy: string, ttl: number = 3600): Promise<string> {
    const token = "hvs.test-" + Math.random().toString(36).slice(2);
    this.tokens.set(token, { policy, ttl, created: Date.now() });
    return token;
  }

  /** True when the token was issued by this mock, not revoked, and younger than its TTL. */
  async validateToken(token: string): Promise<boolean> {
    const tokenData = this.tokens.get(token);
    if (!tokenData) return false;
    const elapsed = (Date.now() - tokenData.created) / 1000;
    return elapsed < tokenData.ttl;
  }

  /** Forget the token; unknown tokens are ignored. */
  async revokeToken(token: string): Promise<void> {
    this.tokens.delete(token);
  }

  /**
   * Check whether the token's policy permits `path`.
   *
   * Each policy rule is matched against the WHOLE path: `*` matches any run
   * of characters and every other character is literal. Unknown tokens and
   * unknown policies deny access.
   */
  checkAccess(token: string, path: string): boolean {
    const tokenData = this.tokens.get(token);
    if (!tokenData) return false;
    const allowedPaths = this.policies.get(tokenData.policy) ?? [];
    return allowedPaths.some(rule => MockVault.ruleToRegExp(rule).test(path));
  }

  /** Compile a policy rule into an anchored RegExp where only `*` is a wildcard. */
  private static ruleToRegExp(rule: string): RegExp {
    // Escape regex metacharacters first so e.g. "." in a rule stays literal,
    // then expand every "*" (not just the first one).
    const escaped = rule.replace(/[.+?^${}()|[\]\\]/g, "\\$&");
    return new RegExp(`^${escaped.replace(/\*/g, ".*")}$`);
  }

  /** Live reference to the recorded secret reads, oldest first. */
  getAccessLog(): Array<{ path: string; action: string; timestamp: number }> {
    return this.accessLog;
  }

  /** Clear the access log and all issued tokens. Secrets and policies are kept. */
  reset(): void {
    this.accessLog = [];
    this.tokens.clear();
  }
}

// =============================================================================
// Real Vault Client
// =============================================================================

/**
 * Thin HTTP client for a real Vault server.
 *
 * Requests are authenticated with the VAULT_TOKEN environment variable unless
 * a call supplies its own "X-Vault-Token" header. TLS certificate
 * verification is disabled per request (Bun `tls` option) so local
 * self-signed certificates work.
 */
export class RealVault implements IVault {
  private addr: string;
  // Never assigned in this class, so requests fall back to VAULT_TOKEN.
  private token: string | null = null;

  /** @param addr - Base URL of the Vault server. */
  constructor(addr: string) {
    this.addr = addr;
  }

  isMock(): boolean { return false; }

  /**
   * Issue a request against `addr + path` with the default Vault headers.
   * Headers passed in `options.headers` override the defaults.
   */
  private async fetch(path: string, options: RequestInit = {}): Promise<Response> {
    const token = this.token || process.env.VAULT_TOKEN;
    return fetch(`${this.addr}${path}`, {
      ...options,
      headers: {
        "X-Vault-Token": token || "",
        "Content-Type": "application/json",
        ...options.headers,
      },
      // @ts-ignore - Bun supports this for self-signed certs
      tls: { rejectUnauthorized: false },
    });
  }

  /**
   * Read a secret from `/v1/secret/data/<path>`.
   * @returns The `data.data` payload of the response, or null when the
   *          request fails, the response is not OK, or the payload is absent.
   */
  async getSecret(path: string): Promise<any> {
    try {
      const response = await this.fetch(`/v1/secret/data/${path}`);
      if (!response.ok) return null;
      const data = await response.json();
      return data?.data?.data ?? null;
    } catch {
      return null;
    }
  }

  /**
   * Create a token via `/v1/auth/token/create`.
   * @param policy - Single policy attached to the token.
   * @param ttl - Lifetime in seconds (default 3600).
   * @returns The client token, or "" when the response carries none.
   */
  async createToken(policy: string, ttl: number = 3600): Promise<string> {
    const response = await this.fetch("/v1/auth/token/create", {
      method: "POST",
      body: JSON.stringify({
        policies: [policy],
        ttl: `${ttl}s`,
      }),
    });
    const data = await response.json();
    return data?.auth?.client_token || "";
  }

  /** True when `/v1/auth/token/lookup-self`, called with the given token, answers OK. */
  async validateToken(token: string): Promise<boolean> {
    try {
      const response = await this.fetch("/v1/auth/token/lookup-self", {
        headers: { "X-Vault-Token": token },
      });
      return response.ok;
    } catch {
      return false;
    }
  }

  /** Revoke the given token by calling `/v1/auth/token/revoke-self` with it. */
  async revokeToken(token: string): Promise<void> {
    await this.fetch("/v1/auth/token/revoke-self", {
      method: "POST",
      headers: { "X-Vault-Token": token },
    });
  }

  /**
   * Probe `/v1/sys/health`.
   *
   * Besides any 2xx answer, the statuses 429, 472 and 473 count as reachable,
   * so this agrees with the availability rule used by the service validation
   * probe in this file.
   * @returns false when the request throws or returns any other status.
   */
  async testConnection(): Promise<boolean> {
    try {
      const response = await this.fetch("/v1/sys/health");
      return response.ok || [429, 472, 473].includes(response.status);
    } catch {
      return false;
    }
  }
}

// =============================================================================
// Mock DragonflyDB
// =============================================================================

/**
 * In-memory stand-in for DragonflyDB/Redis.
 *
 * Strings, hashes and lists live in separate Maps. Values are stored as given
 * (no string serialization), except that hIncrBy stores its result as a
 * string. TTLs are tracked for string keys only and are enforced lazily when
 * a key is next touched.
 */
export class MockDragonfly implements IDragonfly {
  private store: Map<string, any> = new Map();
  private hashes: Map<string, Map<string, any>> = new Map();
  private lists: Map<string, any[]> = new Map();
  private expirations: Map<string, number> = new Map();
  private subscribers: Map<string, Array<(msg: string) => void>> = new Map();

  isMock(): boolean { return true; }

  // String operations

  /**
   * Store a value.
   * @param options.EX - TTL in seconds; without it any earlier TTL on the key is dropped.
   * @param options.NX - Only write when the key does not currently exist
   *                     (an expired key counts as absent).
   * @returns "OK", or null when NX prevented the write.
   */
  async set(key: string, value: any, options?: { EX?: number; NX?: boolean }): Promise<string | null> {
    this.evictIfExpired(key);
    if (options?.NX && this.store.has(key)) return null;
    this.store.set(key, value);
    if (options?.EX) {
      this.expirations.set(key, Date.now() + options.EX * 1000);
    } else {
      // A plain SET replaces the key, so a TTL left over from a previous
      // value must not expire the new one.
      this.expirations.delete(key);
    }
    return "OK";
  }

  /** Read a string key; null when absent or expired. Falsy stored values are returned as-is. */
  async get(key: string): Promise<any> {
    this.evictIfExpired(key);
    return this.store.get(key) ?? null;
  }

  /**
   * Remove a key of any type (string, hash or list) together with its TTL.
   * @returns 1 when something was removed, 0 otherwise.
   */
  async del(key: string): Promise<number> {
    this.evictIfExpired(key);
    const removedString = this.store.delete(key);
    const removedHash = this.hashes.delete(key);
    const removedList = this.lists.delete(key);
    this.expirations.delete(key);
    return removedString || removedHash || removedList ? 1 : 0;
  }

  /** 1 when a non-expired string, a hash, or a list exists under the key, else 0. */
  async exists(key: string): Promise<number> {
    this.evictIfExpired(key);
    return this.store.has(key) || this.hashes.has(key) || this.lists.has(key) ? 1 : 0;
  }

  /**
   * Set a TTL (seconds) on an existing string key.
   * @returns false when no live string key exists. Hashes and lists are not
   *          supported by this mock's expiry tracking.
   */
  async expire(key: string, seconds: number): Promise<boolean> {
    this.evictIfExpired(key);
    if (!this.store.has(key)) return false;
    this.expirations.set(key, Date.now() + seconds * 1000);
    return true;
  }

  // Hash operations

  /** Set one hash field. @returns 1 when the field is new, 0 when it was overwritten. */
  async hSet(key: string, field: string, value: any): Promise<number> {
    const hash = this.hashFor(key);
    const existed = hash.has(field);
    hash.set(field, value);
    return existed ? 0 : 1;
  }

  /** Read one hash field; null when the hash or field is absent. */
  async hGet(key: string, field: string): Promise<any> {
    return this.hashes.get(key)?.get(field) ?? null;
  }

  /** Snapshot of all fields as a plain object; {} when the hash is absent. */
  async hGetAll(key: string): Promise<Record<string, any>> {
    const hash = this.hashes.get(key);
    if (!hash) return {};
    const result: Record<string, any> = {};
    for (const [k, v] of hash) {
      result[k] = v;
    }
    return result;
  }

  /** Add `increment` to a field (missing field counts as 0); stores the result as a string. */
  async hIncrBy(key: string, field: string, increment: number): Promise<number> {
    const hash = this.hashFor(key);
    const current = parseInt(hash.get(field) || "0", 10);
    const newValue = current + increment;
    hash.set(field, newValue.toString());
    return newValue;
  }

  // List operations

  /** Append values to a list, creating it if needed. @returns The new length. */
  async rPush(key: string, ...values: any[]): Promise<number> {
    let list = this.lists.get(key);
    if (!list) {
      list = [];
      this.lists.set(key, list);
    }
    list.push(...values);
    return list.length;
  }

  /**
   * Return the elements from `start` to `stop`, BOTH inclusive (Redis LRANGE
   * semantics). Negative indices count from the end (-1 is the last element);
   * out-of-range indices are clamped, and an empty array is returned when the
   * range selects nothing.
   */
  async lRange(key: string, start: number, stop: number): Promise<any[]> {
    const list = this.lists.get(key) ?? [];
    const length = list.length;
    const from = start < 0 ? Math.max(length + start, 0) : start;
    const to = stop < 0 ? length + stop : Math.min(stop, length - 1);
    if (from > to) return [];
    return list.slice(from, to + 1);
  }

  // Pub/Sub

  /** Register a handler for a channel. */
  async subscribe(channel: string, handler: (msg: string) => void): Promise<void> {
    if (!this.subscribers.has(channel)) this.subscribers.set(channel, []);
    this.subscribers.get(channel)!.push(handler);
  }

  /** Synchronously deliver a message to every handler. @returns The number of handlers called. */
  async publish(channel: string, message: string): Promise<number> {
    const handlers = this.subscribers.get(channel) || [];
    handlers.forEach(h => h(message));
    return handlers.length;
  }

  /** Drop the handlers of one channel, or of every channel when none is given. */
  async unsubscribe(channel?: string): Promise<void> {
    if (channel) {
      this.subscribers.delete(channel);
    } else {
      this.subscribers.clear();
    }
  }

  // Utility

  /** True when the key has a TTL that lies in the past. */
  private isExpired(key: string): boolean {
    const expiry = this.expirations.get(key);
    if (!expiry) return false;
    return Date.now() > expiry;
  }

  /** Physically remove a string key (and its TTL) once its TTL has elapsed. */
  private evictIfExpired(key: string): void {
    if (this.isExpired(key)) {
      this.store.delete(key);
      this.expirations.delete(key);
    }
  }

  /** Return the hash stored under `key`, creating an empty one if needed. */
  private hashFor(key: string): Map<string, any> {
    let hash = this.hashes.get(key);
    if (!hash) {
      hash = new Map();
      this.hashes.set(key, hash);
    }
    return hash;
  }

  /** Wipe all data, TTLs and subscriptions. */
  reset(): void {
    this.store.clear();
    this.hashes.clear();
    this.lists.clear();
    this.expirations.clear();
    this.subscribers.clear();
  }

  // Test helpers

  /** Live references to the internal string, hash and list maps. */
  getState(): { store: Map<string, any>; hashes: Map<string, Map<string, any>>; lists: Map<string, any[]> } {
    return { store: this.store, hashes: this.hashes, lists: this.lists };
  }
}

// =============================================================================
// Real DragonflyDB Client
// =============================================================================

/**
 * IDragonfly implementation backed by a real redis client.
 *
 * Every command first makes sure the client is connected. Non-string values
 * given to set, hSet and rPush are JSON-encoded before being sent; reads
 * return whatever the client returns, without decoding.
 */
export class RealDragonfly implements IDragonfly {
  private client: RedisClientType;
  private connected: boolean = false;

  /**
   * @param url - Connection URL for the redis client.
   * @param password - Password; falls back to DRAGONFLY_PASSWORD, then to the built-in default.
   */
  constructor(url: string, password?: string) {
    const effectivePassword = password || process.env.DRAGONFLY_PASSWORD || "governance2026";
    this.client = createClient({ url, password: effectivePassword });
  }

  isMock(): boolean { return false; }

  /** Strings pass through unchanged; anything else is JSON-encoded. */
  private static encode(value: any): string {
    if (typeof value === "string") return value;
    return JSON.stringify(value);
  }

  /** Open the connection unless this wrapper already did so. */
  async connect(): Promise<void> {
    if (this.connected) return;
    await this.client.connect();
    this.connected = true;
  }

  /** Close the connection if this wrapper opened it. */
  async disconnect(): Promise<void> {
    if (!this.connected) return;
    await this.client.disconnect();
    this.connected = false;
  }

  /** SET with optional EX (seconds) and NX; only truthy options are forwarded. */
  async set(key: string, value: any, options?: { EX?: number; NX?: boolean }): Promise<string | null> {
    await this.connect();
    const payload = RealDragonfly.encode(value);
    const commandOptions: any = {};
    if (options?.EX) {
      commandOptions.EX = options.EX;
    }
    if (options?.NX) {
      commandOptions.NX = true;
    }
    return this.client.set(key, payload, commandOptions);
  }

  /** GET. */
  async get(key: string): Promise<any> {
    await this.connect();
    return this.client.get(key);
  }

  /** DEL for a single key. */
  async del(key: string): Promise<number> {
    await this.connect();
    return this.client.del(key);
  }

  /** EXISTS for a single key. */
  async exists(key: string): Promise<number> {
    await this.connect();
    return this.client.exists(key);
  }

  /** HSET for one field. */
  async hSet(key: string, field: string, value: any): Promise<number> {
    await this.connect();
    const payload = RealDragonfly.encode(value);
    return this.client.hSet(key, field, payload);
  }

  /** HGET. */
  async hGet(key: string, field: string): Promise<any> {
    await this.connect();
    return this.client.hGet(key, field);
  }

  /** HGETALL. */
  async hGetAll(key: string): Promise<Record<string, any>> {
    await this.connect();
    return this.client.hGetAll(key);
  }

  /** HINCRBY. */
  async hIncrBy(key: string, field: string, increment: number): Promise<number> {
    await this.connect();
    return this.client.hIncrBy(key, field, increment);
  }

  /** RPUSH; each value is encoded individually. */
  async rPush(key: string, ...values: any[]): Promise<number> {
    await this.connect();
    const encoded = values.map(item => RealDragonfly.encode(item));
    return this.client.rPush(key, encoded);
  }

  /** LRANGE. */
  async lRange(key: string, start: number, stop: number): Promise<any[]> {
    await this.connect();
    return this.client.lRange(key, start, stop);
  }
}

// =============================================================================
// Mock LLM
// =============================================================================

/**
 * Scriptable fake LLM.
 *
 * Canned responses are selected by case-insensitive substring match on the
 * prompt; the first registered pattern that matches wins, otherwise the
 * default response is used. Latency and a random failure rate can be
 * simulated. Successful calls are recorded in a call log.
 */
export class MockLLM implements ILLM {
  private responses = new Map<string, string>();
  private callLog: Array<{ prompt: string; response: string; timestamp: number }> = [];
  private defaultResponse = '{"confidence": 0.5, "steps": [], "assumptions": []}';
  private latencyMs = 0;
  private failureRate = 0;

  isMock(): boolean {
    return true;
  }

  /** Register (or replace) the canned response for prompts containing `pattern`. */
  setResponse(pattern: string, response: string): void {
    this.responses.set(pattern, response);
  }

  /** Replace the response returned when no pattern matches. */
  setDefaultResponse(response: string): void {
    this.defaultResponse = response;
  }

  /** Delay, in milliseconds, applied before each completion. */
  setLatency(ms: number): void {
    this.latencyMs = ms;
  }

  /** Probability (0..1) that a completion throws. */
  setFailureRate(rate: number): void {
    this.failureRate = rate;
  }

  /**
   * Produce a canned completion.
   * @throws Error("LLM_ERROR: Simulated failure") according to the failure
   *         rate; failed calls are not added to the call log.
   */
  async complete(prompt: string, options?: { maxTokens?: number; temperature?: number }): Promise<string> {
    // Simulated latency comes first, then the simulated failure roll.
    if (this.latencyMs > 0) {
      await new Promise(resolve => setTimeout(resolve, this.latencyMs));
    }
    if (Math.random() < this.failureRate) {
      throw new Error("LLM_ERROR: Simulated failure");
    }

    // First registered pattern contained in the prompt (case-insensitive) wins.
    const loweredPrompt = prompt.toLowerCase();
    const match = [...this.responses].find(([pattern]) => loweredPrompt.includes(pattern.toLowerCase()));
    const response = match ? match[1] : this.defaultResponse;

    this.callLog.push({ prompt, response, timestamp: Date.now() });
    return response;
  }

  /** Live reference to the successful calls, oldest first. */
  getCallLog(): Array<{ prompt: string; response: string; timestamp: number }> {
    return this.callLog;
  }

  /** Number of successful calls recorded so far. */
  getCallCount(): number {
    return this.callLog.length;
  }

  /** Clear the call log and the latency/failure settings; canned responses are kept. */
  reset(): void {
    this.callLog = [];
    this.latencyMs = 0;
    this.failureRate = 0;
  }
}

// =============================================================================
// Test Utilities
// =============================================================================

/**
 * Create a test context. By default, requires real services.
 * Pass mode: TestMode.MOCK to explicitly use mocks.
 */
/**
 * Create a test context. By default, requires real services.
 * Pass mode: TestMode.MOCK to explicitly use mocks.
 *
 * Behavior per mode:
 * - REAL: services are validated first; Vault needs VAULT_TOKEN and a passing
 *   connection test, DragonflyDB must accept a connection. Any failure throws.
 * - HYBRID: each service is tried for real and replaced by its mock on failure.
 * - MOCK: Vault and DragonflyDB are always mocks.
 *
 * The LLM is a MockLLM in every mode. Service addresses always come from the
 * global config.
 *
 * @param options - Overrides merged over the generated context; `mode`
 *                  overrides the global mode.
 * @returns The context; `_usingMocks` names every mocked service.
 * @throws Error when REAL mode cannot obtain a real service.
 */
export async function createTestContext(
  options?: Partial<TestContext> & { mode?: TestMode }
): Promise<TestContext> {
  const mode = options?.mode ?? globalConfig.mode;
  const usingMocks: string[] = [];

  // Validate services if not in mock mode
  if (mode === TestMode.REAL) {
    const report = await validateServices(globalConfig);
    if (!report.allServicesAvailable) {
      const errors: string[] = [];
      for (const r of report.results) {
        if (!r.available) {
          errors.push(`${r.service}: ${r.error || "unavailable"}`);
        }
      }
      for (const f of report.missingFiles) {
        errors.push(`Missing file: ${f}`);
      }
      throw new Error(
        `REAL mode requires all services. Missing:\n  - ${errors.join("\n  - ")}\n\n` +
        `Use --use-mocks to explicitly enable mock mode.`
      );
    }
  }

  // Create Vault (real or mock)
  let vault: IVault;
  if (mode === TestMode.MOCK) {
    vault = new MockVault();
    usingMocks.push("Vault");
  } else if (!process.env.VAULT_TOKEN) {
    if (mode !== TestMode.HYBRID) {
      // REAL mode promises real services; silently handing out a mock here
      // would make results look like they came from a real Vault.
      throw new Error(
        `REAL mode requires a real Vault, but VAULT_TOKEN is not set.\n\n` +
        `Use --use-mocks to explicitly enable mock mode.`
      );
    }
    vault = new MockVault();
    usingMocks.push("Vault");
  } else {
    try {
      const realVault = new RealVault(globalConfig.vaultAddr);
      const connected = await realVault.testConnection();
      if (!connected) {
        throw new Error("Vault connection test failed");
      }
      vault = realVault;
    } catch (e) {
      if (mode !== TestMode.HYBRID) {
        throw e;
      }
      vault = new MockVault();
      usingMocks.push("Vault");
    }
  }

  // Create Dragonfly (real or mock)
  let dragonfly: IDragonfly;
  if (mode === TestMode.MOCK) {
    dragonfly = new MockDragonfly();
    usingMocks.push("DragonflyDB");
  } else {
    try {
      const realDragonfly = new RealDragonfly(globalConfig.dragonflyAddr);
      await realDragonfly.connect();
      dragonfly = realDragonfly;
    } catch (e) {
      if (mode !== TestMode.HYBRID) {
        throw e;
      }
      dragonfly = new MockDragonfly();
      usingMocks.push("DragonflyDB");
    }
  }

  // The LLM is always a mock: no real LLM client is created here.
  const llm = new MockLLM();
  usingMocks.push("LLM");

  return {
    taskId: "test-task-" + Math.random().toString(36).slice(2, 8),
    agentId: "test-agent-" + Math.random().toString(36).slice(2, 8),
    startTime: Date.now(),
    mode,
    vault,
    dragonfly,
    llm,
    _usingMocks: usingMocks,
    // Caller-supplied fields win over the generated ones.
    ...options,
  };
}

/**
 * Race a promise against a deadline.
 *
 * The internal timer is cleared as soon as the race settles, so a promise
 * that finishes early does not leave a pending timer keeping the process
 * alive until `ms` elapses.
 *
 * @param promise - The operation to wait for.
 * @param ms - Deadline in milliseconds.
 * @param message - Message of the Error thrown on timeout (default "Timeout").
 * @returns The value `promise` resolves to.
 * @throws Error(message) when the deadline passes first; otherwise whatever
 *         `promise` rejects with.
 */
export async function withTimeout<T>(promise: Promise<T>, ms: number, message: string = "Timeout"): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error(message)), ms);
  });
  try {
    return await Promise.race([promise, timeout]);
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Build a fixed-shape instruction packet for a test task.
 *
 * Only the ids, the objective and the creation timestamp vary; every other
 * field is a constant test fixture.
 *
 * @param taskId - Stored as `task_id`.
 * @param agentId - Stored as `agent_id`.
 * @param objective - Stored as `objective`.
 * @returns A fresh packet object (safe for the caller to mutate).
 */
export function generateInstructionPacket(taskId: string, agentId: string, objective: string) {
  const constraints = {
    scope: ["sandbox only"],
    forbidden: ["no prod access"],
    required_steps: ["plan before execute"],
  };

  const errorBudget = {
    max_total_errors: 5,
    max_same_error_repeats: 2,
    max_procedure_violations: 1,
  };

  const createdAt = new Date().toISOString();

  // Key order is kept stable because callers serialize the packet to JSON.
  return {
    agent_id: agentId,
    task_id: taskId,
    created_for: "Test Task",
    objective: objective,
    deliverables: ["plan", "report"],
    constraints: constraints,
    success_criteria: ["plan generated"],
    error_budget: errorBudget,
    escalation_rules: ["If confidence < 0.7 -> escalate"],
    created_at: createdAt,
  };
}

// =============================================================================
// Test Harness
// =============================================================================

/**
 * Runs registered scenarios sequentially and collects pass/fail metrics.
 *
 * For each scenario a fresh context is created, then setup, execute,
 * assertions and cleanup run in that order. Only `mode` is forwarded from the
 * harness config to createTestContext.
 */
export class TestHarness {
  private scenarios: TestScenario[] = [];
  private results: Map<string, { passed: boolean; error?: string; duration: number; mocks: string[] }> = new Map();
  private config: TestConfig;

  /** @param config - Overrides applied on top of the global config. */
  constructor(config?: Partial<TestConfig>) {
    this.config = { ...globalConfig, ...config };
  }

  /** Register a scenario; scenarios run in registration order. */
  addScenario(scenario: TestScenario): void {
    this.scenarios.push(scenario);
  }

  /**
   * Run every registered scenario.
   *
   * - Scenarios that declare `requiresReal` are skipped in MOCK mode.
   * - `cleanup` runs exactly once per executed scenario. A throw from it on
   *   the success path fails the scenario; on the failure path it is ignored.
   * - A real DragonflyDB connection is closed whether the scenario passed or
   *   failed. A failing disconnect is logged and does not change the outcome.
   *
   * @returns Aggregate metrics. `coverage` is the percentage of executed
   *          (non-skipped) scenarios that passed, or 0 when none executed.
   */
  async runAll(): Promise<TestMetrics> {
    const startTime = Date.now();
    let passed = 0;
    let failed = 0;
    let skipped = 0;
    const allMocksUsed: Set<string> = new Set();

    // Print mode banner
    this.printModeBanner();

    for (const scenario of this.scenarios) {
      const scenarioStart = Date.now();

      // Check if scenario requires real services
      if (scenario.requiresReal && scenario.requiresReal.length > 0 && this.config.mode === TestMode.MOCK) {
        console.log(`\n[SKIP] ${scenario.name} (requires real: ${scenario.requiresReal.join(", ")})`);
        skipped++;
        continue;
      }

      let ctx: TestContext | undefined;
      let cleanupAttempted = false;

      try {
        console.log(`\n[TEST] Running: ${scenario.name}`);

        ctx = await createTestContext({ mode: this.config.mode });

        // Log mocks in use for this test
        if (ctx._usingMocks.length > 0) {
          console.log(`       [MOCKS: ${ctx._usingMocks.join(", ")}]`);
          ctx._usingMocks.forEach(m => allMocksUsed.add(m));
        }

        await scenario.setup();
        await scenario.execute(ctx);
        await scenario.assertions(ctx);

        cleanupAttempted = true;
        await scenario.cleanup();

        this.results.set(scenario.name, {
          passed: true,
          duration: Date.now() - scenarioStart,
          mocks: ctx._usingMocks,
        });
        passed++;
        console.log(`[PASS] ${scenario.name} (${Date.now() - scenarioStart}ms)`);
      } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        this.results.set(scenario.name, {
          passed: false,
          error: message,
          duration: Date.now() - scenarioStart,
          // Keep the mock list when the context was created before the failure.
          mocks: ctx?._usingMocks ?? [],
        });
        failed++;
        console.log(`[FAIL] ${scenario.name}: ${message}`);

        // Do not re-run a cleanup that is itself the reason for the failure.
        if (!cleanupAttempted) {
          try {
            await scenario.cleanup();
          } catch {
            // Best effort: the scenario has already been recorded as failed.
          }
        }
      } finally {
        // Cleanup real connections, on success and on failure alike.
        if (ctx?.dragonfly.disconnect) {
          try {
            await ctx.dragonfly.disconnect();
          } catch (disconnectError: unknown) {
            const reason = disconnectError instanceof Error ? disconnectError.message : String(disconnectError);
            console.log(`       [WARN] Failed to disconnect DragonflyDB: ${reason}`);
          }
        }
      }
    }

    // Guard the division: with every scenario skipped the ratio would be 0/0.
    const executed = passed + failed;

    return {
      passed,
      failed,
      skipped,
      duration: Date.now() - startTime,
      coverage: executed > 0 ? (passed / executed) * 100 : 0,
      mocksUsed: Array.from(allMocksUsed),
    };
  }

  /** Print a banner describing the mode the harness is configured for. */
  private printModeBanner(): void {
    console.log("\n" + "=".repeat(60));
    if (this.config.mode === TestMode.MOCK) {
      console.log("⚠️  MOCK MODE ENABLED");
      console.log("    Tests are running against MOCK services.");
      console.log("    Results may not reflect real system behavior.");
      console.log("    Remove --use-mocks to test against real services.");
    } else if (this.config.mode === TestMode.HYBRID) {
      console.log("⚠️  HYBRID MODE");
      console.log("    Using real services where available, mocks otherwise.");
      console.log("    Check individual test output for mock usage.");
    } else {
      console.log("✅ REAL MODE");
      console.log("   Tests are running against REAL services.");
    }
    console.log("=".repeat(60));
  }

  /** Live map of per-scenario results, keyed by scenario name. */
  getResults(): Map<string, { passed: boolean; error?: string; duration: number; mocks: string[] }> {
    return this.results;
  }
}

// =============================================================================
// Pre-built Test Scenarios
// =============================================================================

/**
 * Factories for reusable scenarios.
 *
 * Each factory returns a fresh TestScenario. The scenarios only write to and
 * read from the context's DragonflyDB handle (and configure the mock LLM);
 * none of them starts an agent. State-based assertions are skipped when the
 * expected state key is absent.
 */
export const CommonScenarios = {
  // Happy path - agent completes successfully
  happyPath: (AgentClass: any): TestScenario => ({
    name: "Happy Path - Successful Completion",
    description: "Agent completes all phases without errors",
    async setup() {},
    async execute(ctx) {
      if (ctx.llm.isMock()) {
        // Script a successful planning response on the mock LLM.
        const plan = {
          title: "Test Plan",
          confidence: 0.85,
          steps: [{ step: 1, action: "Test action" }],
        };
        (ctx.llm as MockLLM).setResponse("plan", JSON.stringify(plan));
      } else {
        // Responses can only be scripted on the mock.
        console.log("       [WARN] This scenario requires mock LLM to control responses");
      }

      // Publish the instruction packet for the agent.
      const packet = generateInstructionPacket(ctx.taskId, ctx.agentId, "Test objective");
      await ctx.dragonfly.set(`agent:${ctx.agentId}:packet`, JSON.stringify(packet));
    },
    async assertions(ctx) {
      // Only checked when a state record exists.
      const stateStr = await ctx.dragonfly.get(`agent:${ctx.agentId}:state`);
      if (!stateStr) return;
      const state = JSON.parse(stateStr);
      expect(state.phase).toBe("EXIT");
      expect(state.status).toBe("COMPLETED");
    },
    async cleanup() {},
  }),

  // Error budget exceeded
  errorBudgetExceeded: (AgentClass: any): TestScenario => ({
    name: "Error Budget Exceeded - Revocation",
    description: "Agent is revoked when error budget is exceeded",
    async setup() {},
    async execute(ctx) {
      if (ctx.llm.isMock()) {
        // Every LLM call fails.
        (ctx.llm as MockLLM).setFailureRate(1.0);
      }

      // Packet with a budget of two errors...
      const packet = generateInstructionPacket(ctx.taskId, ctx.agentId, "Test objective");
      packet.error_budget.max_total_errors = 2;
      await ctx.dragonfly.set(`agent:${ctx.agentId}:packet`, JSON.stringify(packet));

      // ...and three errors already recorded.
      await ctx.dragonfly.hIncrBy(`agent:${ctx.agentId}:errors`, "total_errors", 3);
    },
    async assertions(ctx) {
      // Only checked when a state record exists.
      const stateStr = await ctx.dragonfly.get(`agent:${ctx.agentId}:state`);
      if (!stateStr) return;
      const state = JSON.parse(stateStr);
      expect(state.status).toBe("REVOKED");
    },
    async cleanup() {},
  }),

  // Stuck detection
  stuckDetection: (): TestScenario => ({
    name: "Stuck Detection - GAMMA Spawn",
    description: "GAMMA is spawned when agents are stuck",
    async setup() {},
    async execute(ctx) {
      // Record an ALPHA agent whose last activity was 60 seconds ago.
      const sixtySecondsAgo = new Date(Date.now() - 60000).toISOString();
      const stuckState = {
        agent_id: ctx.agentId,
        role: "ALPHA",
        status: "WORKING",
        last_activity: sixtySecondsAgo,
      };
      await ctx.dragonfly.hSet(`agents:${ctx.taskId}`, "ALPHA", JSON.stringify(stuckState));
    },
    async assertions(ctx) {
      // The recorded inactivity must exceed 30 seconds.
      const stateStr = await ctx.dragonfly.hGet(`agents:${ctx.taskId}`, "ALPHA");
      if (!stateStr) return;
      const state = JSON.parse(stateStr);
      const inactivity = (Date.now() - new Date(state.last_activity).getTime()) / 1000;
      expect(inactivity).toBeGreaterThan(30);
    },
    async cleanup() {},
  }),

  // Conflict resolution
  conflictResolution: (): TestScenario => ({
    name: "Conflict Resolution",
    description: "Multiple proposals lead to conflict detection",
    async setup() {},
    async execute(ctx) {
      const solutionsKey = `blackboard:${ctx.taskId}:solutions`;

      // Two competing proposals from the same author.
      const firstProposal = {
        author: "ALPHA",
        value: { approach: "Approach A", confidence: 0.8 },
      };
      const secondProposal = {
        author: "ALPHA",
        value: { approach: "Approach B", confidence: 0.7 },
      };
      await ctx.dragonfly.hSet(solutionsKey, "proposal_1", JSON.stringify(firstProposal));
      await ctx.dragonfly.hSet(solutionsKey, "proposal_2", JSON.stringify(secondProposal));

      // BETA rejects proposal_2
      const rejection = {
        accepted: false,
        score: 0.5,
      };
      await ctx.dragonfly.hSet(`blackboard:${ctx.taskId}:progress`, "eval_proposal_2", JSON.stringify(rejection));

      await ctx.dragonfly.hIncrBy(`metrics:${ctx.taskId}`, "conflicts_detected", 1);
    },
    async assertions(ctx) {
      const conflicts = await ctx.dragonfly.hGet(`metrics:${ctx.taskId}`, "conflicts_detected");
      const conflictCount = parseInt(conflicts || "0");
      expect(conflictCount).toBeGreaterThan(0);
    },
    async cleanup() {},
  }),

  // Real service connectivity test
  realServiceConnectivity: (): TestScenario => ({
    name: "Real Service Connectivity",
    description: "Verify connection to real Vault and DragonflyDB",
    requiresReal: ["Vault", "DragonflyDB"],
    async setup() {},
    async execute(ctx) {
      const anyMock = ctx.vault.isMock() || ctx.dragonfly.isMock();
      if (anyMock) {
        throw new Error("This test requires real services, but mocks are in use");
      }

      // Round-trip a throwaway key through DragonflyDB.
      const testKey = `test:connectivity:${Date.now()}`;
      await ctx.dragonfly.set(testKey, "test-value");
      const value = await ctx.dragonfly.get(testKey);
      await ctx.dragonfly.del(testKey);

      if (value !== "test-value") {
        throw new Error(`DragonflyDB read/write failed: expected 'test-value', got '${value}'`);
      }
    },
    async assertions(ctx) {
      // Reaching this point means execute() succeeded; confirm nothing is mocked.
      expect(ctx.vault.isMock()).toBe(false);
      expect(ctx.dragonfly.isMock()).toBe(false);
    },
    async cleanup() {},
  }),
};

// =============================================================================
// Example Test Suite
// =============================================================================

/**
 * Registers an example `bun:test` suite covering the mock implementations
 * (MockVault, MockDragonfly, MockLLM) and instruction packet generation.
 *
 * A context is created through `createTestContext` in `TestMode.MOCK` before
 * every test, so these tests never request real services.
 */
export function runExampleTests() {
  describe("Agent Governance Tests", () => {
    let ctx: TestContext;

    beforeEach(async () => {
      ctx = await createTestContext({ mode: TestMode.MOCK });
    });

    describe("MockVault", () => {
      it("should store and retrieve secrets", async () => {
        (ctx.vault as MockVault).setSecret("test/secret", { key: "value" });
        const secret = await ctx.vault.getSecret("test/secret");
        expect(secret.key).toBe("value");
      });

      it("should create and validate tokens", async () => {
        const token = await ctx.vault.createToken("t0-observer", 60);
        expect(token).toStartWith("hvs.test-");
        expect(await ctx.vault.validateToken(token)).toBe(true);
      });

      it("should revoke tokens", async () => {
        const token = await ctx.vault.createToken("t0-observer");
        await ctx.vault.revokeToken(token);
        expect(await ctx.vault.validateToken(token)).toBe(false);
      });
    });

    describe("MockDragonfly", () => {
      it("should handle string operations", async () => {
        await ctx.dragonfly.set("key", "value");
        expect(await ctx.dragonfly.get("key")).toBe("value");
      });

      it("should handle hash operations", async () => {
        await ctx.dragonfly.hSet("hash", "field", "value");
        expect(await ctx.dragonfly.hGet("hash", "field")).toBe("value");
      });

      it("should handle list operations", async () => {
        await (ctx.dragonfly as MockDragonfly).rPush("list", "a", "b", "c");
        const items = await ctx.dragonfly.lRange("list", 0, -1);
        expect(items).toEqual(["a", "b", "c"]);
      });

      it("should handle NX option", async () => {
        // With NX set, writing to an existing key must be a no-op that returns null.
        await ctx.dragonfly.set("existing", "first");
        const result = await ctx.dragonfly.set("existing", "second", { NX: true });
        expect(result).toBeNull();
        expect(await ctx.dragonfly.get("existing")).toBe("first");
      });
    });

    describe("MockLLM", () => {
      it("should return default response", async () => {
        const response = await ctx.llm.complete("test prompt");
        expect(response).toContain("confidence");
      });

      it("should match patterns", async () => {
        (ctx.llm as MockLLM).setResponse("terraform", '{"tool": "terraform"}');
        const response = await ctx.llm.complete("Create a terraform plan");
        expect(response).toContain("terraform");
      });

      it("should simulate failures", async () => {
        (ctx.llm as MockLLM).setFailureRate(1.0);
        // `.rejects` assertions are asynchronous: without `await` the test can
        // finish before the assertion settles and pass without checking anything.
        await expect(ctx.llm.complete("test")).rejects.toThrow("LLM_ERROR");
      });

      it("should track call count", async () => {
        await ctx.llm.complete("prompt 1");
        await ctx.llm.complete("prompt 2");
        expect((ctx.llm as MockLLM).getCallCount()).toBe(2);
      });
    });

    describe("Instruction Packets", () => {
      it("should generate valid packets", () => {
        const packet = generateInstructionPacket("task-1", "agent-1", "Test objective");
        expect(packet.agent_id).toBe("agent-1");
        expect(packet.task_id).toBe("task-1");
        expect(packet.error_budget.max_total_errors).toBe(5);
      });
    });
  });
}

// =============================================================================
// CLI
// =============================================================================

/**
 * Reads the CLI flags from `process.argv` (everything after the first two entries).
 *
 * Recognised flags:
 *   --use-mocks       select TestMode.MOCK
 *   --hybrid          select TestMode.HYBRID
 *   --validate-only   set validateOnly
 *   -v, --verbose     set verbose
 *
 * The mode defaults to TestMode.REAL. When several mode flags are given the
 * last one wins, and unrecognised arguments are ignored.
 *
 * @returns the selected mode plus the validateOnly and verbose switches.
 */
function parseArgs(): { mode: TestMode; validateOnly: boolean; verbose: boolean } {
  const parsed: { mode: TestMode; validateOnly: boolean; verbose: boolean } = {
    mode: TestMode.REAL,
    validateOnly: false,
    verbose: false,
  };

  process.argv.slice(2).forEach((flag) => {
    switch (flag) {
      case "--use-mocks":
        parsed.mode = TestMode.MOCK;
        break;
      case "--hybrid":
        parsed.mode = TestMode.HYBRID;
        break;
      case "--validate-only":
        parsed.validateOnly = true;
        break;
      case "-v":
      case "--verbose":
        parsed.verbose = true;
        break;
      default:
        // Anything else is deliberately left untouched.
        break;
    }
  });

  return parsed;
}

// CLI entry point: runs only when this file is executed directly.
//
// --validate-only prints a service availability report; it exits with code 1
// when services are unavailable in REAL mode or when validation itself fails.
// Otherwise the pre-built scenarios are run through a TestHarness and the
// process exits with code 1 if any scenario failed or the harness rejected.
if (import.meta.main) {
  const { mode, validateOnly, verbose } = parseArgs();
  globalConfig.mode = mode;
  globalConfig.verbose = verbose;

  // A rejection reason is not guaranteed to be an Error instance, so derive the
  // printable text defensively instead of reading `.message` blindly.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  console.log("Agent Testing Framework");
  console.log("=======================\n");

  if (validateOnly) {
    // Just validate services
    console.log("Validating services...\n");
    validateServices(globalConfig)
      .then(report => {
        console.log(`Timestamp: ${report.timestamp}`);
        console.log(`Mode: ${report.mode}\n`);

        for (const r of report.results) {
          const icon = r.available ? "✅" : "❌";
          console.log(`${icon} ${r.service}: ${r.available ? "available" : r.error} (${r.latencyMs}ms)`);
        }

        if (report.missingFiles.length > 0) {
          console.log("\n❌ Missing files:");
          report.missingFiles.forEach(f => console.log(`   - ${f}`));
        }

        console.log(`\n${report.allServicesAvailable ? "✅ All services available" : "❌ Some services unavailable"}`);

        // Only REAL mode treats unavailable services as a hard failure.
        if (!report.allServicesAvailable && mode === TestMode.REAL) {
          console.log("\n⚠️  Use --use-mocks to run tests with mock services");
          process.exit(1);
        }
      })
      .catch((error: unknown) => {
        // Without this handler a rejected validation would surface as an
        // unhandled rejection instead of a clear message and exit code.
        console.error("\n❌ Service validation failed:", describeError(error));
        process.exit(1);
      });
  } else {
    // Run tests
    const harness = new TestHarness({ mode });

    // Add pre-built scenarios
    harness.addScenario(CommonScenarios.happyPath(null));
    harness.addScenario(CommonScenarios.errorBudgetExceeded(null));
    harness.addScenario(CommonScenarios.stuckDetection());
    harness.addScenario(CommonScenarios.conflictResolution());

    // Add real service test if in real mode
    if (mode === TestMode.REAL) {
      harness.addScenario(CommonScenarios.realServiceConnectivity());
    }

    harness.runAll().then(metrics => {
      console.log("\n" + "=".repeat(60));
      console.log("TEST RESULTS");
      console.log("=".repeat(60));
      console.log(`Mode:     ${mode.toUpperCase()}`);
      console.log(`Passed:   ${metrics.passed}`);
      console.log(`Failed:   ${metrics.failed}`);
      console.log(`Skipped:  ${metrics.skipped}`);
      console.log(`Duration: ${metrics.duration}ms`);
      console.log(`Coverage: ${metrics.coverage.toFixed(1)}%`);

      if (metrics.mocksUsed.length > 0) {
        console.log(`\n⚠️  Mocks used: ${metrics.mocksUsed.join(", ")}`);
      }

      console.log("=".repeat(60));

      // Non-zero exit code signals failed scenarios to the calling shell.
      process.exit(metrics.failed > 0 ? 1 : 0);
    }).catch((error: unknown) => {
      console.error("\n❌ Test harness failed:", describeError(error));
      if (mode === TestMode.REAL) {
        console.log("\n💡 Tip: Use --use-mocks to run with mock services");
      }
      process.exit(1);
    });
  }
}