// agent-governance/testing/framework.ts

/**
 * Agent Testing Framework
 * =======================
 * Provides mocks, utilities, and test harnesses for agent development.
 */

import { describe, it, expect, beforeAll, afterAll, beforeEach, mock } from "bun:test";

// =============================================================================
// Type Definitions
// =============================================================================

/**
 * Per-scenario bundle of identifiers and mock services handed to test code.
 *
 * `createTestContext` in this file builds one with random ids, the creation
 * timestamp, and a fresh instance of every mock.
 */
export interface TestContext {
  /** Identifier of the task under test (used by the scenarios as part of store keys). */
  taskId: string;
  /** Identifier of the agent under test (used by the scenarios as part of store keys). */
  agentId: string;
  /** `Date.now()` value (epoch milliseconds) captured when the context was created. */
  startTime: number;
  /** In-memory secret/token store mock. */
  mockVault: MockVault;
  /** In-memory key/value, hash, list and pub/sub mock. */
  mockDragonfly: MockDragonfly;
  /** Scriptable LLM completion mock. */
  mockLLM: MockLLM;
}

/**
 * A named, self-contained scenario that `TestHarness.runAll` can execute.
 *
 * The harness awaits `setup`, `execute`, `assertions` and `cleanup` in that
 * order; a rejection from any of them marks the scenario as failed, and
 * `cleanup` is still attempted after a failure.
 */
export interface TestScenario {
  /** Scenario name; printed in the log and used as the key of the harness result map. */
  name: string;
  /** Free-form human-readable explanation of what the scenario covers. */
  description: string;
  /** Preparation step; receives no context. */
  setup: () => Promise<void>;
  /** Drives the behaviour under test against the supplied context. */
  execute: (ctx: TestContext) => Promise<void>;
  /** Verifies the outcome; should reject (e.g. via a failing `expect`) to fail the scenario. */
  assertions: (ctx: TestContext) => Promise<void>;
  /** Teardown step; receives no context. */
  cleanup: () => Promise<void>;
}

/** Aggregate outcome of a `TestHarness.runAll` invocation. */
export interface TestMetrics {
  /** Number of scenarios that completed without throwing. */
  passed: number;
  /** Number of scenarios that threw in any phase. */
  failed: number;
  /** Number of scenarios not run; `runAll` currently always reports 0. */
  skipped: number;
  /** Wall-clock time of the whole run in milliseconds (difference of `Date.now()` values). */
  duration: number;
  /**
   * Percentage (0-100) of registered scenarios that passed, as computed by
   * `runAll`. NOTE: this is a pass rate, not source-code coverage.
   */
  coverage: number;
}

// =============================================================================
// Mock Vault
// =============================================================================

/**
 * In-memory stand-in for a Vault-style secret store.
 *
 * Holds secrets, named policies (lists of glob rules such as
 * `"read:secret/data/docs/*"`), issued tokens with a TTL, and a log of every
 * secret read. Nothing is persisted and nothing leaves the process.
 */
export class MockVault {
  private secrets: Map<string, any> = new Map();
  private policies: Map<string, string[]> = new Map();
  private tokens: Map<string, { policy: string; ttl: number; created: number }> = new Map();
  private accessLog: Array<{ path: string; action: string; timestamp: number }> = [];

  /** Seeds the default test secrets and the `t0-observer` / `t1-operator` policies. */
  constructor() {
    // Initialize with default test secrets
    this.secrets.set("api-keys/openrouter", { api_key: "test-key" });
    this.secrets.set("services/dragonfly", {
      host: "127.0.0.1",
      port: 6379,
      password: "test-password",
    });

    // Default policies
    this.policies.set("t0-observer", ["read:secret/data/docs/*", "read:secret/data/inventory/*"]);
    this.policies.set("t1-operator", ["read:ssh/creds/sandbox-*", "read:proxmox/creds/sandbox"]);
  }

  /**
   * Reads a secret and records the read in the access log.
   *
   * @param path Key the secret was stored under.
   * @returns The stored value, or `null` when nothing is stored at `path`.
   */
  async getSecret(path: string): Promise<any> {
    this.accessLog.push({ path, action: "read", timestamp: Date.now() });
    // `??` rather than `||`: a stored 0, "" or false is a real secret, not a miss.
    return this.secrets.get(path) ?? null;
  }

  /** Stores (or overwrites) the secret at `path`. Not recorded in the access log. */
  setSecret(path: string, value: any): void {
    this.secrets.set(path, value);
  }

  /**
   * Issues a new token bound to a policy name.
   *
   * @param policy Name of the policy the token is checked against in `checkAccess`.
   * @param ttl Lifetime in seconds, measured from now.
   * @returns The token string, prefixed with `hvs.test-`.
   */
  async createToken(policy: string, ttl: number = 3600): Promise<string> {
    const token = "hvs.test-" + Math.random().toString(36).slice(2);
    this.tokens.set(token, { policy, ttl, created: Date.now() });
    return token;
  }

  /**
   * Reports whether a token exists and is still within its TTL.
   *
   * @returns `false` for unknown/revoked tokens and for tokens whose age in
   *          seconds has reached their TTL.
   */
  async validateToken(token: string): Promise<boolean> {
    const tokenData = this.tokens.get(token);
    if (!tokenData) return false;
    const elapsed = (Date.now() - tokenData.created) / 1000;
    return elapsed < tokenData.ttl;
  }

  /** Forgets the token; revoking an unknown token is a no-op. */
  async revokeToken(token: string): Promise<void> {
    this.tokens.delete(token);
  }

  /**
   * Checks `path` against the glob rules of the token's policy.
   *
   * A rule matches only when it covers the whole of `path`; every `*` in a
   * rule matches any run of characters and all other characters are literal.
   * NOTE: the token's TTL is not consulted here; use `validateToken` for that.
   *
   * @returns `true` when at least one rule matches; `false` for unknown
   *          tokens, unknown policies, or no matching rule.
   */
  checkAccess(token: string, path: string): boolean {
    const tokenData = this.tokens.get(token);
    if (!tokenData) return false;
    const allowedPaths = this.policies.get(tokenData.policy) ?? [];
    return allowedPaths.some(rule => MockVault.ruleToRegExp(rule).test(path));
  }

  /** Returns the log of `getSecret` calls made since construction or the last `reset`. */
  getAccessLog(): Array<{ path: string; action: string; timestamp: number }> {
    return this.accessLog;
  }

  /** Clears the access log and all issued tokens; secrets and policies are kept. */
  reset(): void {
    this.accessLog = [];
    this.tokens.clear();
  }

  /** Compiles a glob rule into an anchored regular expression. */
  private static ruleToRegExp(rule: string): RegExp {
    // Escape each literal segment so characters such as "." cannot act as
    // regex operators, then join the segments with ".*" for every "*".
    const body = rule
      .split("*")
      .map(segment => segment.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
      .join(".*");
    return new RegExp("^" + body + "$");
  }
}

// =============================================================================
// Mock DragonflyDB
// =============================================================================

/**
 * In-memory mock of the subset of a Redis-compatible client used by the
 * agents: strings, hashes, lists, key expiry and pub/sub.
 *
 * Strings, hashes and lists live in separate maps but share one expiry table,
 * so `del`, `exists` and `expire` work on a key of any of the three kinds.
 * Expiry is lazy: an expired key is dropped the next time it is touched.
 */
export class MockDragonfly {
  private store: Map<string, any> = new Map();
  private hashes: Map<string, Map<string, any>> = new Map();
  private lists: Map<string, any[]> = new Map();
  private expirations: Map<string, number> = new Map();
  private subscribers: Map<string, Array<(msg: string) => void>> = new Map();

  // String operations

  /**
   * Stores a string value.
   *
   * @param options.EX Time to live in seconds. When omitted, any previous TTL
   *                   on the key is cleared.
   * @param options.NX Only set when the key does not currently exist.
   * @returns `"OK"` when the value was written, `null` when `NX` blocked it.
   */
  async set(key: string, value: any, options?: { EX?: number; NX?: boolean }): Promise<string | null> {
    // Drop the key first if its TTL has run out, so an expired key cannot block NX.
    this.purgeIfExpired(key);
    if (options?.NX && this.hasKey(key)) return null;
    this.store.set(key, value);
    if (options?.EX) {
      this.expirations.set(key, Date.now() + options.EX * 1000);
    } else {
      // A plain SET makes the key persistent again; a stale TTL must not survive it.
      this.expirations.delete(key);
    }
    return "OK";
  }

  /** Returns the stored string value, or `null` when the key is missing or expired. */
  async get(key: string): Promise<any> {
    this.purgeIfExpired(key);
    // `??` keeps legitimately falsy values (0, "", false).
    return this.store.get(key) ?? null;
  }

  /**
   * Deletes a key of any kind together with its TTL.
   *
   * @returns 1 when something was removed, 0 when the key did not exist
   *          (including when it had already expired).
   */
  async del(key: string): Promise<number> {
    this.purgeIfExpired(key);
    const removedString = this.store.delete(key);
    const removedHash = this.hashes.delete(key);
    const removedList = this.lists.delete(key);
    this.expirations.delete(key);
    return removedString || removedHash || removedList ? 1 : 0;
  }

  /** Returns 1 when a live (non-expired) key of any kind exists, otherwise 0. */
  async exists(key: string): Promise<number> {
    this.purgeIfExpired(key);
    return this.hasKey(key) ? 1 : 0;
  }

  /**
   * Sets the TTL of an existing key of any kind.
   *
   * @param seconds Lifetime from now, in seconds.
   * @returns `false` when the key does not exist, `true` otherwise.
   */
  async expire(key: string, seconds: number): Promise<boolean> {
    this.purgeIfExpired(key);
    if (!this.hasKey(key)) return false;
    this.expirations.set(key, Date.now() + seconds * 1000);
    return true;
  }

  // Hash operations

  /**
   * Sets one field of a hash, creating the hash if needed.
   *
   * @returns 1 when the field is new, 0 when an existing field was overwritten.
   */
  async hSet(key: string, field: string, value: any): Promise<number> {
    const hash = this.hashFor(key);
    const existed = hash.has(field);
    hash.set(field, value);
    return existed ? 0 : 1;
  }

  /** Returns the field's value, or `null` when the hash or the field is missing. */
  async hGet(key: string, field: string): Promise<any> {
    this.purgeIfExpired(key);
    return this.hashes.get(key)?.get(field) ?? null;
  }

  /** Returns all fields of a hash as a plain object; `{}` when the hash is missing. */
  async hGetAll(key: string): Promise<Record<string, any>> {
    this.purgeIfExpired(key);
    const hash = this.hashes.get(key);
    return hash ? Object.fromEntries(hash) : {};
  }

  /**
   * Adds `increment` to an integer field, treating a missing field as 0.
   * The new value is stored as a decimal string.
   *
   * @returns The value after the increment.
   */
  async hIncrBy(key: string, field: string, increment: number): Promise<number> {
    const hash = this.hashFor(key);
    const current = parseInt(hash.get(field) || "0", 10);
    const newValue = current + increment;
    hash.set(field, newValue.toString());
    return newValue;
  }

  // List operations

  /**
   * Appends values to the tail of a list, creating the list if needed.
   *
   * @returns The length of the list after the push.
   */
  async rPush(key: string, ...values: any[]): Promise<number> {
    this.purgeIfExpired(key);
    let list = this.lists.get(key);
    if (!list) {
      list = [];
      this.lists.set(key, list);
    }
    list.push(...values);
    return list.length;
  }

  /**
   * Returns the elements between `start` and `stop`, both inclusive, as the
   * Redis LRANGE command does. Negative indexes count from the tail
   * (-1 is the last element); out-of-range indexes are clamped.
   *
   * @returns A new array; empty when the list is missing or the range is empty.
   */
  async lRange(key: string, start: number, stop: number): Promise<any[]> {
    this.purgeIfExpired(key);
    const list = this.lists.get(key) ?? [];
    const length = list.length;
    const first = start < 0 ? Math.max(length + start, 0) : start;
    const last = stop < 0 ? length + stop : Math.min(stop, length - 1);
    // Covers empty lists, start beyond the tail, and stop before the head.
    if (first > last) return [];
    return list.slice(first, last + 1);
  }

  // Pub/Sub

  /** Registers `handler` to be called synchronously for each message published on `channel`. */
  async subscribe(channel: string, handler: (msg: string) => void): Promise<void> {
    let handlers = this.subscribers.get(channel);
    if (!handlers) {
      handlers = [];
      this.subscribers.set(channel, handlers);
    }
    handlers.push(handler);
  }

  /**
   * Delivers `message` to every handler subscribed to `channel`.
   *
   * @returns The number of handlers the message was delivered to.
   */
  async publish(channel: string, message: string): Promise<number> {
    const handlers = this.subscribers.get(channel) ?? [];
    handlers.forEach(h => h(message));
    return handlers.length;
  }

  /** Removes all handlers of `channel`, or of every channel when none is given. */
  async unsubscribe(channel?: string): Promise<void> {
    if (channel) {
      this.subscribers.delete(channel);
    } else {
      this.subscribers.clear();
    }
  }

  // Utility

  /** True when the key has a TTL that has been reached. */
  private isExpired(key: string): boolean {
    const expiry = this.expirations.get(key);
    return expiry !== undefined && Date.now() >= expiry;
  }

  /** Removes every trace of `key` if its TTL has run out. */
  private purgeIfExpired(key: string): void {
    if (!this.isExpired(key)) return;
    this.store.delete(key);
    this.hashes.delete(key);
    this.lists.delete(key);
    this.expirations.delete(key);
  }

  /** True when `key` currently holds a string, a hash or a list. */
  private hasKey(key: string): boolean {
    return this.store.has(key) || this.hashes.has(key) || this.lists.has(key);
  }

  /** Returns the live hash stored at `key`, creating an empty one if absent. */
  private hashFor(key: string): Map<string, any> {
    this.purgeIfExpired(key);
    let hash = this.hashes.get(key);
    if (!hash) {
      hash = new Map();
      this.hashes.set(key, hash);
    }
    return hash;
  }

  /** Drops all data, TTLs and subscriptions. */
  reset(): void {
    this.store.clear();
    this.hashes.clear();
    this.lists.clear();
    this.expirations.clear();
    this.subscribers.clear();
  }

  // Test helpers

  /** Exposes the live internal maps (not copies) for direct inspection in tests. */
  getState(): { store: Map<string, any>; hashes: Map<string, Map<string, any>>; lists: Map<string, any[]> } {
    return { store: this.store, hashes: this.hashes, lists: this.lists };
  }
}

// =============================================================================
// Mock LLM
// =============================================================================

/**
 * Scriptable LLM completion mock.
 *
 * Canned responses are selected by case-insensitive substring match on the
 * prompt; artificial latency and a random failure rate can be dialled in.
 * Every successful completion is appended to a call log.
 */
export class MockLLM {
  private responses = new Map<string, string>();
  private callLog: Array<{ prompt: string; response: string; timestamp: number }> = [];
  private defaultResponse = '{"confidence": 0.5, "steps": [], "assumptions": []}';
  private latencyMs = 0;
  private failureRate = 0;

  /** Registers `response` for prompts that contain `pattern` (case-insensitive). */
  setResponse(pattern: string, response: string): void {
    this.responses.set(pattern, response);
  }

  /** Replaces the response returned when no registered pattern matches. */
  setDefaultResponse(response: string): void {
    this.defaultResponse = response;
  }

  /** Sets the artificial delay, in milliseconds, applied before each completion. */
  setLatency(ms: number): void {
    this.latencyMs = ms;
  }

  /** Sets the probability (compared against `Math.random()`) that a completion throws. */
  setFailureRate(rate: number): void {
    this.failureRate = rate;
  }

  /**
   * Produces the canned completion for `prompt`.
   *
   * @param prompt Text matched against the registered patterns.
   * @param options Accepted for signature compatibility; not used by the mock.
   * @returns The response of the first registered pattern (in registration
   *          order) contained in the prompt, or the default response.
   * @throws Error with message `LLM_ERROR: Simulated failure` when the
   *         failure roll hits; such calls are not added to the call log.
   */
  async complete(prompt: string, options?: { maxTokens?: number; temperature?: number }): Promise<string> {
    // Artificial latency comes first, then the failure roll.
    if (this.latencyMs > 0) {
      await new Promise(resolve => setTimeout(resolve, this.latencyMs));
    }
    if (Math.random() < this.failureRate) {
      throw new Error("LLM_ERROR: Simulated failure");
    }

    const loweredPrompt = prompt.toLowerCase();
    const match = [...this.responses].find(([pattern]) => loweredPrompt.includes(pattern.toLowerCase()));
    const response = match ? match[1] : this.defaultResponse;

    this.callLog.push({ prompt, response, timestamp: Date.now() });
    return response;
  }

  /** Returns the log of successful completions since construction or the last `reset`. */
  getCallLog(): Array<{ prompt: string; response: string; timestamp: number }> {
    return this.callLog;
  }

  /** Number of entries in the call log. */
  getCallCount(): number {
    return this.callLog.length;
  }

  /**
   * Clears the call log and turns latency and failures off.
   * Registered responses and the default response are left untouched.
   */
  reset(): void {
    this.failureRate = 0;
    this.latencyMs = 0;
    this.callLog = [];
  }
}

// =============================================================================
// Test Utilities
// =============================================================================

/**
 * Builds a fresh test context: random task/agent ids, the current timestamp,
 * and a new instance of every mock.
 *
 * @param overrides Fields that replace the generated defaults.
 * @returns The defaults with `overrides` spread on top.
 */
export function createTestContext(overrides?: Partial<TestContext>): TestContext {
  // Up to six base-36 characters taken from a random fraction.
  const randomSuffix = (): string => Math.random().toString(36).slice(2, 8);

  const defaults: TestContext = {
    taskId: `test-task-${randomSuffix()}`,
    agentId: `test-agent-${randomSuffix()}`,
    startTime: Date.now(),
    mockVault: new MockVault(),
    mockDragonfly: new MockDragonfly(),
    mockLLM: new MockLLM(),
  };

  return { ...defaults, ...overrides };
}

/**
 * Races `promise` against a timer.
 *
 * @param promise The operation to bound.
 * @param ms Milliseconds to wait before giving up.
 * @param message Message of the error raised on timeout.
 * @returns The value `promise` resolves to, if it settles first.
 * @throws Error carrying `message` when the timer fires first; otherwise
 *         whatever `promise` itself rejects with.
 */
export async function withTimeout<T>(promise: Promise<T>, ms: number, message: string = "Timeout"): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error(message)), ms);
  });
  try {
    return await Promise.race([promise, timeout]);
  } finally {
    // Always cancel the timer: a pending timeout would otherwise keep the
    // event loop alive for `ms` after the race has already been decided.
    clearTimeout(timer);
  }
}

/**
 * Builds a canned instruction packet for tests.
 *
 * Only the ids, the objective and the creation timestamp vary; every other
 * field is a fixed test value. All nested objects are created fresh on each
 * call, so callers may mutate the result freely.
 *
 * @param taskId Stored as `task_id`.
 * @param agentId Stored as `agent_id`.
 * @param objective Stored as `objective`.
 * @returns The packet, with `created_at` set to the current time in ISO-8601 form.
 */
export function generateInstructionPacket(taskId: string, agentId: string, objective: string) {
  const constraints = {
    scope: ["sandbox only"],
    forbidden: ["no prod access"],
    required_steps: ["plan before execute"],
  };

  const errorBudget = {
    max_total_errors: 5,
    max_same_error_repeats: 2,
    max_procedure_violations: 1,
  };

  const createdAt = new Date().toISOString();

  // Key order is kept stable because callers serialise the packet with JSON.stringify.
  return {
    agent_id: agentId,
    task_id: taskId,
    created_for: "Test Task",
    objective,
    deliverables: ["plan", "report"],
    constraints,
    success_criteria: ["plan generated"],
    error_budget: errorBudget,
    escalation_rules: ["If confidence < 0.7 -> escalate"],
    created_at: createdAt,
  };
}

// =============================================================================
// Test Harness
// =============================================================================

/**
 * Sequential runner for `TestScenario`s.
 *
 * Each scenario gets its own context from `createTestContext`. The outcome of
 * every scenario is kept in a result map keyed by scenario name, so scenarios
 * sharing a name overwrite each other's entry.
 */
export class TestHarness {
  private scenarios: TestScenario[] = [];
  private results: Map<string, { passed: boolean; error?: string; duration: number }> = new Map();

  /** Queues a scenario; scenarios run in the order they were added. */
  addScenario(scenario: TestScenario): void {
    this.scenarios.push(scenario);
  }

  /**
   * Runs every queued scenario one after another.
   *
   * For each scenario `setup`, `execute` and `assertions` are awaited in
   * order, then `cleanup` is awaited exactly once whether or not an earlier
   * phase threw. A scenario fails when any phase throws, including `cleanup`
   * after an otherwise clean run. When `cleanup` throws after an earlier
   * failure, the first error stays the recorded one and the cleanup error is
   * logged.
   *
   * @returns Pass/fail counts, total duration in milliseconds, and the pass
   *          rate (reported as `coverage`, 0 when no scenarios are queued).
   */
  async runAll(): Promise<TestMetrics> {
    const startTime = Date.now();
    let passed = 0;
    let failed = 0;

    for (const scenario of this.scenarios) {
      const scenarioStart = Date.now();
      const ctx = createTestContext();
      let failure: string | undefined;

      console.log(`\n[TEST] Running: ${scenario.name}`);
      try {
        await scenario.setup();
        await scenario.execute(ctx);
        await scenario.assertions(ctx);
      } catch (error: unknown) {
        failure = TestHarness.describeError(error);
      }

      // Cleanup is kept outside the try above so that a throwing cleanup is
      // never invoked a second time.
      try {
        await scenario.cleanup();
      } catch (error: unknown) {
        const cleanupMessage = TestHarness.describeError(error);
        if (failure === undefined) {
          failure = cleanupMessage;
        } else {
          // Best-effort teardown: keep the original failure, but do not hide this one.
          console.log(`[WARN] ${scenario.name}: cleanup failed: ${cleanupMessage}`);
        }
      }

      const duration = Date.now() - scenarioStart;
      if (failure === undefined) {
        this.results.set(scenario.name, { passed: true, duration });
        passed++;
        console.log(`[PASS] ${scenario.name} (${duration}ms)`);
      } else {
        this.results.set(scenario.name, { passed: false, error: failure, duration });
        failed++;
        console.log(`[FAIL] ${scenario.name}: ${failure}`);
      }
    }

    return {
      passed,
      failed,
      skipped: 0,
      duration: Date.now() - startTime,
      coverage: this.scenarios.length > 0 ? (passed / this.scenarios.length) * 100 : 0,
    };
  }

  /** Returns the live per-scenario result map, keyed by scenario name. */
  getResults(): Map<string, { passed: boolean; error?: string; duration: number }> {
    return this.results;
  }

  /** Turns any thrown value into a printable message. */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }
}

// =============================================================================
// Pre-built Test Scenarios
// =============================================================================

/**
 * Factory functions for ready-made scenarios.
 *
 * NOTE(review): none of these factories runs an agent. `AgentClass` is
 * accepted but unused, and the state-based assertions are skipped when no
 * state was written, so `happyPath` and `errorBudgetExceeded` pass vacuously
 * unless something else populates `agent:<id>:state`.
 */
export const CommonScenarios = {
  /** Agent is expected to finish in phase EXIT with status COMPLETED. */
  happyPath(AgentClass: any): TestScenario {
    return {
      name: "Happy Path - Successful Completion",
      description: "Agent completes all phases without errors",
      async setup() {},
      async execute(ctx) {
        const { mockLLM, mockDragonfly, taskId, agentId } = ctx;

        // Canned planning response returned for any prompt containing "plan".
        const planResponse = {
          title: "Test Plan",
          confidence: 0.85,
          steps: [{ step: 1, action: "Test action" }],
        };
        mockLLM.setResponse("plan", JSON.stringify(planResponse));

        // Publish the instruction packet under the agent's packet key.
        const instructionPacket = generateInstructionPacket(taskId, agentId, "Test objective");
        await mockDragonfly.set(`agent:${agentId}:packet`, JSON.stringify(instructionPacket));
      },
      async assertions(ctx) {
        const rawState = await ctx.mockDragonfly.get(`agent:${ctx.agentId}:state`);
        // Nothing to verify when no state was ever written.
        if (!rawState) return;

        const agentState = JSON.parse(rawState);
        expect(agentState.phase).toBe("EXIT");
        expect(agentState.status).toBe("COMPLETED");
      },
      async cleanup() {},
    };
  },

  /** Agent is expected to end up REVOKED once its error budget is blown. */
  errorBudgetExceeded(AgentClass: any): TestScenario {
    return {
      name: "Error Budget Exceeded - Revocation",
      description: "Agent is revoked when error budget is exceeded",
      async setup() {},
      async execute(ctx) {
        const { mockLLM, mockDragonfly, taskId, agentId } = ctx;

        // Every LLM call throws.
        mockLLM.setFailureRate(1.0);

        // Tighten the budget to two errors, then record three.
        const instructionPacket = generateInstructionPacket(taskId, agentId, "Test objective");
        instructionPacket.error_budget.max_total_errors = 2;
        await mockDragonfly.set(`agent:${agentId}:packet`, JSON.stringify(instructionPacket));
        await mockDragonfly.hIncrBy(`agent:${agentId}:errors`, "total_errors", 3);
      },
      async assertions(ctx) {
        const rawState = await ctx.mockDragonfly.get(`agent:${ctx.agentId}:state`);
        // Nothing to verify when no state was ever written.
        if (!rawState) return;

        const agentState = JSON.parse(rawState);
        expect(agentState.status).toBe("REVOKED");
      },
      async cleanup() {},
    };
  },

  /** Plants an ALPHA agent idle for 60s and checks it exceeds the 30s inactivity mark. */
  stuckDetection(): TestScenario {
    return {
      name: "Stuck Detection - GAMMA Spawn",
      description: "GAMMA is spawned when agents are stuck",
      async setup() {},
      async execute(ctx) {
        const sixtySecondsAgo = new Date(Date.now() - 60000).toISOString();
        const idleAgent = {
          agent_id: ctx.agentId,
          role: "ALPHA",
          status: "WORKING",
          last_activity: sixtySecondsAgo,
        };
        await ctx.mockDragonfly.hSet(`agents:${ctx.taskId}`, "ALPHA", JSON.stringify(idleAgent));
      },
      async assertions(ctx) {
        const rawAgent = await ctx.mockDragonfly.hGet(`agents:${ctx.taskId}`, "ALPHA");
        if (!rawAgent) return;

        const agent = JSON.parse(rawAgent);
        const lastActivityMs = new Date(agent.last_activity).getTime();
        const idleSeconds = (Date.now() - lastActivityMs) / 1000;
        expect(idleSeconds).toBeGreaterThan(30);
      },
      async cleanup() {},
    };
  },

  /** Writes two competing proposals, a rejection, and bumps the conflict counter. */
  conflictResolution(): TestScenario {
    return {
      name: "Conflict Resolution",
      description: "Multiple proposals lead to conflict detection",
      async setup() {},
      async execute(ctx) {
        const db = ctx.mockDragonfly;
        const solutionsKey = `blackboard:${ctx.taskId}:solutions`;
        const progressKey = `blackboard:${ctx.taskId}:progress`;

        // Two proposals from the same author with different approaches.
        const proposalA = {
          author: "ALPHA",
          value: { approach: "Approach A", confidence: 0.8 },
        };
        const proposalB = {
          author: "ALPHA",
          value: { approach: "Approach B", confidence: 0.7 },
        };
        await db.hSet(solutionsKey, "proposal_1", JSON.stringify(proposalA));
        await db.hSet(solutionsKey, "proposal_2", JSON.stringify(proposalB));

        // Evaluation entry rejecting proposal_2.
        const rejection = { accepted: false, score: 0.5 };
        await db.hSet(progressKey, "eval_proposal_2", JSON.stringify(rejection));

        await db.hIncrBy(`metrics:${ctx.taskId}`, "conflicts_detected", 1);
      },
      async assertions(ctx) {
        const conflictCount = await ctx.mockDragonfly.hGet(`metrics:${ctx.taskId}`, "conflicts_detected");
        expect(parseInt(conflictCount || "0")).toBeGreaterThan(0);
      },
      async cleanup() {},
    };
  },
};

// =============================================================================
// Example Test Suite
// =============================================================================

/**
 * Registers an example `bun:test` suite that exercises the mocks and the
 * instruction-packet generator. Call it from a test file; every test gets a
 * fresh context from `createTestContext`.
 */
export function runExampleTests() {
  describe("Agent Governance Tests", () => {
    let ctx: TestContext;

    beforeEach(() => {
      ctx = createTestContext();
    });

    describe("MockVault", () => {
      it("should store and retrieve secrets", async () => {
        ctx.mockVault.setSecret("test/secret", { key: "value" });
        const secret = await ctx.mockVault.getSecret("test/secret");
        expect(secret.key).toBe("value");
      });

      it("should create and validate tokens", async () => {
        const token = await ctx.mockVault.createToken("t0-observer", 60);
        expect(token).toStartWith("hvs.test-");
        expect(await ctx.mockVault.validateToken(token)).toBe(true);
      });

      it("should revoke tokens", async () => {
        const token = await ctx.mockVault.createToken("t0-observer");
        await ctx.mockVault.revokeToken(token);
        expect(await ctx.mockVault.validateToken(token)).toBe(false);
      });
    });

    describe("MockDragonfly", () => {
      it("should handle string operations", async () => {
        await ctx.mockDragonfly.set("key", "value");
        expect(await ctx.mockDragonfly.get("key")).toBe("value");
      });

      it("should handle hash operations", async () => {
        await ctx.mockDragonfly.hSet("hash", "field", "value");
        expect(await ctx.mockDragonfly.hGet("hash", "field")).toBe("value");
      });

      it("should handle list operations", async () => {
        await ctx.mockDragonfly.rPush("list", "a", "b", "c");
        const items = await ctx.mockDragonfly.lRange("list", 0, -1);
        expect(items).toEqual(["a", "b", "c"]);
      });

      it("should handle expiration", async () => {
        await ctx.mockDragonfly.set("expiring", "value", { EX: 1 });
        expect(await ctx.mockDragonfly.get("expiring")).toBe("value");

        // Move the clock two seconds ahead instead of sleeping past the 1s TTL.
        const realNow = Date.now;
        const afterExpiry = realNow() + 2000;
        Date.now = () => afterExpiry;
        try {
          expect(await ctx.mockDragonfly.get("expiring")).toBeNull();
        } finally {
          Date.now = realNow;
        }
      });

      it("should handle NX option", async () => {
        await ctx.mockDragonfly.set("existing", "first");
        const result = await ctx.mockDragonfly.set("existing", "second", { NX: true });
        expect(result).toBeNull();
        expect(await ctx.mockDragonfly.get("existing")).toBe("first");
      });
    });

    describe("MockLLM", () => {
      it("should return default response", async () => {
        const response = await ctx.mockLLM.complete("test prompt");
        expect(response).toContain("confidence");
      });

      it("should match patterns", async () => {
        ctx.mockLLM.setResponse("terraform", '{"tool": "terraform"}');
        const response = await ctx.mockLLM.complete("Create a terraform plan");
        expect(response).toContain("terraform");
      });

      it("should simulate failures", async () => {
        ctx.mockLLM.setFailureRate(1.0);
        // The `.rejects` matcher is asynchronous; without `await` the test
        // would finish before the assertion is evaluated.
        await expect(ctx.mockLLM.complete("test")).rejects.toThrow("LLM_ERROR");
      });

      it("should track call count", async () => {
        await ctx.mockLLM.complete("prompt 1");
        await ctx.mockLLM.complete("prompt 2");
        expect(ctx.mockLLM.getCallCount()).toBe(2);
      });
    });

    describe("Instruction Packets", () => {
      it("should generate valid packets", () => {
        const packet = generateInstructionPacket("task-1", "agent-1", "Test objective");
        expect(packet.agent_id).toBe("agent-1");
        expect(packet.task_id).toBe("task-1");
        expect(packet.error_budget.max_total_errors).toBe(5);
      });
    });
  });
}

// =============================================================================
// CLI
// =============================================================================

// When this file is executed directly, run the pre-built scenarios through the
// harness, print a summary, and exit non-zero if any scenario failed.
if (import.meta.main) {
  console.log("Agent Testing Framework");
  console.log("=======================\n");

  const harness = new TestHarness();

  // Add pre-built scenarios
  harness.addScenario(CommonScenarios.happyPath(null));
  harness.addScenario(CommonScenarios.errorBudgetExceeded(null));
  harness.addScenario(CommonScenarios.stuckDetection());
  harness.addScenario(CommonScenarios.conflictResolution());

  harness
    .runAll()
    .then(metrics => {
      console.log("\n" + "=".repeat(50));
      console.log("TEST RESULTS");
      console.log("=".repeat(50));
      console.log(`Passed:   ${metrics.passed}`);
      console.log(`Failed:   ${metrics.failed}`);
      console.log(`Skipped:  ${metrics.skipped}`);
      console.log(`Duration: ${metrics.duration}ms`);
      console.log(`Coverage: ${metrics.coverage.toFixed(1)}%`);
      console.log("=".repeat(50));

      process.exit(metrics.failed > 0 ? 1 : 0);
    })
    .catch((error: unknown) => {
      // A rejection here means the harness itself broke, not a scenario;
      // report it and signal failure instead of leaving the promise unhandled.
      console.error("Test harness crashed:", error);
      process.exit(1);
    });
}