/**
 * Agent Testing Framework
 * =======================
 * Provides mocks, utilities, and test harnesses for agent development.
 */

import { describe, it, expect, beforeAll, afterAll, beforeEach, mock } from "bun:test";

// =============================================================================
// Type Definitions
// =============================================================================

/** Identifiers and mock services handed to each scenario phase. */
export interface TestContext {
  taskId: string;
  agentId: string;
  /** Epoch milliseconds at which the context was created. */
  startTime: number;
  mockVault: MockVault;
  mockDragonfly: MockDragonfly;
  mockLLM: MockLLM;
}

/** A named scenario made of four async phases run in order by the harness. */
export interface TestScenario {
  name: string;
  description: string;
  setup: () => Promise<void>;
  execute: (ctx: TestContext) => Promise<void>;
  assertions: (ctx: TestContext) => Promise<void>;
  cleanup: () => Promise<void>;
}

/** Aggregate numbers returned by a harness run. */
export interface TestMetrics {
  passed: number;
  failed: number;
  skipped: number;
  /** Wall-clock duration in milliseconds. */
  duration: number;
  coverage: number;
}

// =============================================================================
// Mock Vault
// =============================================================================

/**
 * In-memory stand-in for a secrets vault: secrets, named policies, and
 * short-lived tokens bound to a policy. Every secret read is recorded in an
 * access log so tests can assert on it.
 */
export class MockVault {
  // Secret payloads are intentionally untyped: callers read arbitrary fields.
  private secrets: Map<string, any> = new Map();
  private policies: Map<string, string[]> = new Map();
  private tokens: Map<string, { policy: string; ttl: number; created: number }> = new Map();
  private accessLog: Array<{ path: string; action: string; timestamp: number }> = [];

  constructor() {
    // Initialize with default test secrets
    this.secrets.set("api-keys/openrouter", { api_key: "test-key" });
    this.secrets.set("services/dragonfly", {
      host: "127.0.0.1",
      port: 6379,
      password: "test-password",
    });

    // Default policies
    this.policies.set("t0-observer", ["read:secret/data/docs/*", "read:secret/data/inventory/*"]);
    this.policies.set("t1-operator", ["read:ssh/creds/sandbox-*", "read:proxmox/creds/sandbox"]);
  }

  /**
   * Reads a secret and records the read in the access log.
   * @returns The stored value, or `null` when nothing is stored at `path`.
   */
  async getSecret(path: string): Promise<any> {
    this.accessLog.push({ path, action: "read", timestamp: Date.now() });
    // `??` (not `||`) so falsy secrets such as 0, "" or false are returned as stored.
    return this.secrets.get(path) ?? null;
  }

  /** Stores (or overwrites) the secret at `path`. */
  setSecret(path: string, value: any): void {
    this.secrets.set(path, value);
  }

  /**
   * Issues a random `hvs.test-` token bound to `policy`.
   * @param ttl Lifetime in seconds (default 3600).
   */
  async createToken(policy: string, ttl: number = 3600): Promise<string> {
    const token = "hvs.test-" + Math.random().toString(36).slice(2);
    this.tokens.set(token, { policy, ttl, created: Date.now() });
    return token;
  }

  /** Returns `true` when the token exists and its TTL has not elapsed. */
  async validateToken(token: string): Promise<boolean> {
    const tokenData = this.tokens.get(token);
    if (!tokenData) return false;
    return !MockVault.isTokenExpired(tokenData);
  }

  /** Removes the token; unknown tokens are ignored. */
  async revokeToken(token: string): Promise<void> {
    this.tokens.delete(token);
  }

  /**
   * Checks whether `token` may access `path` under its policy.
   *
   * Policy rules are globs where `*` matches any run of characters; the whole
   * path must match the rule. Unknown, revoked, or expired tokens are denied.
   */
  checkAccess(token: string, path: string): boolean {
    const tokenData = this.tokens.get(token);
    if (!tokenData || MockVault.isTokenExpired(tokenData)) return false;

    const allowedPaths = this.policies.get(tokenData.policy) ?? [];
    return allowedPaths.some((rule) => MockVault.globToRegExp(rule).test(path));
  }

  /** Returns the live access log (not a copy). */
  getAccessLog(): Array<{ path: string; action: string; timestamp: number }> {
    return this.accessLog;
  }

  /** Clears the access log and all tokens. Secrets and policies are kept. */
  reset(): void {
    this.accessLog = [];
    this.tokens.clear();
  }

  /** True once at least `ttl` seconds have passed since the token was created. */
  private static isTokenExpired(tokenData: { ttl: number; created: number }): boolean {
    const elapsedSeconds = (Date.now() - tokenData.created) / 1000;
    return elapsedSeconds >= tokenData.ttl;
  }

  /** Compiles a `*` glob into an anchored RegExp with every other character taken literally. */
  private static globToRegExp(glob: string): RegExp {
    const body = glob
      .split("*")
      .map((literal) => literal.replace(/[.+?^${}()|[\]\\]/g, "\\$&"))
      .join(".*");
    return new RegExp(`^${body}$`);
  }
}

// =============================================================================
// Mock DragonflyDB
// =============================================================================

/**
 * In-memory stand-in for a Redis-compatible store covering the string, hash,
 * list, and pub/sub commands used by the agents. Keys share one keyspace for
 * `del`, `exists`, `expire`, and TTL eviction. Type conflicts (e.g. `hSet` on
 * a string key) are not modelled.
 */
export class MockDragonfly {
  private store: Map<string, any> = new Map();
  private hashes: Map<string, Map<string, any>> = new Map();
  private lists: Map<string, any[]> = new Map();
  /** Absolute expiry time in epoch milliseconds, per key. */
  private expirations: Map<string, number> = new Map();
  private subscribers: Map<string, Array<(msg: string) => void>> = new Map();

  // String operations

  /**
   * Stores `value` at `key`, replacing any previous value and its TTL.
   * @param options `EX`: TTL in seconds. `NX`: only set when the key is absent.
   * @returns `"OK"`, or `null` when `NX` is set and the key already exists.
   */
  async set(key: string, value: any, options?: { EX?: number; NX?: boolean }): Promise<string | null> {
    this.evictIfExpired(key);
    if (options?.NX && this.hasKey(key)) return null;

    // Overwriting discards the old value and TTL, so a stale expiry cannot
    // delete the new value later.
    this.removeKey(key);
    this.store.set(key, value);
    if (options?.EX) {
      this.expirations.set(key, Date.now() + options.EX * 1000);
    }
    return "OK";
  }

  /** Returns the string value at `key`, or `null` when missing or expired. */
  async get(key: string): Promise<any> {
    this.evictIfExpired(key);
    // `??` keeps falsy stored values (0, "", false) intact.
    return this.store.get(key) ?? null;
  }

  /** Deletes `key` whatever it holds. Returns 1 when something was removed, else 0. */
  async del(key: string): Promise<number> {
    this.evictIfExpired(key);
    return this.removeKey(key) ? 1 : 0;
  }

  /** Returns 1 when `key` holds a live (non-expired) value of any kind, else 0. */
  async exists(key: string): Promise<number> {
    this.evictIfExpired(key);
    return this.hasKey(key) ? 1 : 0;
  }

  /**
   * Sets a TTL of `seconds` on an existing key.
   * @returns `false` when the key does not exist.
   */
  async expire(key: string, seconds: number): Promise<boolean> {
    this.evictIfExpired(key);
    if (!this.hasKey(key)) return false;
    this.expirations.set(key, Date.now() + seconds * 1000);
    return true;
  }

  // Hash operations

  /** Sets one hash field. Returns 1 when the field is new, 0 when it was overwritten. */
  async hSet(key: string, field: string, value: any): Promise<number> {
    this.evictIfExpired(key);
    const hash = this.hashFor(key);
    const existed = hash.has(field);
    hash.set(field, value);
    return existed ? 0 : 1;
  }

  /** Returns one hash field, or `null` when the key or field is missing. */
  async hGet(key: string, field: string): Promise<any> {
    this.evictIfExpired(key);
    return this.hashes.get(key)?.get(field) ?? null;
  }

  /** Returns all fields of the hash as a plain object (`{}` when missing). */
  async hGetAll(key: string): Promise<Record<string, any>> {
    this.evictIfExpired(key);
    const hash = this.hashes.get(key);
    return hash ? Object.fromEntries(hash) : {};
  }

  /**
   * Adds `increment` to an integer hash field (a missing field counts as 0) and
   * stores the result as a string.
   * @throws Error when the current value is not an integer.
   */
  async hIncrBy(key: string, field: string, increment: number): Promise<number> {
    this.evictIfExpired(key);
    const raw = this.hashes.get(key)?.get(field);
    const current = raw == null || raw === "" ? 0 : Number(raw);
    if (!Number.isInteger(current)) {
      // Fail loudly instead of silently storing "NaN".
      throw new Error("ERR hash value is not an integer");
    }
    const newValue = current + increment;
    this.hashFor(key).set(field, newValue.toString());
    return newValue;
  }

  // List operations

  /** Appends values to the list at `key` and returns the new length. */
  async rPush(key: string, ...values: any[]): Promise<number> {
    this.evictIfExpired(key);
    // Pushing nothing must not create an empty list.
    if (values.length === 0) return this.lists.get(key)?.length ?? 0;

    let list = this.lists.get(key);
    if (!list) {
      list = [];
      this.lists.set(key, list);
    }
    list.push(...values);
    return list.length;
  }

  /**
   * Returns the elements from `start` to `stop`, both inclusive. Negative
   * indices count from the end (-1 is the last element); out-of-range indices
   * are clamped.
   */
  async lRange(key: string, start: number, stop: number): Promise<any[]> {
    this.evictIfExpired(key);
    const list = this.lists.get(key) ?? [];
    const length = list.length;

    const from = start < 0 ? Math.max(length + start, 0) : start;
    const to = stop < 0 ? length + stop : Math.min(stop, length - 1);
    if (from > to) return [];
    return list.slice(from, to + 1);
  }

  // Pub/Sub

  /** Registers `handler` for messages published on `channel`. */
  async subscribe(channel: string, handler: (msg: string) => void): Promise<void> {
    if (!this.subscribers.has(channel)) this.subscribers.set(channel, []);
    this.subscribers.get(channel)!.push(handler);
  }

  /** Synchronously calls every handler on `channel`; returns how many were called. */
  async publish(channel: string, message: string): Promise<number> {
    const handlers = this.subscribers.get(channel) ?? [];
    handlers.forEach((handler) => handler(message));
    return handlers.length;
  }

  /** Drops all handlers for `channel`, or for every channel when omitted. */
  async unsubscribe(channel?: string): Promise<void> {
    if (channel) {
      this.subscribers.delete(channel);
    } else {
      this.subscribers.clear();
    }
  }

  // Utility

  /** True when `key` has a TTL whose deadline has been reached. */
  private isExpired(key: string): boolean {
    const expiry = this.expirations.get(key);
    return expiry !== undefined && Date.now() >= expiry;
  }

  /** Removes `key` entirely when its TTL has elapsed. */
  private evictIfExpired(key: string): void {
    if (this.isExpired(key)) this.removeKey(key);
  }

  /** True when `key` holds a string, hash, or list. */
  private hasKey(key: string): boolean {
    return this.store.has(key) || this.hashes.has(key) || this.lists.has(key);
  }

  /** Deletes `key` from every structure plus its TTL; returns whether it existed. */
  private removeKey(key: string): boolean {
    const existed = this.hasKey(key);
    this.store.delete(key);
    this.hashes.delete(key);
    this.lists.delete(key);
    this.expirations.delete(key);
    return existed;
  }

  /** Returns the hash at `key`, creating an empty one when missing. */
  private hashFor(key: string): Map<string, any> {
    let hash = this.hashes.get(key);
    if (!hash) {
      hash = new Map();
      this.hashes.set(key, hash);
    }
    return hash;
  }

  /** Clears all data, TTLs, and subscribers. */
  reset(): void {
    this.store.clear();
    this.hashes.clear();
    this.lists.clear();
    this.expirations.clear();
    this.subscribers.clear();
  }

  // Test helpers

  /** Exposes the live internal maps (not copies) for white-box assertions. */
  getState(): { store: Map<string, any>; hashes: Map<string, Map<string, any>>; lists: Map<string, any[]> } {
    return { store: this.store, hashes: this.hashes, lists: this.lists };
  }
}

// =============================================================================
// Mock LLM
// =============================================================================

/**
 * Scriptable LLM stand-in: canned responses selected by substring match, with
 * optional simulated latency and random failures.
 */
export class MockLLM {
  private responses: Map<string, string> = new Map();
  private callLog: Array<{ prompt: string; response: string; timestamp: number }> = [];
  private defaultResponse = '{"confidence": 0.5, "steps": [], "assumptions": []}';
  private latencyMs = 0;
  private failureRate = 0;

  /** Registers `response` for prompts that contain `pattern` (case-insensitive). */
  setResponse(pattern: string, response: string): void {
    this.responses.set(pattern, response);
  }

  /** Sets the response used when no pattern matches. */
  setDefaultResponse(response: string): void {
    this.defaultResponse = response;
  }

  /** Sets the artificial delay, in milliseconds, applied to each call. */
  setLatency(ms: number): void {
    this.latencyMs = ms;
  }

  /** Sets the probability that a call throws: 0 never fails, 1 always fails. */
  setFailureRate(rate: number): void {
    this.failureRate = rate;
  }

  /**
   * Returns the response of the first registered pattern (in registration
   * order) found in `prompt`, else the default response. `options` is accepted
   * but not used.
   * @throws Error with message `LLM_ERROR: Simulated failure` at the configured
   *   failure rate; failed calls are not added to the call log.
   */
  async complete(prompt: string, options?: { maxTokens?: number; temperature?: number }): Promise<string> {
    // Simulate latency
    if (this.latencyMs > 0) {
      await new Promise((resolve) => setTimeout(resolve, this.latencyMs));
    }

    // Simulate failures
    if (Math.random() < this.failureRate) {
      throw new Error("LLM_ERROR: Simulated failure");
    }

    // Find matching response
    const needleSource = prompt.toLowerCase();
    let response = this.defaultResponse;
    for (const [pattern, candidate] of this.responses) {
      if (needleSource.includes(pattern.toLowerCase())) {
        response = candidate;
        break;
      }
    }

    this.callLog.push({ prompt, response, timestamp: Date.now() });
    return response;
  }

  /** Returns the live log of successful calls (not a copy). */
  getCallLog(): Array<{ prompt: string; response: string; timestamp: number }> {
    return this.callLog;
  }

  /** Number of successful calls recorded so far. */
  getCallCount(): number {
    return this.callLog.length;
  }

  /** Clears the call log, latency, and failure rate. Registered responses are kept. */
  reset(): void {
    this.callLog = [];
    this.latencyMs = 0;
    this.failureRate = 0;
  }
}
// =============================================================================
// Test Utilities
// =============================================================================

/**
 * Builds a context with random task/agent ids and fresh mocks.
 * @param overrides Fields that replace the generated defaults.
 */
export function createTestContext(overrides?: Partial<TestContext>): TestContext {
  return {
    taskId: "test-task-" + Math.random().toString(36).slice(2, 8),
    agentId: "test-agent-" + Math.random().toString(36).slice(2, 8),
    startTime: Date.now(),
    mockVault: new MockVault(),
    mockDragonfly: new MockDragonfly(),
    mockLLM: new MockLLM(),
    ...overrides,
  };
}

/**
 * Resolves or rejects with `promise`, unless `ms` milliseconds pass first, in
 * which case it rejects with `new Error(message)`.
 */
export async function withTimeout<T>(promise: Promise<T>, ms: number, message: string = "Timeout"): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error(message)), ms);
  });

  try {
    return await Promise.race([promise, timeout]);
  } finally {
    // Without this the timer stays pending after the race has settled.
    clearTimeout(timer);
  }
}

/** Builds a fixed-shape instruction packet for the given task, agent, and objective. */
export function generateInstructionPacket(taskId: string, agentId: string, objective: string) {
  return {
    agent_id: agentId,
    task_id: taskId,
    created_for: "Test Task",
    objective,
    deliverables: ["plan", "report"],
    constraints: {
      scope: ["sandbox only"],
      forbidden: ["no prod access"],
      required_steps: ["plan before execute"],
    },
    success_criteria: ["plan generated"],
    error_budget: {
      max_total_errors: 5,
      max_same_error_repeats: 2,
      max_procedure_violations: 1,
    },
    escalation_rules: ["If confidence < 0.7 -> escalate"],
    created_at: new Date().toISOString(),
  };
}

/** Extracts a printable message from any thrown value, not only `Error` instances. */
function describeThrown(thrown: unknown): string {
  return thrown instanceof Error ? thrown.message : String(thrown);
}

// =============================================================================
// Test Harness
// =============================================================================

/** Runs registered scenarios sequentially and records a result per scenario name. */
export class TestHarness {
  private scenarios: TestScenario[] = [];
  private results: Map<string, { passed: boolean; error?: string; duration: number }> = new Map();

  /** Queues a scenario for the next `runAll()`. */
  addScenario(scenario: TestScenario): void {
    this.scenarios.push(scenario);
  }

  /**
   * Runs every scenario in order: setup, execute, assertions, then cleanup.
   * Each scenario gets its own fresh context. Cleanup runs exactly once per
   * scenario, whether or not an earlier phase threw.
   *
   * @returns Counts and total duration. `coverage` is the percentage of
   *   scenarios that passed, not code coverage.
   */
  async runAll(): Promise<TestMetrics> {
    const startTime = Date.now();
    let passed = 0;
    let failed = 0;

    for (const scenario of this.scenarios) {
      const scenarioStart = Date.now();
      const ctx = createTestContext();
      let failure: string | undefined;

      console.log(`\n[TEST] Running: ${scenario.name}`);
      try {
        await scenario.setup();
        await scenario.execute(ctx);
        await scenario.assertions(ctx);
      } catch (error: unknown) {
        failure = describeThrown(error);
      }

      try {
        await scenario.cleanup();
      } catch (error: unknown) {
        const cleanupMessage = describeThrown(error);
        if (failure === undefined) {
          // A cleanup failure fails an otherwise passing scenario.
          failure = cleanupMessage;
        } else {
          // Keep the original failure as the result, but do not hide this one.
          console.log(`[WARN] ${scenario.name}: cleanup failed: ${cleanupMessage}`);
        }
      }

      const duration = Date.now() - scenarioStart;
      if (failure === undefined) {
        this.results.set(scenario.name, { passed: true, duration });
        passed++;
        console.log(`[PASS] ${scenario.name} (${duration}ms)`);
      } else {
        this.results.set(scenario.name, { passed: false, error: failure, duration });
        failed++;
        console.log(`[FAIL] ${scenario.name}: ${failure}`);
      }
    }

    return {
      passed,
      failed,
      skipped: 0,
      duration: Date.now() - startTime,
      coverage: this.scenarios.length > 0 ? (passed / this.scenarios.length) * 100 : 0,
    };
  }

  /** Returns the live result map, keyed by scenario name. */
  getResults(): Map<string, { passed: boolean; error?: string; duration: number }> {
    return this.results;
  }
}

// =============================================================================
// Pre-built Test Scenarios
// =============================================================================

/**
 * Factory functions for reusable scenarios.
 *
 * NOTE(review): `happyPath` and `errorBudgetExceeded` accept `AgentClass` but
 * never use it, and nothing here writes `agent:<id>:state`. Their assertions
 * only run when that key exists, so they pass vacuously unless the caller runs
 * an agent against the context.
 */
export const CommonScenarios = {
  // Happy path - agent completes successfully
  happyPath: (AgentClass: any): TestScenario => ({
    name: "Happy Path - Successful Completion",
    description: "Agent completes all phases without errors",
    setup: async () => {},
    execute: async (ctx) => {
      // Set up successful LLM responses
      ctx.mockLLM.setResponse("plan", JSON.stringify({
        title: "Test Plan",
        confidence: 0.85,
        steps: [{ step: 1, action: "Test action" }],
      }));

      // Create instruction packet
      const packet = generateInstructionPacket(ctx.taskId, ctx.agentId, "Test objective");
      await ctx.mockDragonfly.set(`agent:${ctx.agentId}:packet`, JSON.stringify(packet));
    },
    assertions: async (ctx) => {
      // Check state reached EXIT (skipped when no state was written)
      const stateStr = await ctx.mockDragonfly.get(`agent:${ctx.agentId}:state`);
      if (stateStr) {
        const state = JSON.parse(stateStr);
        expect(state.phase).toBe("EXIT");
        expect(state.status).toBe("COMPLETED");
      }
    },
    cleanup: async () => {},
  }),

  // Error budget exceeded
  errorBudgetExceeded: (AgentClass: any): TestScenario => ({
    name: "Error Budget Exceeded - Revocation",
    description: "Agent is revoked when error budget is exceeded",
    setup: async () => {},
    execute: async (ctx) => {
      ctx.mockLLM.setFailureRate(1.0); // All LLM calls fail

      const packet = generateInstructionPacket(ctx.taskId, ctx.agentId, "Test objective");
      packet.error_budget.max_total_errors = 2;
      await ctx.mockDragonfly.set(`agent:${ctx.agentId}:packet`, JSON.stringify(packet));

      // Simulate errors: 3 recorded against a budget of 2
      await ctx.mockDragonfly.hIncrBy(`agent:${ctx.agentId}:errors`, "total_errors", 3);
    },
    assertions: async (ctx) => {
      // Skipped when no state was written
      const stateStr = await ctx.mockDragonfly.get(`agent:${ctx.agentId}:state`);
      if (stateStr) {
        const state = JSON.parse(stateStr);
        expect(state.status).toBe("REVOKED");
      }
    },
    cleanup: async () => {},
  }),

  // Stuck detection
  stuckDetection: (): TestScenario => ({
    name: "Stuck Detection - GAMMA Spawn",
    description: "GAMMA is spawned when agents are stuck",
    setup: async () => {},
    execute: async (ctx) => {
      // Set up agent state as stuck (old last_activity)
      const stuckState = {
        agent_id: ctx.agentId,
        role: "ALPHA",
        status: "WORKING",
        last_activity: new Date(Date.now() - 60000).toISOString(), // 60 seconds ago
      };
      await ctx.mockDragonfly.hSet(`agents:${ctx.taskId}`, "ALPHA", JSON.stringify(stuckState));
    },
    assertions: async (ctx) => {
      // Check that stuck would be detected
      const stateStr = await ctx.mockDragonfly.hGet(`agents:${ctx.taskId}`, "ALPHA");
      if (stateStr) {
        const state = JSON.parse(stateStr);
        const inactivity = (Date.now() - new Date(state.last_activity).getTime()) / 1000;
        expect(inactivity).toBeGreaterThan(30);
      }
    },
    cleanup: async () => {},
  }),

  // Conflict resolution
  conflictResolution: (): TestScenario => ({
    name: "Conflict Resolution",
    description: "Multiple proposals lead to conflict detection",
    setup: async () => {},
    execute: async (ctx) => {
      // Simulate conflicting proposals
      await ctx.mockDragonfly.hSet(`blackboard:${ctx.taskId}:solutions`, "proposal_1", JSON.stringify({
        author: "ALPHA",
        value: { approach: "Approach A", confidence: 0.8 },
      }));
      await ctx.mockDragonfly.hSet(`blackboard:${ctx.taskId}:solutions`, "proposal_2", JSON.stringify({
        author: "ALPHA",
        value: { approach: "Approach B", confidence: 0.7 },
      }));

      // BETA rejects proposal_2
      await ctx.mockDragonfly.hSet(`blackboard:${ctx.taskId}:progress`, "eval_proposal_2", JSON.stringify({
        accepted: false,
        score: 0.5,
      }));

      await ctx.mockDragonfly.hIncrBy(`metrics:${ctx.taskId}`, "conflicts_detected", 1);
    },
    assertions: async (ctx) => {
      const conflicts = await ctx.mockDragonfly.hGet(`metrics:${ctx.taskId}`, "conflicts_detected");
      expect(parseInt(conflicts || "0")).toBeGreaterThan(0);
    },
    cleanup: async () => {},
  }),
};

// =============================================================================
// Example Test Suite
// =============================================================================

/** Registers a `bun:test` suite that exercises the mocks and packet generator. */
export function runExampleTests() {
  describe("Agent Governance Tests", () => {
    let ctx: TestContext;

    beforeEach(() => {
      ctx = createTestContext();
    });

    describe("MockVault", () => {
      it("should store and retrieve secrets", async () => {
        ctx.mockVault.setSecret("test/secret", { key: "value" });
        const secret = await ctx.mockVault.getSecret("test/secret");
        expect(secret.key).toBe("value");
      });

      it("should create and validate tokens", async () => {
        const token = await ctx.mockVault.createToken("t0-observer", 60);
        expect(token).toStartWith("hvs.test-");
        expect(await ctx.mockVault.validateToken(token)).toBe(true);
      });

      it("should revoke tokens", async () => {
        const token = await ctx.mockVault.createToken("t0-observer");
        await ctx.mockVault.revokeToken(token);
        expect(await ctx.mockVault.validateToken(token)).toBe(false);
      });
    });

    describe("MockDragonfly", () => {
      it("should handle string operations", async () => {
        await ctx.mockDragonfly.set("key", "value");
        expect(await ctx.mockDragonfly.get("key")).toBe("value");
      });

      it("should handle hash operations", async () => {
        await ctx.mockDragonfly.hSet("hash", "field", "value");
        expect(await ctx.mockDragonfly.hGet("hash", "field")).toBe("value");
      });

      it("should handle list operations", async () => {
        await ctx.mockDragonfly.rPush("list", "a", "b", "c");
        const items = await ctx.mockDragonfly.lRange("list", 0, -1);
        expect(items).toEqual(["a", "b", "c"]);
      });

      it("should handle expiration", async () => {
        await ctx.mockDragonfly.set("expiring", "value", { EX: 1 });
        expect(await ctx.mockDragonfly.get("expiring")).toBe("value");
        // Note: In real tests, we'd wait for expiration
      });

      it("should handle NX option", async () => {
        await ctx.mockDragonfly.set("existing", "first");
        const result = await ctx.mockDragonfly.set("existing", "second", { NX: true });
        expect(result).toBeNull();
        expect(await ctx.mockDragonfly.get("existing")).toBe("first");
      });
    });

    describe("MockLLM", () => {
      it("should return default response", async () => {
        const response = await ctx.mockLLM.complete("test prompt");
        expect(response).toContain("confidence");
      });

      it("should match patterns", async () => {
        ctx.mockLLM.setResponse("terraform", '{"tool": "terraform"}');
        const response = await ctx.mockLLM.complete("Create a terraform plan");
        expect(response).toContain("terraform");
      });

      it("should simulate failures", async () => {
        ctx.mockLLM.setFailureRate(1.0);
        // Must be awaited, otherwise the test can end before the rejection is checked.
        await expect(ctx.mockLLM.complete("test")).rejects.toThrow("LLM_ERROR");
      });

      it("should track call count", async () => {
        await ctx.mockLLM.complete("prompt 1");
        await ctx.mockLLM.complete("prompt 2");
        expect(ctx.mockLLM.getCallCount()).toBe(2);
      });
    });

    describe("Instruction Packets", () => {
      it("should generate valid packets", () => {
        const packet = generateInstructionPacket("task-1", "agent-1", "Test objective");
        expect(packet.agent_id).toBe("agent-1");
        expect(packet.task_id).toBe("task-1");
        expect(packet.error_budget.max_total_errors).toBe(5);
      });
    });
  });
}

// =============================================================================
// CLI
// =============================================================================

// When run directly, execute the pre-built scenarios and exit non-zero on any failure.
if (import.meta.main) {
  console.log("Agent Testing Framework");
  console.log("=======================\n");

  const harness = new TestHarness();

  // Add pre-built scenarios
  harness.addScenario(CommonScenarios.happyPath(null));
  harness.addScenario(CommonScenarios.errorBudgetExceeded(null));
  harness.addScenario(CommonScenarios.stuckDetection());
  harness.addScenario(CommonScenarios.conflictResolution());

  harness
    .runAll()
    .then((metrics) => {
      console.log("\n" + "=".repeat(50));
      console.log("TEST RESULTS");
      console.log("=".repeat(50));
      console.log(`Passed: ${metrics.passed}`);
      console.log(`Failed: ${metrics.failed}`);
      console.log(`Skipped: ${metrics.skipped}`);
      console.log(`Duration: ${metrics.duration}ms`);
      console.log(`Coverage: ${metrics.coverage.toFixed(1)}%`);
      console.log("=".repeat(50));

      process.exit(metrics.failed > 0 ? 1 : 0);
    })
    .catch((error: unknown) => {
      // An unexpected harness error must not look like a clean run.
      console.error(error);
      process.exit(1);
    });
}