/**
 * Agent Testing Framework
 * =======================
 * Provides mocks, utilities, and test harnesses for agent development.
 *
 * IMPORTANT: By default, tests require real services (Vault, DragonflyDB).
 * Use --use-mocks flag to explicitly enable mock mode.
 *
 * Usage:
 *   bun run framework.ts                    # Requires real services
 *   bun run framework.ts --use-mocks        # Explicitly use mocks
 *   bun run framework.ts --hybrid           # Real where available, mocks otherwise
 *   bun run framework.ts --validate-only    # Check services without running tests
 */

import { describe, it, expect, beforeAll, afterAll, beforeEach, mock } from "bun:test";
import { existsSync } from "fs";
import { createClient, type RedisClientType } from "redis";

// =============================================================================
// Test Mode Configuration
// =============================================================================

/** How the framework decides between real services and mocks. */
export enum TestMode {
  REAL = "real", // Requires real services, fails if unavailable
  MOCK = "mock", // Uses mocks, clearly logged
  HYBRID = "hybrid", // Uses real where available, mocks otherwise (logged)
}

/** Settings consumed by service validation and the test harness. */
export interface TestConfig {
  mode: TestMode;
  vaultAddr: string;
  dragonflyAddr: string;
  requiredAgents: string[];
  requiredFiles: string[];
  verbose: boolean;
}

const DEFAULT_CONFIG: TestConfig = {
  mode: TestMode.REAL, // Default: require real services
  vaultAddr: process.env.VAULT_ADDR || "https://127.0.0.1:8200",
  dragonflyAddr: process.env.DRAGONFLY_ADDR || "redis://127.0.0.1:6379",
  requiredAgents: [
    "/opt/agent-governance/agents/llm-planner-ts/index.ts",
    "/opt/agent-governance/agents/multi-agent/orchestrator.ts",
  ],
  requiredFiles: [
    "/opt/agent-governance/runtime",
    "/opt/agent-governance/pipeline",
  ],
  verbose: false,
};

// Global config, set by CLI
let globalConfig: TestConfig = { ...DEFAULT_CONFIG };

/**
 * HTTP statuses from Vault's health endpoint that count as "reachable".
 * NOTE(review): presumably these map to active/standby node states - confirm
 * against the Vault health API documentation.
 */
const VAULT_REACHABLE_STATUSES: ReadonlySet<number> = new Set([200, 429, 472, 473]);

/** Extracts a printable message from anything that was thrown. */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

// =============================================================================
// Service Validation
// =============================================================================

/** Outcome of probing a single service. */
export interface ValidationResult {
  service: string;
  available: boolean;
  error?: string;
  latencyMs?: number;
}

/** Aggregate outcome of {@link validateServices}. */
export interface ValidationReport {
  timestamp: string;
  mode: TestMode;
  allServicesAvailable: boolean;
  results: ValidationResult[];
  missingFiles: string[];
  mocksFallback: string[];
}

/**
 * Probes Vault and DragonflyDB and checks that the required files exist.
 *
 * @param config Configuration to validate; defaults to the global config.
 * @returns A report listing per-service results, missing files, and the
 *   services that would fall back to mocks (only populated outside REAL mode).
 */
export async function validateServices(config: TestConfig = globalConfig): Promise<ValidationReport> {
  const results: ValidationResult[] = [];
  const mocksFallback: string[] = [];
  const timestamp = new Date().toISOString();

  // Probes run sequentially on purpose: the Vault fallback temporarily mutates
  // a process-wide TLS environment variable.
  const vaultResult = await checkVault(config.vaultAddr);
  results.push(vaultResult);
  if (!vaultResult.available && config.mode !== TestMode.REAL) {
    mocksFallback.push("Vault");
  }

  const dragonflyResult = await checkDragonfly(config.dragonflyAddr);
  results.push(dragonflyResult);
  if (!dragonflyResult.available && config.mode !== TestMode.REAL) {
    mocksFallback.push("DragonflyDB");
  }

  const missingFiles = [...config.requiredAgents, ...config.requiredFiles].filter(
    file => !existsSync(file),
  );

  const allServicesAvailable = results.every(r => r.available) && missingFiles.length === 0;

  return {
    timestamp,
    mode: config.mode,
    allServicesAvailable,
    results,
    missingFiles,
    mocksFallback,
  };
}

/**
 * Probes the Vault health endpoint, first with Bun's per-request TLS override
 * and, if that throws, with NODE_TLS_REJECT_UNAUTHORIZED temporarily set to "0".
 * Never throws; failures are reported in the returned result.
 */
async function checkVault(addr: string): Promise<ValidationResult> {
  const start = Date.now();
  const healthUrl = `${addr}/v1/sys/health`;

  try {
    // Use rejectUnauthorized: false for local self-signed certs
    const response = await fetch(healthUrl, {
      method: "GET",
      signal: AbortSignal.timeout(5000),
      // @ts-ignore - Bun supports this for self-signed certs
      tls: { rejectUnauthorized: false },
    });
    return {
      service: "Vault",
      available: VAULT_REACHABLE_STATUSES.has(response.status),
      latencyMs: Date.now() - start,
    };
  } catch (error: unknown) {
    // Fallback: try with NODE_TLS_REJECT_UNAUTHORIZED workaround
    const previousTls = process.env.NODE_TLS_REJECT_UNAUTHORIZED;
    process.env.NODE_TLS_REJECT_UNAUTHORIZED = "0";
    try {
      const response = await fetch(healthUrl, {
        method: "GET",
        signal: AbortSignal.timeout(5000),
      });
      return {
        service: "Vault",
        available: VAULT_REACHABLE_STATUSES.has(response.status),
        latencyMs: Date.now() - start,
      };
    } catch {
      // Report the first failure; it describes the preferred code path.
      return {
        service: "Vault",
        available: false,
        error: errorMessage(error),
        latencyMs: Date.now() - start,
      };
    } finally {
      // Always restore. Assigning `undefined` would store the string
      // "undefined", so an originally-unset variable must be deleted instead.
      if (previousTls === undefined) {
        delete process.env.NODE_TLS_REJECT_UNAUTHORIZED;
      } else {
        process.env.NODE_TLS_REJECT_UNAUTHORIZED = previousTls;
      }
    }
  }
}

/**
 * Connects to DragonflyDB and issues a PING.
 * Never throws; failures are reported in the returned result.
 */
async function checkDragonfly(addr: string): Promise<ValidationResult> {
  const start = Date.now();
  // SECURITY NOTE(review): hard-coded fallback password kept for compatibility;
  // prefer supplying DRAGONFLY_PASSWORD.
  const client = createClient({
    url: addr,
    password: process.env.DRAGONFLY_PASSWORD || "governance2026",
  });

  try {
    await client.connect();
    await client.ping();
    return {
      service: "DragonflyDB",
      available: true,
      latencyMs: Date.now() - start,
    };
  } catch (error: unknown) {
    return {
      service: "DragonflyDB",
      available: false,
      error: errorMessage(error),
      latencyMs: Date.now() - start,
    };
  } finally {
    // Release the socket even when ping failed after a successful connect.
    if (client.isOpen) {
      await client.disconnect().catch(() => {});
    }
  }
}

// =============================================================================
// Type Definitions
// =============================================================================

/** Everything a scenario needs: identifiers plus service handles. */
export interface TestContext {
  taskId: string;
  agentId: string;
  startTime: number;
  mode: TestMode;
  vault: IVault;
  dragonfly: IDragonfly;
  llm: ILLM;
  _usingMocks: string[]; // Track which services are mocked
}

/** A named scenario run by {@link TestHarness}. */
export interface TestScenario {
  name: string;
  description: string;
  requiresReal?: string[]; // List of services that must be real
  setup: () => Promise<void>;
  execute: (ctx: TestContext) => Promise<void>;
  assertions: (ctx: TestContext) => Promise<void>;
  cleanup: () => Promise<void>;
}

/** Summary returned by {@link TestHarness.runAll}. */
export interface TestMetrics {
  passed: number;
  failed: number;
  skipped: number;
  duration: number;
  coverage: number;
  mocksUsed: string[];
}

/** Per-scenario record stored by {@link TestHarness}. */
export interface ScenarioResult {
  passed: boolean;
  error?: string;
  duration: number;
  mocks: string[];
}

// =============================================================================
// Service Interfaces (shared by real and mock implementations)
// =============================================================================

export interface IVault {
  getSecret(path: string): Promise<any>;
  setSecret?(path: string, value: any): void;
  createToken(policy: string, ttl?: number): Promise<string>;
  validateToken(token: string): Promise<boolean>;
  revokeToken(token: string): Promise<void>;
  isMock(): boolean;
}

export interface IDragonfly {
  set(key: string, value: any, options?: { EX?: number; NX?: boolean }): Promise<string | null>;
  get(key: string): Promise<string | null>;
  del(key: string): Promise<number>;
  exists(key: string): Promise<number>;
  hSet(key: string, field: string, value: any): Promise<number>;
  hGet(key: string, field: string): Promise<string | null>;
  hGetAll(key: string): Promise<Record<string, string>>;
  hIncrBy(key: string, field: string, increment: number): Promise<number>;
  rPush(key: string, ...values: any[]): Promise<number>;
  lRange(key: string, start: number, stop: number): Promise<string[]>;
  isMock(): boolean;
  disconnect?(): Promise<void>;
}

export interface ILLM {
  complete(prompt: string, options?: { maxTokens?: number; temperature?: number }): Promise<string>;
  isMock(): boolean;
}

// =============================================================================
// Mock Vault
// =============================================================================

/**
 * Converts a policy rule containing `*` wildcards into an anchored RegExp.
 * Every regex metacharacter other than `*` is matched literally.
 */
function globToRegExp(glob: string): RegExp {
  const escaped = glob.replace(/[.+?^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*");
  return new RegExp(`^${escaped}$`);
}

/** In-memory Vault stand-in with seeded secrets, policies and an access log. */
export class MockVault implements IVault {
  private secrets: Map<string, any> = new Map();
  private policies: Map<string, string[]> = new Map();
  private tokens: Map<string, { policy: string; ttl: number; created: number }> = new Map();
  private accessLog: Array<{ path: string; action: string; timestamp: number }> = [];

  constructor() {
    // Initialize with default test secrets
    this.secrets.set("api-keys/openrouter", { api_key: "test-key" });
    this.secrets.set("services/dragonfly", {
      host: "127.0.0.1",
      port: 6379,
      password: "test-password",
    });

    // Default policies
    this.policies.set("t0-observer", ["read:secret/data/docs/*", "read:secret/data/inventory/*"]);
    this.policies.set("t1-operator", ["read:ssh/creds/sandbox-*", "read:proxmox/creds/sandbox"]);
  }

  isMock(): boolean {
    return true;
  }

  /** Returns the stored secret (recording the read), or null when absent. */
  async getSecret(path: string): Promise<any> {
    this.accessLog.push({ path, action: "read", timestamp: Date.now() });
    // `??` keeps legitimately falsy secrets (0, "", false) intact.
    return this.secrets.get(path) ?? null;
  }

  setSecret(path: string, value: any): void {
    this.secrets.set(path, value);
  }

  /** Issues a random `hvs.test-` token bound to `policy`, valid for `ttl` seconds. */
  async createToken(policy: string, ttl: number = 3600): Promise<string> {
    const token = "hvs.test-" + Math.random().toString(36).slice(2);
    this.tokens.set(token, { policy, ttl, created: Date.now() });
    return token;
  }

  /** True when the token exists and its TTL has not elapsed. */
  async validateToken(token: string): Promise<boolean> {
    const tokenData = this.tokens.get(token);
    if (!tokenData) return false;
    const elapsedSeconds = (Date.now() - tokenData.created) / 1000;
    return elapsedSeconds < tokenData.ttl;
  }

  async revokeToken(token: string): Promise<void> {
    this.tokens.delete(token);
  }

  /**
   * Checks `path` against the wildcard rules of the token's policy.
   * Rules are matched in full (anchored) and every `*` acts as a wildcard.
   * Unknown tokens and unknown policies are denied.
   */
  checkAccess(token: string, path: string): boolean {
    const tokenData = this.tokens.get(token);
    if (!tokenData) return false;
    const allowedPaths = this.policies.get(tokenData.policy) ?? [];
    return allowedPaths.some(rule => globToRegExp(rule).test(path));
  }

  getAccessLog(): Array<{ path: string; action: string; timestamp: number }> {
    return this.accessLog;
  }

  /** Clears the access log and issued tokens; secrets and policies are kept. */
  reset(): void {
    this.accessLog = [];
    this.tokens.clear();
  }
}

// =============================================================================
// Real Vault Client
// =============================================================================

/** Thin HTTP client for a real Vault server. */
export class RealVault implements IVault {
  private addr: string;
  private token: string | null = null;

  constructor(addr: string) {
    this.addr = addr;
  }

  isMock(): boolean {
    return false;
  }

  /**
   * Issues a request against Vault. The token defaults to VAULT_TOKEN and can
   * be overridden per call through `options.headers`.
   */
  private async request(
    path: string,
    options: { method?: string; body?: string; headers?: Record<string, string> } = {},
  ): Promise<Response> {
    const token = this.token || process.env.VAULT_TOKEN;
    return fetch(`${this.addr}${path}`, {
      ...options,
      headers: {
        "X-Vault-Token": token || "",
        "Content-Type": "application/json",
        ...options.headers,
      },
      // @ts-ignore - Bun supports this for self-signed certs
      tls: { rejectUnauthorized: false },
    });
  }

  /** Reads `/v1/secret/data/<path>`; returns null on any failure. */
  async getSecret(path: string): Promise<any> {
    try {
      const response = await this.request(`/v1/secret/data/${path}`);
      if (!response.ok) return null;
      const data: any = await response.json();
      return data?.data?.data || null;
    } catch {
      return null;
    }
  }

  /** Creates a token for `policy`; returns "" when the response carries none. */
  async createToken(policy: string, ttl: number = 3600): Promise<string> {
    const response = await this.request("/v1/auth/token/create", {
      method: "POST",
      body: JSON.stringify({
        policies: [policy],
        ttl: `${ttl}s`,
      }),
    });
    const data: any = await response.json();
    return data?.auth?.client_token || "";
  }

  async validateToken(token: string): Promise<boolean> {
    try {
      const response = await this.request("/v1/auth/token/lookup-self", {
        headers: { "X-Vault-Token": token },
      });
      return response.ok;
    } catch {
      return false;
    }
  }

  async revokeToken(token: string): Promise<void> {
    await this.request("/v1/auth/token/revoke-self", {
      method: "POST",
      headers: { "X-Vault-Token": token },
    });
  }

  /**
   * True when the health endpoint answers with a status that service
   * validation also treats as reachable, so both checks agree.
   */
  async testConnection(): Promise<boolean> {
    try {
      const response = await this.request("/v1/sys/health");
      return VAULT_REACHABLE_STATUSES.has(response.status);
    } catch {
      return false;
    }
  }
}

// =============================================================================
// Mock DragonflyDB
// =============================================================================

/** In-memory stand-in for the subset of Redis commands the framework uses. */
export class MockDragonfly implements IDragonfly {
  private store: Map<string, any> = new Map();
  private hashes: Map<string, Map<string, any>> = new Map();
  private lists: Map<string, any[]> = new Map();
  private expirations: Map<string, number> = new Map();
  private subscribers: Map<string, Array<(msg: string) => void>> = new Map();

  isMock(): boolean {
    return true;
  }

  // String operations

  /**
   * Stores `value`. With `NX` the write is skipped (returning null) when the
   * key already exists. A write without `EX` clears any previous TTL.
   */
  async set(key: string, value: any, options?: { EX?: number; NX?: boolean }): Promise<string | null> {
    if (options?.NX && this.hasLiveKey(key)) return null;
    this.store.set(key, value);
    if (options?.EX) {
      this.expirations.set(key, Date.now() + options.EX * 1000);
    } else {
      // Without this a stale expiry would make the fresh value unreadable.
      this.expirations.delete(key);
    }
    return "OK";
  }

  async get(key: string): Promise<any> {
    this.purgeIfExpired(key);
    // `??` keeps stored falsy values (0, "", false) intact.
    return this.store.get(key) ?? null;
  }

  /** Removes the key regardless of its type; returns 1 if it existed, else 0. */
  async del(key: string): Promise<number> {
    const existed = this.hasLiveKey(key);
    this.store.delete(key);
    this.hashes.delete(key);
    this.lists.delete(key);
    this.expirations.delete(key);
    return existed ? 1 : 0;
  }

  async exists(key: string): Promise<number> {
    return this.hasLiveKey(key) ? 1 : 0;
  }

  async expire(key: string, seconds: number): Promise<boolean> {
    if (!this.store.has(key)) return false;
    this.expirations.set(key, Date.now() + seconds * 1000);
    return true;
  }

  // Hash operations

  /** Returns 1 when the field was newly created, 0 when it was overwritten. */
  async hSet(key: string, field: string, value: any): Promise<number> {
    const hash = this.hashFor(key);
    const existed = hash.has(field);
    hash.set(field, value);
    return existed ? 0 : 1;
  }

  async hGet(key: string, field: string): Promise<any> {
    return this.hashes.get(key)?.get(field) ?? null;
  }

  async hGetAll(key: string): Promise<Record<string, any>> {
    const hash = this.hashes.get(key);
    return hash ? Object.fromEntries(hash) : {};
  }

  /** Adds `increment` to the field (missing fields start at 0); stored as a string. */
  async hIncrBy(key: string, field: string, increment: number): Promise<number> {
    const hash = this.hashFor(key);
    const current = parseInt(String(hash.get(field) || "0"), 10);
    const newValue = current + increment;
    hash.set(field, newValue.toString());
    return newValue;
  }

  // List operations

  async rPush(key: string, ...values: any[]): Promise<number> {
    let list = this.lists.get(key);
    if (!list) {
      list = [];
      this.lists.set(key, list);
    }
    list.push(...values);
    return list.length;
  }

  /**
   * Returns the elements between `start` and `stop`, both inclusive.
   * Negative indexes count from the end of the list (-1 is the last element).
   */
  async lRange(key: string, start: number, stop: number): Promise<any[]> {
    const list = this.lists.get(key) ?? [];
    const length = list.length;
    const from = start < 0 ? Math.max(length + start, 0) : start;
    const to = stop < 0 ? length + stop : Math.min(stop, length - 1);
    if (from > to) return [];
    return list.slice(from, to + 1);
  }

  // Pub/Sub

  async subscribe(channel: string, handler: (msg: string) => void): Promise<void> {
    const handlers = this.subscribers.get(channel) ?? [];
    handlers.push(handler);
    this.subscribers.set(channel, handlers);
  }

  /** Synchronously invokes every handler; returns how many were called. */
  async publish(channel: string, message: string): Promise<number> {
    const handlers = this.subscribers.get(channel) ?? [];
    handlers.forEach(h => h(message));
    return handlers.length;
  }

  async unsubscribe(channel?: string): Promise<void> {
    if (channel) {
      this.subscribers.delete(channel);
    } else {
      this.subscribers.clear();
    }
  }

  // Utility

  private isExpired(key: string): boolean {
    const expiry = this.expirations.get(key);
    if (!expiry) return false;
    return Date.now() > expiry;
  }

  /** Drops a string key whose TTL has elapsed, together with its TTL entry. */
  private purgeIfExpired(key: string): void {
    if (this.isExpired(key)) {
      this.store.delete(key);
      this.expirations.delete(key);
    }
  }

  /** True when the key currently holds a string, hash or list. */
  private hasLiveKey(key: string): boolean {
    this.purgeIfExpired(key);
    return this.store.has(key) || this.hashes.has(key) || this.lists.has(key);
  }

  /** Returns the hash stored at `key`, creating it on first use. */
  private hashFor(key: string): Map<string, any> {
    let hash = this.hashes.get(key);
    if (!hash) {
      hash = new Map();
      this.hashes.set(key, hash);
    }
    return hash;
  }

  reset(): void {
    this.store.clear();
    this.hashes.clear();
    this.lists.clear();
    this.expirations.clear();
    this.subscribers.clear();
  }

  // Test helpers

  /** Exposes the live backing maps (not copies) for assertions. */
  getState(): { store: Map<string, any>; hashes: Map<string, Map<string, any>>; lists: Map<string, any[]> } {
    return { store: this.store, hashes: this.hashes, lists: this.lists };
  }
}

// =============================================================================
// Real DragonflyDB Client
// =============================================================================

/** Lazily-connecting wrapper around a redis client; non-string values are JSON-encoded. */
export class RealDragonfly implements IDragonfly {
  private client: RedisClientType;
  private connected: boolean = false;

  constructor(url: string, password?: string) {
    // SECURITY NOTE(review): hard-coded fallback password kept for
    // compatibility; prefer passing one explicitly or via DRAGONFLY_PASSWORD.
    this.client = createClient({
      url,
      password: password || process.env.DRAGONFLY_PASSWORD || "governance2026",
    });
  }

  isMock(): boolean {
    return false;
  }

  async connect(): Promise<void> {
    if (this.connected) return;
    await this.client.connect();
    this.connected = true;
  }

  async disconnect(): Promise<void> {
    if (!this.connected) return;
    await this.client.disconnect();
    this.connected = false;
  }

  async set(key: string, value: any, options?: { EX?: number; NX?: boolean }): Promise<string | null> {
    await this.connect();
    const opts: any = {};
    if (options?.EX) opts.EX = options.EX;
    if (options?.NX) opts.NX = true;
    return this.client.set(key, RealDragonfly.encode(value), opts);
  }

  async get(key: string): Promise<string | null> {
    await this.connect();
    return this.client.get(key);
  }

  async del(key: string): Promise<number> {
    await this.connect();
    return this.client.del(key);
  }

  async exists(key: string): Promise<number> {
    await this.connect();
    return this.client.exists(key);
  }

  async hSet(key: string, field: string, value: any): Promise<number> {
    await this.connect();
    return this.client.hSet(key, field, RealDragonfly.encode(value));
  }

  async hGet(key: string, field: string): Promise<string | null> {
    await this.connect();
    // Normalise a missing field to null so real and mock clients agree.
    return (await this.client.hGet(key, field)) ?? null;
  }

  async hGetAll(key: string): Promise<Record<string, string>> {
    await this.connect();
    return this.client.hGetAll(key);
  }

  async hIncrBy(key: string, field: string, increment: number): Promise<number> {
    await this.connect();
    return this.client.hIncrBy(key, field, increment);
  }

  async rPush(key: string, ...values: any[]): Promise<number> {
    await this.connect();
    return this.client.rPush(key, values.map(RealDragonfly.encode));
  }

  async lRange(key: string, start: number, stop: number): Promise<string[]> {
    await this.connect();
    return this.client.lRange(key, start, stop);
  }

  /** Strings pass through untouched; everything else is JSON-encoded. */
  private static encode(value: any): string {
    return typeof value === "string" ? value : JSON.stringify(value);
  }
}

// =============================================================================
// Mock LLM
// =============================================================================

/** Scriptable LLM stand-in with pattern-matched replies, latency and failure injection. */
export class MockLLM implements ILLM {
  private responses: Map<string, string> = new Map();
  private callLog: Array<{ prompt: string; response: string; timestamp: number }> = [];
  private defaultResponse: string = '{"confidence": 0.5, "steps": [], "assumptions": []}';
  private latencyMs: number = 0;
  private failureRate: number = 0;

  isMock(): boolean {
    return true;
  }

  /** Registers a reply for prompts containing `pattern` (case-insensitive). */
  setResponse(pattern: string, response: string): void {
    this.responses.set(pattern, response);
  }

  setDefaultResponse(response: string): void {
    this.defaultResponse = response;
  }

  setLatency(ms: number): void {
    this.latencyMs = ms;
  }

  /** Probability in [0, 1] that a call throws. */
  setFailureRate(rate: number): void {
    this.failureRate = rate;
  }

  /**
   * Returns the reply of the first registered pattern found in `prompt`, or
   * the default reply. Only successful calls are recorded in the call log.
   *
   * @throws Error("LLM_ERROR: Simulated failure") according to the failure rate.
   */
  async complete(prompt: string, options?: { maxTokens?: number; temperature?: number }): Promise<string> {
    // Simulate latency
    if (this.latencyMs > 0) {
      await new Promise(r => setTimeout(r, this.latencyMs));
    }

    // Simulate failures
    if (Math.random() < this.failureRate) {
      throw new Error("LLM_ERROR: Simulated failure");
    }

    // Find matching response (first registered pattern wins)
    const loweredPrompt = prompt.toLowerCase();
    let response = this.defaultResponse;
    for (const [pattern, resp] of this.responses) {
      if (loweredPrompt.includes(pattern.toLowerCase())) {
        response = resp;
        break;
      }
    }

    this.callLog.push({ prompt, response, timestamp: Date.now() });
    return response;
  }

  getCallLog(): Array<{ prompt: string; response: string; timestamp: number }> {
    return this.callLog;
  }

  getCallCount(): number {
    return this.callLog.length;
  }

  /** Clears the call log, latency and failure rate; registered replies are kept. */
  reset(): void {
    this.callLog = [];
    this.latencyMs = 0;
    this.failureRate = 0;
  }
}

// =============================================================================
// Test Utilities
// =============================================================================

/**
 * Create a test context. By default, requires real services.
 * Pass mode: TestMode.MOCK to explicitly use mocks.
 *
 * @param options Overrides spread over the generated context; `mode` selects
 *   how services are chosen (defaults to the global config's mode).
 * @throws Error in REAL mode when service validation fails, or when a real
 *   service cannot be reached outside HYBRID mode.
 */
export async function createTestContext(
  options?: Partial<TestContext> & { mode?: TestMode }
): Promise<TestContext> {
  const mode = options?.mode ?? globalConfig.mode;
  const usingMocks: string[] = [];

  // Validate services if not in mock mode
  if (mode === TestMode.REAL) {
    const report = await validateServices(globalConfig);
    if (!report.allServicesAvailable) {
      const errors: string[] = [
        ...report.results
          .filter(r => !r.available)
          .map(r => `${r.service}: ${r.error || "unavailable"}`),
        ...report.missingFiles.map(f => `Missing file: ${f}`),
      ];
      throw new Error(
        `REAL mode requires all services. Missing:\n - ${errors.join("\n - ")}\n\n` +
        `Use --use-mocks to explicitly enable mock mode.`
      );
    }
  }

  // Create Vault (real or mock)
  let vault: IVault;
  if (mode === TestMode.MOCK) {
    vault = new MockVault();
    usingMocks.push("Vault");
  } else if (process.env.VAULT_TOKEN) {
    try {
      const realVault = new RealVault(globalConfig.vaultAddr);
      if (!(await realVault.testConnection())) {
        throw new Error("Vault connection test failed");
      }
      vault = realVault;
    } catch (e) {
      if (mode !== TestMode.HYBRID) throw e;
      vault = new MockVault();
      usingMocks.push("Vault");
    }
  } else {
    // No token: HYBRID falls back to the mock, and so does REAL mode
    // (existing behaviour), with the fallback recorded in _usingMocks.
    vault = new MockVault();
    usingMocks.push("Vault");
  }

  // Create Dragonfly (real or mock)
  let dragonfly: IDragonfly;
  if (mode === TestMode.MOCK) {
    dragonfly = new MockDragonfly();
    usingMocks.push("DragonflyDB");
  } else {
    try {
      const realDragonfly = new RealDragonfly(globalConfig.dragonflyAddr);
      await realDragonfly.connect();
      dragonfly = realDragonfly;
    } catch (e) {
      if (mode !== TestMode.HYBRID) throw e;
      dragonfly = new MockDragonfly();
      usingMocks.push("DragonflyDB");
    }
  }

  // LLM is always mocked unless we have real API key
  const llm = new MockLLM();
  usingMocks.push("LLM");

  return {
    taskId: "test-task-" + Math.random().toString(36).slice(2, 8),
    agentId: "test-agent-" + Math.random().toString(36).slice(2, 8),
    startTime: Date.now(),
    mode,
    vault,
    dragonfly,
    llm,
    _usingMocks: usingMocks,
    ...options,
  };
}

/**
 * Resolves or rejects with `promise`, or rejects with `Error(message)` once
 * `ms` milliseconds have passed, whichever happens first. The timer is always
 * cleared so it cannot keep the process alive after the race is decided.
 */
export async function withTimeout<T>(promise: Promise<T>, ms: number, message: string = "Timeout"): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error(message)), ms);
  });
  try {
    return await Promise.race([promise, timeout]);
  } finally {
    clearTimeout(timer);
  }
}

/** Builds a sandbox-scoped instruction packet for the given task and agent. */
export function generateInstructionPacket(taskId: string, agentId: string, objective: string) {
  return {
    agent_id: agentId,
    task_id: taskId,
    created_for: "Test Task",
    objective,
    deliverables: ["plan", "report"],
    constraints: {
      scope: ["sandbox only"],
      forbidden: ["no prod access"],
      required_steps: ["plan before execute"],
    },
    success_criteria: ["plan generated"],
    error_budget: {
      max_total_errors: 5,
      max_same_error_repeats: 2,
      max_procedure_violations: 1,
    },
    escalation_rules: ["If confidence < 0.7 -> escalate"],
    created_at: new Date().toISOString(),
  };
}

// =============================================================================
// Test Harness
// =============================================================================

/** Runs registered scenarios sequentially and collects per-scenario results. */
export class TestHarness {
  private scenarios: TestScenario[] = [];
  private results: Map<string, ScenarioResult> = new Map();
  private config: TestConfig;

  constructor(config?: Partial<TestConfig>) {
    this.config = { ...globalConfig, ...config };
  }

  addScenario(scenario: TestScenario): void {
    this.scenarios.push(scenario);
  }

  /**
   * Runs every scenario in registration order. A failing scenario is recorded
   * and does not stop the run. `coverage` is the pass percentage over the
   * scenarios that were actually executed (0 when none were).
   */
  async runAll(): Promise<TestMetrics> {
    const startTime = Date.now();
    let passed = 0;
    let failed = 0;
    let skipped = 0;
    const allMocksUsed: Set<string> = new Set();

    // Print mode banner
    this.printModeBanner();

    for (const scenario of this.scenarios) {
      const scenarioStart = Date.now();

      // Check if scenario requires real services
      if (scenario.requiresReal && scenario.requiresReal.length > 0 && this.config.mode === TestMode.MOCK) {
        console.log(`\n[SKIP] ${scenario.name} (requires real: ${scenario.requiresReal.join(", ")})`);
        skipped++;
        continue;
      }

      let ctx: TestContext | undefined;
      let cleanupAttempted = false;
      try {
        console.log(`\n[TEST] Running: ${scenario.name}`);
        ctx = await createTestContext({ mode: this.config.mode });

        // Log mocks in use for this test
        if (ctx._usingMocks.length > 0) {
          console.log(` [MOCKS: ${ctx._usingMocks.join(", ")}]`);
          ctx._usingMocks.forEach(m => allMocksUsed.add(m));
        }

        await scenario.setup();
        await scenario.execute(ctx);
        await scenario.assertions(ctx);
        cleanupAttempted = true;
        await scenario.cleanup();

        this.results.set(scenario.name, {
          passed: true,
          duration: Date.now() - scenarioStart,
          mocks: ctx._usingMocks,
        });
        passed++;
        console.log(`[PASS] ${scenario.name} (${Date.now() - scenarioStart}ms)`);
      } catch (error: unknown) {
        const message = errorMessage(error);
        this.results.set(scenario.name, {
          passed: false,
          error: message,
          duration: Date.now() - scenarioStart,
          mocks: ctx?._usingMocks ?? [],
        });
        failed++;
        console.log(`[FAIL] ${scenario.name}: ${message}`);
        // Best-effort cleanup, skipped when cleanup itself was what failed.
        if (!cleanupAttempted) {
          try { await scenario.cleanup(); } catch {}
        }
      } finally {
        // Cleanup real connections, on failure as well as on success.
        if (ctx?.dragonfly.disconnect) {
          try {
            await ctx.dragonfly.disconnect();
          } catch (disconnectError: unknown) {
            console.log(` [WARN] disconnect failed: ${errorMessage(disconnectError)}`);
          }
        }
      }
    }

    const executed = this.scenarios.length - skipped;
    return {
      passed,
      failed,
      skipped,
      duration: Date.now() - startTime,
      // Guarded so an all-skipped run reports 0 instead of NaN.
      coverage: executed > 0 ? (passed / executed) * 100 : 0,
      mocksUsed: Array.from(allMocksUsed),
    };
  }

  private printModeBanner(): void {
    console.log("\n" + "=".repeat(60));
    if (this.config.mode === TestMode.MOCK) {
      console.log("⚠️ MOCK MODE ENABLED");
      console.log(" Tests are running against MOCK services.");
      console.log(" Results may not reflect real system behavior.");
      console.log(" Remove --use-mocks to test against real services.");
    } else if (this.config.mode === TestMode.HYBRID) {
      console.log("⚠️ HYBRID MODE");
      console.log(" Using real services where available, mocks otherwise.");
      console.log(" Check individual test output for mock usage.");
    } else {
      console.log("✅ REAL MODE");
      console.log(" Tests are running against REAL services.");
    }
    console.log("=".repeat(60));
  }

  getResults(): Map<string, ScenarioResult> {
    return this.results;
  }
}

// =============================================================================
// Pre-built Test Scenarios
// =============================================================================

/**
 * Ready-made scenarios. NOTE(review): the state assertions only run when the
 * corresponding key exists, so they pass trivially if no agent wrote state.
 */
export const CommonScenarios = {
  // Happy path - agent completes successfully
  happyPath: (AgentClass: any): TestScenario => ({
    name: "Happy Path - Successful Completion",
    description: "Agent completes all phases without errors",
    setup: async () => {},
    execute: async (ctx) => {
      if (ctx.llm.isMock()) {
        // Set up successful LLM responses
        (ctx.llm as MockLLM).setResponse("plan", JSON.stringify({
          title: "Test Plan",
          confidence: 0.85,
          steps: [{ step: 1, action: "Test action" }],
        }));
      } else {
        // Only works with mocks - log warning if real
        console.log(" [WARN] This scenario requires mock LLM to control responses");
      }

      // Create instruction packet
      const packet = generateInstructionPacket(ctx.taskId, ctx.agentId, "Test objective");
      await ctx.dragonfly.set(`agent:${ctx.agentId}:packet`, JSON.stringify(packet));
    },
    assertions: async (ctx) => {
      // Check state reached EXIT
      const stateStr = await ctx.dragonfly.get(`agent:${ctx.agentId}:state`);
      if (stateStr) {
        const state = JSON.parse(stateStr);
        expect(state.phase).toBe("EXIT");
        expect(state.status).toBe("COMPLETED");
      }
    },
    cleanup: async () => {},
  }),

  // Error budget exceeded
  errorBudgetExceeded: (AgentClass: any): TestScenario => ({
    name: "Error Budget Exceeded - Revocation",
    description: "Agent is revoked when error budget is exceeded",
    setup: async () => {},
    execute: async (ctx) => {
      if (ctx.llm.isMock()) {
        (ctx.llm as MockLLM).setFailureRate(1.0); // All LLM calls fail
      }

      const packet = generateInstructionPacket(ctx.taskId, ctx.agentId, "Test objective");
      packet.error_budget.max_total_errors = 2;
      await ctx.dragonfly.set(`agent:${ctx.agentId}:packet`, JSON.stringify(packet));

      // Simulate errors
      await ctx.dragonfly.hIncrBy(`agent:${ctx.agentId}:errors`, "total_errors", 3);
    },
    assertions: async (ctx) => {
      const stateStr = await ctx.dragonfly.get(`agent:${ctx.agentId}:state`);
      if (stateStr) {
        const state = JSON.parse(stateStr);
        expect(state.status).toBe("REVOKED");
      }
    },
    cleanup: async () => {},
  }),

  // Stuck detection
  stuckDetection: (): TestScenario => ({
    name: "Stuck Detection - GAMMA Spawn",
    description: "GAMMA is spawned when agents are stuck",
    setup: async () => {},
    execute: async (ctx) => {
      // Set up agent state as stuck (old last_activity)
      const stuckState = {
        agent_id: ctx.agentId,
        role: "ALPHA",
        status: "WORKING",
        last_activity: new Date(Date.now() - 60000).toISOString(), // 60 seconds ago
      };
      await ctx.dragonfly.hSet(`agents:${ctx.taskId}`, "ALPHA", JSON.stringify(stuckState));
    },
    assertions: async (ctx) => {
      // Check that stuck would be detected
      const stateStr = await ctx.dragonfly.hGet(`agents:${ctx.taskId}`, "ALPHA");
      if (stateStr) {
        const state = JSON.parse(stateStr);
        const inactivity = (Date.now() - new Date(state.last_activity).getTime()) / 1000;
        expect(inactivity).toBeGreaterThan(30);
      }
    },
    cleanup: async () => {},
  }),

  // Conflict resolution
  conflictResolution: (): TestScenario => ({
    name: "Conflict Resolution",
    description: "Multiple proposals lead to conflict detection",
    setup: async () => {},
    execute: async (ctx) => {
      const solutionsKey = `blackboard:${ctx.taskId}:solutions`;

      // Simulate conflicting proposals
      await ctx.dragonfly.hSet(solutionsKey, "proposal_1", JSON.stringify({
        author: "ALPHA",
        value: { approach: "Approach A", confidence: 0.8 },
      }));
      await ctx.dragonfly.hSet(solutionsKey, "proposal_2", JSON.stringify({
        author: "ALPHA",
        value: { approach: "Approach B", confidence: 0.7 },
      }));

      // BETA rejects proposal_2
      await ctx.dragonfly.hSet(`blackboard:${ctx.taskId}:progress`, "eval_proposal_2", JSON.stringify({
        accepted: false,
        score: 0.5,
      }));

      await ctx.dragonfly.hIncrBy(`metrics:${ctx.taskId}`, "conflicts_detected", 1);
    },
    assertions: async (ctx) => {
      const conflicts = await ctx.dragonfly.hGet(`metrics:${ctx.taskId}`, "conflicts_detected");
      expect(parseInt(conflicts || "0")).toBeGreaterThan(0);
    },
    cleanup: async () => {},
  }),

  // Real service connectivity test
  realServiceConnectivity: (): TestScenario => ({
    name: "Real Service Connectivity",
    description: "Verify connection to real Vault and DragonflyDB",
    requiresReal: ["Vault", "DragonflyDB"],
    setup: async () => {},
    execute: async (ctx) => {
      if (ctx.vault.isMock() || ctx.dragonfly.isMock()) {
        throw new Error("This test requires real services, but mocks are in use");
      }

      // Test DragonflyDB with a write / read / delete round trip
      const testKey = `test:connectivity:${Date.now()}`;
      await ctx.dragonfly.set(testKey, "test-value");
      const value = await ctx.dragonfly.get(testKey);
      await ctx.dragonfly.del(testKey);

      if (value !== "test-value") {
        throw new Error(`DragonflyDB read/write failed: expected 'test-value', got '${value}'`);
      }
    },
    assertions: async (ctx) => {
      // If we got here, services are working
      expect(ctx.vault.isMock()).toBe(false);
      expect(ctx.dragonfly.isMock()).toBe(false);
    },
    cleanup: async () => {},
  }),
};

// =============================================================================
// Example Test Suite
// =============================================================================

/** Registers a bun:test suite that exercises the mocks in MOCK mode. */
export function runExampleTests() {
  describe("Agent Governance Tests", () => {
    let ctx: TestContext;

    beforeEach(async () => {
      ctx = await createTestContext({ mode: TestMode.MOCK });
    });

    describe("MockVault", () => {
      it("should store and retrieve secrets", async () => {
        (ctx.vault as MockVault).setSecret("test/secret", { key: "value" });
        const secret = await ctx.vault.getSecret("test/secret");
        expect(secret.key).toBe("value");
      });

      it("should create and validate tokens", async () => {
        const token = await ctx.vault.createToken("t0-observer", 60);
        expect(token).toStartWith("hvs.test-");
        expect(await ctx.vault.validateToken(token)).toBe(true);
      });

      it("should revoke tokens", async () => {
        const token = await ctx.vault.createToken("t0-observer");
        await ctx.vault.revokeToken(token);
        expect(await ctx.vault.validateToken(token)).toBe(false);
      });
    });

    describe("MockDragonfly", () => {
      it("should handle string operations", async () => {
        await ctx.dragonfly.set("key", "value");
        expect(await ctx.dragonfly.get("key")).toBe("value");
      });

      it("should handle hash operations", async () => {
        await ctx.dragonfly.hSet("hash", "field", "value");
        expect(await ctx.dragonfly.hGet("hash", "field")).toBe("value");
      });

      it("should handle list operations", async () => {
        await (ctx.dragonfly as MockDragonfly).rPush("list", "a", "b", "c");
        const items = await ctx.dragonfly.lRange("list", 0, -1);
        expect(items).toEqual(["a", "b", "c"]);
      });

      it("should handle NX option", async () => {
        await ctx.dragonfly.set("existing", "first");
        const result = await ctx.dragonfly.set("existing", "second", { NX: true });
        expect(result).toBeNull();
        expect(await ctx.dragonfly.get("existing")).toBe("first");
      });
    });

    describe("MockLLM", () => {
      it("should return default response", async () => {
        const response = await ctx.llm.complete("test prompt");
        expect(response).toContain("confidence");
      });

      it("should match patterns", async () => {
        (ctx.llm as MockLLM).setResponse("terraform", '{"tool": "terraform"}');
        const response = await ctx.llm.complete("Create a terraform plan");
        expect(response).toContain("terraform");
      });

      it("should simulate failures", async () => {
        (ctx.llm as MockLLM).setFailureRate(1.0);
        // Awaited so the rejection assertion is actually evaluated.
        await expect(ctx.llm.complete("test")).rejects.toThrow("LLM_ERROR");
      });

      it("should track call count", async () => {
        await ctx.llm.complete("prompt 1");
        await ctx.llm.complete("prompt 2");
        expect((ctx.llm as MockLLM).getCallCount()).toBe(2);
      });
    });

    describe("Instruction Packets", () => {
      it("should generate valid packets", () => {
        const packet = generateInstructionPacket("task-1", "agent-1", "Test objective");
        expect(packet.agent_id).toBe("agent-1");
        expect(packet.task_id).toBe("task-1");
        expect(packet.error_budget.max_total_errors).toBe(5);
      });
    });
  });
}

// =============================================================================
// CLI
// =============================================================================

/**
 * Parses process arguments. Unknown arguments are ignored; when both mode
 * flags are given, the last one wins.
 */
function parseArgs(): { mode: TestMode; validateOnly: boolean; verbose: boolean } {
  let mode = TestMode.REAL;
  let validateOnly = false;
  let verbose = false;

  for (const arg of process.argv.slice(2)) {
    switch (arg) {
      case "--use-mocks":
        mode = TestMode.MOCK;
        break;
      case "--hybrid":
        mode = TestMode.HYBRID;
        break;
      case "--validate-only":
        validateOnly = true;
        break;
      case "-v":
      case "--verbose":
        verbose = true;
        break;
    }
  }

  return { mode, validateOnly, verbose };
}

if (import.meta.main) {
  const { mode, validateOnly, verbose } = parseArgs();
  globalConfig.mode = mode;
  globalConfig.verbose = verbose;

  console.log("Agent Testing Framework");
  console.log("=======================\n");

  if (validateOnly) {
    // Just validate services
    console.log("Validating services...\n");
    validateServices(globalConfig).then(report => {
      console.log(`Timestamp: ${report.timestamp}`);
      console.log(`Mode: ${report.mode}\n`);

      for (const r of report.results) {
        const icon = r.available ? "✅" : "❌";
        console.log(`${icon} ${r.service}: ${r.available ? "available" : r.error} (${r.latencyMs}ms)`);
      }

      if (report.missingFiles.length > 0) {
        console.log("\n❌ Missing files:");
        report.missingFiles.forEach(f => console.log(` - ${f}`));
      }

      console.log(`\n${report.allServicesAvailable ? "✅ All services available" : "❌ Some services unavailable"}`);

      if (!report.allServicesAvailable && mode === TestMode.REAL) {
        console.log("\n⚠️ Use --use-mocks to run tests with mock services");
        process.exit(1);
      }
    }).catch((error: unknown) => {
      console.error("\n❌ Service validation failed:", errorMessage(error));
      process.exit(1);
    });
  } else {
    // Run tests
    const harness = new TestHarness({ mode });

    // Add pre-built scenarios
    harness.addScenario(CommonScenarios.happyPath(null));
    harness.addScenario(CommonScenarios.errorBudgetExceeded(null));
    harness.addScenario(CommonScenarios.stuckDetection());
    harness.addScenario(CommonScenarios.conflictResolution());

    // Add real service test if in real mode
    if (mode === TestMode.REAL) {
      harness.addScenario(CommonScenarios.realServiceConnectivity());
    }

    harness.runAll().then(metrics => {
      console.log("\n" + "=".repeat(60));
      console.log("TEST RESULTS");
      console.log("=".repeat(60));
      console.log(`Mode: ${mode.toUpperCase()}`);
      console.log(`Passed: ${metrics.passed}`);
      console.log(`Failed: ${metrics.failed}`);
      console.log(`Skipped: ${metrics.skipped}`);
      console.log(`Duration: ${metrics.duration}ms`);
      console.log(`Coverage: ${metrics.coverage.toFixed(1)}%`);

      if (metrics.mocksUsed.length > 0) {
        console.log(`\n⚠️ Mocks used: ${metrics.mocksUsed.join(", ")}`);
      }

      console.log("=".repeat(60));

      process.exit(metrics.failed > 0 ? 1 : 0);
    }).catch((error: unknown) => {
      console.error("\n❌ Test harness failed:", errorMessage(error));
      if (mode === TestMode.REAL) {
        console.log("\n💡 Tip: Use --use-mocks to run with mock services");
      }
      process.exit(1);
    });
  }
}