Phase 8 Production Hardening with complete governance infrastructure:

- Vault integration with tiered policies (T0-T4)
- DragonflyDB state management
- SQLite audit ledger
- Pipeline DSL and templates
- Promotion/revocation engine
- Checkpoint system for session persistence
- Health manager and circuit breaker for fault tolerance
- GitHub/Slack integrations
- Architectural test pipeline with bug watcher, suggestion engine, council review
- Multi-agent chaos testing framework

Test Results:

- Governance tests: 68/68 passing
- E2E workflow: 16/16 passing
- Phase 2 Vault: 14/14 passing
- Integration tests: 27/27 passing

Coverage: 57.6% average across 12 phases

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
657 lines
21 KiB
TypeScript
657 lines
21 KiB
TypeScript
/**
|
|
* Agent Testing Framework
|
|
* =======================
|
|
* Provides mocks, utilities, and test harnesses for agent development.
|
|
*/
|
|
|
|
import { describe, it, expect, beforeAll, afterAll, beforeEach, mock } from "bun:test";
|
|
|
|
// =============================================================================
|
|
// Type Definitions
|
|
// =============================================================================
|
|
|
|
/**
 * Per-test bundle of identifiers and mock services.
 *
 * `createTestContext` builds one of these with random ids and fresh mocks.
 */
export interface TestContext {
  /** Identifier of the task under test. */
  taskId: string;
  /** Identifier of the agent under test. */
  agentId: string;
  /** Creation time of the context, in epoch milliseconds. */
  startTime: number;
  /** In-memory Vault stand-in (secrets, tokens, policies). */
  mockVault: MockVault;
  /** In-memory DragonflyDB stand-in (strings, hashes, lists, pub/sub). */
  mockDragonfly: MockDragonfly;
  /** Scriptable LLM stand-in with canned responses. */
  mockLLM: MockLLM;
}
|
|
|
|
/**
 * A named scenario that `TestHarness` runs in four stages:
 * `setup` -> `execute` -> `assertions` -> `cleanup`.
 */
export interface TestScenario {
  /** Display name; also used as the key in the harness result map. */
  name: string;
  /** Human-readable summary of what the scenario covers. */
  description: string;
  /** Runs first, before a context is handed to the scenario. */
  setup: () => Promise<void>;
  /** Drives the behavior under test against the supplied context. */
  execute: (ctx: TestContext) => Promise<void>;
  /** Verifies the outcome; throwing marks the scenario as failed. */
  assertions: (ctx: TestContext) => Promise<void>;
  /** Releases whatever `setup`/`execute` acquired. */
  cleanup: () => Promise<void>;
}
|
|
|
|
/** Aggregate numbers returned by `TestHarness.runAll`. */
export interface TestMetrics {
  /** Count of scenarios that completed without throwing. */
  passed: number;
  /** Count of scenarios that threw. */
  failed: number;
  /** Count of scenarios that were not run. */
  skipped: number;
  /** Wall-clock time of the whole run, in milliseconds. */
  duration: number;
  /** Percentage value in the range 0-100. */
  coverage: number;
}
|
|
|
|
// =============================================================================
|
|
// Mock Vault
|
|
// =============================================================================
|
|
|
|
/**
 * In-memory stand-in for a Vault client.
 *
 * Holds secrets, named policies (lists of path rules), issued tokens and a
 * log of secret reads. The constructor seeds two test secrets and the
 * `t0-observer` / `t1-operator` policies.
 */
export class MockVault {
  private secrets: Map<string, any> = new Map();
  private policies: Map<string, string[]> = new Map();
  private tokens: Map<string, { policy: string; ttl: number; created: number }> = new Map();
  private accessLog: Array<{ path: string; action: string; timestamp: number }> = [];

  constructor() {
    // Initialize with default test secrets
    this.secrets.set("api-keys/openrouter", { api_key: "test-key" });
    this.secrets.set("services/dragonfly", {
      host: "127.0.0.1",
      port: 6379,
      password: "test-password",
    });

    // Default policies
    this.policies.set("t0-observer", ["read:secret/data/docs/*", "read:secret/data/inventory/*"]);
    this.policies.set("t1-operator", ["read:ssh/creds/sandbox-*", "read:proxmox/creds/sandbox"]);
  }

  /**
   * Reads a secret and records the read in the access log.
   *
   * @param path Secret path, e.g. `"api-keys/openrouter"`.
   * @returns The stored value, or `null` when nothing is stored at `path`.
   */
  async getSecret(path: string): Promise<any> {
    this.accessLog.push({ path, action: "read", timestamp: Date.now() });
    // `??` rather than `||`: a stored 0, "" or false is a real value, not "missing".
    return this.secrets.get(path) ?? null;
  }

  /** Stores (or overwrites) a secret. Writes are not recorded in the access log. */
  setSecret(path: string, value: any): void {
    this.secrets.set(path, value);
  }

  /**
   * Issues a new token bound to a policy name.
   *
   * @param policy Policy name the token is bound to.
   * @param ttl Lifetime in seconds (default 3600).
   * @returns A token string starting with `hvs.test-`.
   */
  async createToken(policy: string, ttl: number = 3600): Promise<string> {
    const token = "hvs.test-" + Math.random().toString(36).slice(2);
    this.tokens.set(token, { policy, ttl, created: Date.now() });
    return token;
  }

  /** Returns `true` when the token exists and its TTL has not elapsed. */
  async validateToken(token: string): Promise<boolean> {
    const tokenData = this.tokens.get(token);
    if (!tokenData) return false;
    const elapsedSeconds = (Date.now() - tokenData.created) / 1000;
    return elapsedSeconds < tokenData.ttl;
  }

  /** Forgets the token; revoking an unknown token is a no-op. */
  async revokeToken(token: string): Promise<void> {
    this.tokens.delete(token);
  }

  /**
   * Checks whether the token's policy has a rule matching `path`.
   *
   * Rules are compared against the whole of `path`, with each `*` matching
   * any run of characters. Token expiry is not consulted here; use
   * `validateToken` for that.
   *
   * @returns `false` for unknown tokens or policies without a matching rule.
   */
  checkAccess(token: string, path: string): boolean {
    const tokenData = this.tokens.get(token);
    if (!tokenData) return false;
    const rules = this.policies.get(tokenData.policy) ?? [];
    return rules.some((rule) => MockVault.ruleToRegExp(rule).test(path));
  }

  /** Returns the live access-log array (not a copy). */
  getAccessLog(): Array<{ path: string; action: string; timestamp: number }> {
    return this.accessLog;
  }

  /** Clears the access log and all tokens; secrets and policies are kept. */
  reset(): void {
    this.accessLog = [];
    this.tokens.clear();
  }

  /**
   * Compiles a policy rule into an anchored regular expression.
   * Literal segments are escaped so characters such as `.` match only
   * themselves, and every `*` (not just the first) becomes `.*`.
   */
  private static ruleToRegExp(rule: string): RegExp {
    const source = rule
      .split("*")
      .map((segment) => segment.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
      .join(".*");
    return new RegExp(`^${source}$`);
  }
}
|
|
|
|
// =============================================================================
|
|
// Mock DragonflyDB
|
|
// =============================================================================
|
|
|
|
/**
 * In-memory stand-in for a DragonflyDB (Redis-protocol) client.
 *
 * Supports a subset of string, hash, list and pub/sub commands. TTLs are
 * tracked for string keys only and are evaluated lazily against `Date.now()`
 * whenever a key is touched; no background timer is used.
 */
export class MockDragonfly {
  private store: Map<string, any> = new Map();
  private hashes: Map<string, Map<string, any>> = new Map();
  private lists: Map<string, any[]> = new Map();
  private expirations: Map<string, number> = new Map();
  private subscribers: Map<string, Array<(msg: string) => void>> = new Map();

  // String operations

  /**
   * Stores a string value.
   *
   * @param options `EX` sets a TTL in seconds; `NX` writes only when the key
   *   does not currently exist (an expired key counts as absent).
   * @returns `"OK"` on write, `null` when `NX` blocked the write.
   */
  async set(key: string, value: any, options?: { EX?: number; NX?: boolean }): Promise<string | null> {
    this.evictIfExpired(key);
    if (options?.NX && this.store.has(key)) return null;
    this.store.set(key, value);
    if (options?.EX) {
      this.expirations.set(key, Date.now() + options.EX * 1000);
    } else {
      // A plain SET discards any previous TTL; otherwise a stale deadline
      // would make the fresh value look expired.
      this.expirations.delete(key);
    }
    return "OK";
  }

  /** Returns the stored value, or `null` when the key is missing or expired. */
  async get(key: string): Promise<any> {
    this.evictIfExpired(key);
    // `??` rather than `||` so stored 0, "" and false round-trip.
    return this.store.get(key) ?? null;
  }

  /** Deletes a string key and its TTL. Returns 1 if a live key was removed, else 0. */
  async del(key: string): Promise<number> {
    this.evictIfExpired(key);
    this.expirations.delete(key);
    return this.store.delete(key) ? 1 : 0;
  }

  /** Returns 1 when the string key exists and is not expired, else 0. */
  async exists(key: string): Promise<number> {
    this.evictIfExpired(key);
    return this.store.has(key) ? 1 : 0;
  }

  /**
   * Sets a TTL (seconds from now) on an existing string key.
   * @returns `false` when the key does not exist (or has expired).
   */
  async expire(key: string, seconds: number): Promise<boolean> {
    this.evictIfExpired(key);
    if (!this.store.has(key)) return false;
    this.expirations.set(key, Date.now() + seconds * 1000);
    return true;
  }

  // Hash operations

  /** Sets a hash field. Returns 1 when the field is new, 0 when overwritten. */
  async hSet(key: string, field: string, value: any): Promise<number> {
    const hash = this.hashFor(key);
    const existed = hash.has(field);
    hash.set(field, value);
    return existed ? 0 : 1;
  }

  /** Returns a hash field, or `null` when the hash or field is missing. */
  async hGet(key: string, field: string): Promise<any> {
    return this.hashes.get(key)?.get(field) ?? null;
  }

  /** Returns all fields of a hash as a plain object (`{}` when missing). */
  async hGetAll(key: string): Promise<Record<string, any>> {
    const hash = this.hashes.get(key);
    if (!hash) return {};
    const result: Record<string, any> = {};
    for (const [k, v] of hash) {
      result[k] = v;
    }
    return result;
  }

  /**
   * Adds `increment` to a hash field, treating a missing/falsy field as 0.
   * The new value is stored as a decimal string and returned as a number.
   */
  async hIncrBy(key: string, field: string, increment: number): Promise<number> {
    const hash = this.hashFor(key);
    const current = parseInt(String(hash.get(field) || "0"), 10);
    const newValue = current + increment;
    hash.set(field, newValue.toString());
    return newValue;
  }

  // List operations

  /** Appends values to the tail of a list and returns the new length. */
  async rPush(key: string, ...values: any[]): Promise<number> {
    if (!this.lists.has(key)) this.lists.set(key, []);
    const list = this.lists.get(key)!;
    list.push(...values);
    return list.length;
  }

  /**
   * Returns the elements from `start` to `stop`, both inclusive, as a copy.
   * Negative indexes count from the tail (-1 is the last element);
   * out-of-range indexes are clamped and an empty range yields `[]`.
   */
  async lRange(key: string, start: number, stop: number): Promise<any[]> {
    const list = this.lists.get(key) ?? [];
    const length = list.length;
    const from = start < 0 ? Math.max(length + start, 0) : start;
    const to = Math.min(stop < 0 ? length + stop : stop, length - 1);
    if (from > to) return [];
    return list.slice(from, to + 1);
  }

  // Pub/Sub

  /** Registers a handler that is invoked synchronously by `publish`. */
  async subscribe(channel: string, handler: (msg: string) => void): Promise<void> {
    if (!this.subscribers.has(channel)) this.subscribers.set(channel, []);
    this.subscribers.get(channel)!.push(handler);
  }

  /** Delivers a message to the channel's handlers; returns how many were called. */
  async publish(channel: string, message: string): Promise<number> {
    const handlers = this.subscribers.get(channel) ?? [];
    handlers.forEach((h) => h(message));
    return handlers.length;
  }

  /** Drops all handlers of one channel, or of every channel when omitted. */
  async unsubscribe(channel?: string): Promise<void> {
    if (channel) {
      this.subscribers.delete(channel);
    } else {
      this.subscribers.clear();
    }
  }

  // Utility

  /** True when the key has a TTL deadline that lies in the past. */
  private isExpired(key: string): boolean {
    const expiry = this.expirations.get(key);
    if (!expiry) return false;
    return Date.now() > expiry;
  }

  /** Removes both the value and the TTL entry of an expired string key. */
  private evictIfExpired(key: string): void {
    if (this.isExpired(key)) {
      this.store.delete(key);
      this.expirations.delete(key);
    }
  }

  /** Returns the hash stored at `key`, creating an empty one if needed. */
  private hashFor(key: string): Map<string, any> {
    let hash = this.hashes.get(key);
    if (!hash) {
      hash = new Map();
      this.hashes.set(key, hash);
    }
    return hash;
  }

  /** Wipes all data, TTLs and subscriptions. */
  reset(): void {
    this.store.clear();
    this.hashes.clear();
    this.lists.clear();
    this.expirations.clear();
    this.subscribers.clear();
  }

  // Test helpers

  /** Exposes the live internal maps (not copies) for white-box assertions. */
  getState(): { store: Map<string, any>; hashes: Map<string, Map<string, any>>; lists: Map<string, any[]> } {
    return { store: this.store, hashes: this.hashes, lists: this.lists };
  }
}
|
|
|
|
// =============================================================================
|
|
// Mock LLM
|
|
// =============================================================================
|
|
|
|
/**
 * Scriptable LLM stand-in.
 *
 * Responses are chosen by case-insensitive substring match of registered
 * patterns against the prompt (first registered match wins), falling back to
 * a default JSON string. Latency and a random failure rate can be simulated.
 */
export class MockLLM {
  private responses: Map<string, string> = new Map();
  private callLog: Array<{ prompt: string; response: string; timestamp: number }> = [];
  private defaultResponse: string = '{"confidence": 0.5, "steps": [], "assumptions": []}';
  private latencyMs: number = 0;
  private failureRate: number = 0;

  /** Registers the canned response returned when a prompt contains `pattern`. */
  setResponse(pattern: string, response: string): void {
    this.responses.set(pattern, response);
  }

  /** Replaces the response used when no pattern matches. */
  setDefaultResponse(response: string): void {
    this.defaultResponse = response;
  }

  /** Sets the artificial delay, in milliseconds, applied to every call. */
  setLatency(ms: number): void {
    this.latencyMs = ms;
  }

  /** Sets the probability (compared against `Math.random()`) that a call throws. */
  setFailureRate(rate: number): void {
    this.failureRate = rate;
  }

  /**
   * Produces a completion for `prompt`.
   *
   * Order of effects: optional delay, then the simulated-failure roll, then
   * response lookup. Only successful calls are appended to the call log.
   *
   * @param options Accepted for signature compatibility; not read.
   * @throws Error with message `LLM_ERROR: Simulated failure` on a simulated failure.
   */
  async complete(prompt: string, options?: { maxTokens?: number; temperature?: number }): Promise<string> {
    const delay = this.latencyMs;
    if (delay > 0) {
      await new Promise<void>((resolve) => {
        setTimeout(resolve, delay);
      });
    }

    const shouldFail = Math.random() < this.failureRate;
    if (shouldFail) {
      throw new Error("LLM_ERROR: Simulated failure");
    }

    // First registered pattern contained in the prompt wins.
    const needle = prompt.toLowerCase();
    const hit = [...this.responses].find(([pattern]) => needle.includes(pattern.toLowerCase()));
    const response = hit === undefined ? this.defaultResponse : hit[1];

    this.callLog.push({ prompt, response, timestamp: Date.now() });
    return response;
  }

  /** Returns the live call-log array (not a copy). */
  getCallLog(): Array<{ prompt: string; response: string; timestamp: number }> {
    return this.callLog;
  }

  /** Number of successful calls recorded since construction or the last reset. */
  getCallCount(): number {
    return this.callLog.length;
  }

  /**
   * Clears the call log and turns latency/failure simulation off.
   * Registered patterns and the default response are left untouched.
   */
  reset(): void {
    this.failureRate = 0;
    this.latencyMs = 0;
    this.callLog = [];
  }
}
|
|
|
|
// =============================================================================
|
|
// Test Utilities
|
|
// =============================================================================
|
|
|
|
/**
 * Builds a `TestContext` with random six-character id suffixes, the current
 * time and fresh mock instances.
 *
 * @param overrides Fields that replace the generated defaults.
 * @returns The merged context; override values win over defaults.
 */
export function createTestContext(overrides?: Partial<TestContext>): TestContext {
  const randomSuffix = (): string => Math.random().toString(36).slice(2, 8);

  const defaults: TestContext = {
    taskId: "test-task-" + randomSuffix(),
    agentId: "test-agent-" + randomSuffix(),
    startTime: Date.now(),
    mockVault: new MockVault(),
    mockDragonfly: new MockDragonfly(),
    mockLLM: new MockLLM(),
  };

  return { ...defaults, ...overrides };
}
|
|
|
|
/**
 * Races `promise` against a timer.
 *
 * The timer is cancelled as soon as the race settles, so a fast `promise`
 * does not leave a pending timeout behind.
 *
 * @param promise Work to wait for. It is not cancelled on timeout.
 * @param ms Time budget in milliseconds.
 * @param message Message of the `Error` raised on timeout (default `"Timeout"`).
 * @returns The value `promise` resolves with.
 * @throws Error with `message` when `ms` elapses first; otherwise whatever `promise` rejects with.
 */
export async function withTimeout<T>(promise: Promise<T>, ms: number, message: string = "Timeout"): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error(message)), ms);
  });

  try {
    return await Promise.race([promise, timeout]);
  } finally {
    clearTimeout(timer);
  }
}
|
|
|
|
/**
 * Builds a fixed-shape instruction packet for tests.
 *
 * Everything except the two ids, the objective and the `created_at`
 * timestamp is a constant. Each call returns fresh objects, so callers may
 * mutate the result.
 *
 * @param taskId Stored as `task_id`.
 * @param agentId Stored as `agent_id`.
 * @param objective Stored as `objective`.
 */
export function generateInstructionPacket(taskId: string, agentId: string, objective: string) {
  const constraints = {
    scope: ["sandbox only"],
    forbidden: ["no prod access"],
    required_steps: ["plan before execute"],
  };

  const errorBudget = {
    max_total_errors: 5,
    max_same_error_repeats: 2,
    max_procedure_violations: 1,
  };

  const createdAt = new Date().toISOString();

  // Key order is kept stable because callers serialize the packet with JSON.stringify.
  return {
    agent_id: agentId,
    task_id: taskId,
    created_for: "Test Task",
    objective: objective,
    deliverables: ["plan", "report"],
    constraints: constraints,
    success_criteria: ["plan generated"],
    error_budget: errorBudget,
    escalation_rules: ["If confidence < 0.7 -> escalate"],
    created_at: createdAt,
  };
}
|
|
|
|
// =============================================================================
|
|
// Test Harness
|
|
// =============================================================================
|
|
|
|
/**
 * Sequential scenario runner.
 *
 * Each scenario gets a fresh `TestContext` and goes through
 * `setup` -> `execute` -> `assertions`, followed by exactly one `cleanup`
 * call regardless of outcome. Per-scenario results are kept in a map keyed
 * by scenario name, so scenarios sharing a name overwrite each other.
 */
export class TestHarness {
  private scenarios: TestScenario[] = [];
  private results: Map<string, { passed: boolean; error?: string; duration: number }> = new Map();

  /** Queues a scenario; scenarios run in insertion order. */
  addScenario(scenario: TestScenario): void {
    this.scenarios.push(scenario);
  }

  /**
   * Runs every queued scenario and logs `[TEST]` / `[PASS]` / `[FAIL]` lines.
   *
   * A scenario fails when any stage throws. A cleanup error fails an
   * otherwise passing scenario; for an already failed scenario it is only
   * logged as a warning so the original failure stays visible.
   *
   * @returns Totals for the run. `coverage` is the pass percentage
   *   (`passed / scenarios * 100`, or 0 with no scenarios); `skipped` is always 0.
   */
  async runAll(): Promise<TestMetrics> {
    const startTime = Date.now();
    let passed = 0;
    let failed = 0;

    for (const scenario of this.scenarios) {
      const scenarioStart = Date.now();
      const ctx = createTestContext();
      let failure: string | undefined;

      console.log(`\n[TEST] Running: ${scenario.name}`);

      try {
        await scenario.setup();
        await scenario.execute(ctx);
        await scenario.assertions(ctx);
      } catch (error: unknown) {
        failure = TestHarness.describeError(error);
      }

      // Cleanup runs once, outside the try above, so a throwing cleanup is
      // never re-invoked by the failure path.
      try {
        await scenario.cleanup();
      } catch (error: unknown) {
        const cleanupMessage = TestHarness.describeError(error);
        if (failure === undefined) {
          failure = cleanupMessage;
        } else {
          console.log(`[WARN] ${scenario.name}: cleanup failed: ${cleanupMessage}`);
        }
      }

      const duration = Date.now() - scenarioStart;
      if (failure === undefined) {
        this.results.set(scenario.name, { passed: true, duration });
        passed++;
        console.log(`[PASS] ${scenario.name} (${duration}ms)`);
      } else {
        this.results.set(scenario.name, { passed: false, error: failure, duration });
        failed++;
        console.log(`[FAIL] ${scenario.name}: ${failure}`);
      }
    }

    return {
      passed,
      failed,
      skipped: 0,
      duration: Date.now() - startTime,
      coverage: this.scenarios.length > 0 ? (passed / this.scenarios.length) * 100 : 0,
    };
  }

  /** Returns the live result map (not a copy), keyed by scenario name. */
  getResults(): Map<string, { passed: boolean; error?: string; duration: number }> {
    return this.results;
  }

  /** Turns any thrown value into a message; non-Error values are stringified. */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }
}
|
|
|
|
// =============================================================================
|
|
// Pre-built Test Scenarios
|
|
// =============================================================================
|
|
|
|
/**
 * Factory functions for reusable scenarios.
 *
 * These scenarios only seed and inspect mock state; none of them runs an
 * agent. The `AgentClass` parameters are accepted but not used.
 */
export const CommonScenarios = {
  /**
   * Happy path - agent completes successfully.
   *
   * Seeds a "plan" LLM response and an instruction packet. The assertions
   * check `phase`/`status` only when an `agent:<id>:state` key exists and
   * pass without checking anything when it is absent.
   */
  happyPath: (AgentClass: any): TestScenario => ({
    name: "Happy Path - Successful Completion",
    description: "Agent completes all phases without errors",
    async setup() {},
    async execute(ctx) {
      // Canned planning response for any prompt containing "plan".
      const planResponse = JSON.stringify({
        title: "Test Plan",
        confidence: 0.85,
        steps: [{ step: 1, action: "Test action" }],
      });
      ctx.mockLLM.setResponse("plan", planResponse);

      // Store the instruction packet under the agent's packet key.
      const instructionPacket = generateInstructionPacket(ctx.taskId, ctx.agentId, "Test objective");
      const packetKey = `agent:${ctx.agentId}:packet`;
      await ctx.mockDragonfly.set(packetKey, JSON.stringify(instructionPacket));
    },
    async assertions(ctx) {
      const rawState = await ctx.mockDragonfly.get(`agent:${ctx.agentId}:state`);
      if (!rawState) return; // nothing recorded -> nothing to verify

      const agentState = JSON.parse(rawState);
      expect(agentState.phase).toBe("EXIT");
      expect(agentState.status).toBe("COMPLETED");
    },
    async cleanup() {},
  }),

  /**
   * Error budget exceeded.
   *
   * Makes every LLM call fail, stores a packet with a budget of 2 errors and
   * records 3 errors. The `REVOKED` check runs only when an
   * `agent:<id>:state` key exists.
   */
  errorBudgetExceeded: (AgentClass: any): TestScenario => ({
    name: "Error Budget Exceeded - Revocation",
    description: "Agent is revoked when error budget is exceeded",
    async setup() {},
    async execute(ctx) {
      // All LLM calls fail
      ctx.mockLLM.setFailureRate(1.0);

      const instructionPacket = generateInstructionPacket(ctx.taskId, ctx.agentId, "Test objective");
      instructionPacket.error_budget.max_total_errors = 2;
      const packetKey = `agent:${ctx.agentId}:packet`;
      await ctx.mockDragonfly.set(packetKey, JSON.stringify(instructionPacket));

      // Simulate errors beyond the budget.
      const errorsKey = `agent:${ctx.agentId}:errors`;
      await ctx.mockDragonfly.hIncrBy(errorsKey, "total_errors", 3);
    },
    async assertions(ctx) {
      const rawState = await ctx.mockDragonfly.get(`agent:${ctx.agentId}:state`);
      if (!rawState) return; // nothing recorded -> nothing to verify

      const agentState = JSON.parse(rawState);
      expect(agentState.status).toBe("REVOKED");
    },
    async cleanup() {},
  }),

  /**
   * Stuck detection.
   *
   * Records an ALPHA agent whose `last_activity` is 60 seconds old and
   * asserts that the measured inactivity exceeds 30 seconds.
   */
  stuckDetection: (): TestScenario => ({
    name: "Stuck Detection - GAMMA Spawn",
    description: "GAMMA is spawned when agents are stuck",
    async setup() {},
    async execute(ctx) {
      // 60 seconds ago
      const sixtySecondsAgo = new Date(Date.now() - 60000).toISOString();
      const stalledAgent = {
        agent_id: ctx.agentId,
        role: "ALPHA",
        status: "WORKING",
        last_activity: sixtySecondsAgo,
      };
      await ctx.mockDragonfly.hSet(`agents:${ctx.taskId}`, "ALPHA", JSON.stringify(stalledAgent));
    },
    async assertions(ctx) {
      // Check that stuck would be detected
      const rawState = await ctx.mockDragonfly.hGet(`agents:${ctx.taskId}`, "ALPHA");
      if (!rawState) return;

      const agentState = JSON.parse(rawState);
      const lastActivityMs = new Date(agentState.last_activity).getTime();
      const idleSeconds = (Date.now() - lastActivityMs) / 1000;
      expect(idleSeconds).toBeGreaterThan(30);
    },
    async cleanup() {},
  }),

  /**
   * Conflict resolution.
   *
   * Writes two ALPHA proposals, a rejecting evaluation of the second one and
   * a `conflicts_detected` counter of 1, then asserts the counter is positive.
   */
  conflictResolution: (): TestScenario => ({
    name: "Conflict Resolution",
    description: "Multiple proposals lead to conflict detection",
    async setup() {},
    async execute(ctx) {
      const solutionsKey = `blackboard:${ctx.taskId}:solutions`;

      // Simulate conflicting proposals
      const firstProposal = JSON.stringify({
        author: "ALPHA",
        value: { approach: "Approach A", confidence: 0.8 },
      });
      await ctx.mockDragonfly.hSet(solutionsKey, "proposal_1", firstProposal);

      const secondProposal = JSON.stringify({
        author: "ALPHA",
        value: { approach: "Approach B", confidence: 0.7 },
      });
      await ctx.mockDragonfly.hSet(solutionsKey, "proposal_2", secondProposal);

      // BETA rejects proposal_2
      const rejection = JSON.stringify({
        accepted: false,
        score: 0.5,
      });
      await ctx.mockDragonfly.hSet(`blackboard:${ctx.taskId}:progress`, "eval_proposal_2", rejection);

      await ctx.mockDragonfly.hIncrBy(`metrics:${ctx.taskId}`, "conflicts_detected", 1);
    },
    async assertions(ctx) {
      const rawCount = await ctx.mockDragonfly.hGet(`metrics:${ctx.taskId}`, "conflicts_detected");
      const conflictCount = parseInt(rawCount || "0");
      expect(conflictCount).toBeGreaterThan(0);
    },
    async cleanup() {},
  }),
};
|
|
|
|
// =============================================================================
|
|
// Example Test Suite
|
|
// =============================================================================
|
|
|
|
/**
 * Registers an example `bun:test` suite that exercises the mocks and the
 * instruction-packet generator. Call it from a test file; it only declares
 * `describe`/`it` blocks and returns nothing.
 */
export function runExampleTests() {
  describe("Agent Governance Tests", () => {
    let ctx: TestContext;

    // Fresh mocks for every test so state never leaks between cases.
    beforeEach(() => {
      ctx = createTestContext();
    });

    describe("MockVault", () => {
      it("should store and retrieve secrets", async () => {
        ctx.mockVault.setSecret("test/secret", { key: "value" });
        const secret = await ctx.mockVault.getSecret("test/secret");
        expect(secret.key).toBe("value");
      });

      it("should create and validate tokens", async () => {
        const token = await ctx.mockVault.createToken("t0-observer", 60);
        expect(token).toStartWith("hvs.test-");
        expect(await ctx.mockVault.validateToken(token)).toBe(true);
      });

      it("should revoke tokens", async () => {
        const token = await ctx.mockVault.createToken("t0-observer");
        await ctx.mockVault.revokeToken(token);
        expect(await ctx.mockVault.validateToken(token)).toBe(false);
      });
    });

    describe("MockDragonfly", () => {
      it("should handle string operations", async () => {
        await ctx.mockDragonfly.set("key", "value");
        expect(await ctx.mockDragonfly.get("key")).toBe("value");
      });

      it("should handle hash operations", async () => {
        await ctx.mockDragonfly.hSet("hash", "field", "value");
        expect(await ctx.mockDragonfly.hGet("hash", "field")).toBe("value");
      });

      it("should handle list operations", async () => {
        await ctx.mockDragonfly.rPush("list", "a", "b", "c");
        const items = await ctx.mockDragonfly.lRange("list", 0, -1);
        expect(items).toEqual(["a", "b", "c"]);
      });

      it("should handle expiration", async () => {
        await ctx.mockDragonfly.set("expiring", "value", { EX: 1 });
        expect(await ctx.mockDragonfly.get("expiring")).toBe("value");
        // Note: In real tests, we'd wait for expiration
      });

      it("should handle NX option", async () => {
        await ctx.mockDragonfly.set("existing", "first");
        const result = await ctx.mockDragonfly.set("existing", "second", { NX: true });
        expect(result).toBeNull();
        expect(await ctx.mockDragonfly.get("existing")).toBe("first");
      });
    });

    describe("MockLLM", () => {
      it("should return default response", async () => {
        const response = await ctx.mockLLM.complete("test prompt");
        expect(response).toContain("confidence");
      });

      it("should match patterns", async () => {
        ctx.mockLLM.setResponse("terraform", '{"tool": "terraform"}');
        const response = await ctx.mockLLM.complete("Create a terraform plan");
        expect(response).toContain("terraform");
      });

      it("should simulate failures", async () => {
        ctx.mockLLM.setFailureRate(1.0);
        // `.rejects` yields a promise; without `await` the test would finish
        // before the rejection is checked.
        await expect(ctx.mockLLM.complete("test")).rejects.toThrow("LLM_ERROR");
      });

      it("should track call count", async () => {
        await ctx.mockLLM.complete("prompt 1");
        await ctx.mockLLM.complete("prompt 2");
        expect(ctx.mockLLM.getCallCount()).toBe(2);
      });
    });

    describe("Instruction Packets", () => {
      it("should generate valid packets", () => {
        const packet = generateInstructionPacket("task-1", "agent-1", "Test objective");
        expect(packet.agent_id).toBe("agent-1");
        expect(packet.task_id).toBe("task-1");
        expect(packet.error_budget.max_total_errors).toBe(5);
      });
    });
  });
}
|
|
|
|
// =============================================================================
|
|
// CLI
|
|
// =============================================================================
|
|
|
|
// Entry point when this file is executed directly: runs the pre-built
// scenarios, prints a summary and exits with 1 if any scenario failed.
if (import.meta.main) {
  console.log("Agent Testing Framework");
  console.log("=======================\n");

  const harness = new TestHarness();

  // Add pre-built scenarios
  harness.addScenario(CommonScenarios.happyPath(null));
  harness.addScenario(CommonScenarios.errorBudgetExceeded(null));
  harness.addScenario(CommonScenarios.stuckDetection());
  harness.addScenario(CommonScenarios.conflictResolution());

  harness
    .runAll()
    .then((metrics) => {
      const divider = "=".repeat(50);
      console.log("\n" + divider);
      console.log("TEST RESULTS");
      console.log(divider);
      console.log(`Passed: ${metrics.passed}`);
      console.log(`Failed: ${metrics.failed}`);
      console.log(`Skipped: ${metrics.skipped}`);
      console.log(`Duration: ${metrics.duration}ms`);
      console.log(`Coverage: ${metrics.coverage.toFixed(1)}%`);
      console.log(divider);

      process.exit(metrics.failed > 0 ? 1 : 0);
    })
    .catch((error: unknown) => {
      // A crash of the harness itself must not look like a green run.
      console.error("Test harness crashed:", error);
      process.exit(1);
    });
}
|