profit 8c6e7831e9 Add Phase 10-12 implementation: multi-tenant, marketplace, observability
Major additions:
- marketplace/: Agent template registry with FTS5 search, ratings, versioning
- observability/: Prometheus metrics, distributed tracing, structured logging
- ledger/migrations/: Database migration scripts for multi-tenant support
- tests/governance/: 15 new test files for phases 6-12 (295 total tests)
- bin/validate-phases: Full 12-phase validation script

New features:
- Multi-tenant support with tenant isolation and quota enforcement
- Agent marketplace with semantic versioning and search
- Observability with metrics, tracing, and log correlation
- Tier-1 agent bootstrap scripts

Updated components:
- ledger/api.py: Extended API for tenants, marketplace, observability
- ledger/schema.sql: Added tenant, project, marketplace tables
- testing/framework.ts: Enhanced test framework
- checkpoint/checkpoint.py: Improved checkpoint management

Archived:
- External integrations (Slack/GitHub/PagerDuty) moved to .archive/
- Old checkpoint files cleaned up

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-24 18:39:47 -05:00

1281 lines
40 KiB
TypeScript
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* Agent Testing Framework
* =======================
* Provides mocks, utilities, and test harnesses for agent development.
*
* IMPORTANT: By default, tests require real services (Vault, DragonflyDB).
* Use --use-mocks flag to explicitly enable mock mode.
*
* Usage:
* bun run framework.ts # Requires real services
* bun run framework.ts --use-mocks # Explicitly use mocks
* bun run framework.ts --validate-only # Check services without running tests
*/
import { describe, it, expect, beforeAll, afterAll, beforeEach, mock } from "bun:test";
import { existsSync } from "fs";
import { createClient, type RedisClientType } from "redis";
// =============================================================================
// Test Mode Configuration
// =============================================================================
/**
 * Selects how the framework sources its backing services (Vault, DragonflyDB).
 *
 * NOTE: createTestContext still substitutes a MockVault in REAL mode when the
 * VAULT_TOKEN environment variable is unset; the substitution is recorded in
 * the context's `_usingMocks` list.
 */
export enum TestMode {
REAL = "real", // Requires real services, fails if unavailable
MOCK = "mock", // Uses mocks, clearly logged
HYBRID = "hybrid", // Uses real where available, mocks otherwise (logged)
}
/**
 * Settings for a test run: the service-sourcing mode, where each backing
 * service lives, and which filesystem paths must exist.
 */
export interface TestConfig {
/** How services are sourced (real, mock, or hybrid). */
mode: TestMode;
/** Base URL of Vault; the health probe requests `<vaultAddr>/v1/sys/health`. */
vaultAddr: string;
/** Connection URL handed to the Redis client used for DragonflyDB. */
dragonflyAddr: string;
/** Agent entry-point paths that validateServices checks with existsSync. */
requiredAgents: string[];
/** Additional paths that validateServices checks with existsSync. */
requiredFiles: string[];
/** NOTE(review): not read anywhere in this section of the file — confirm its consumer. */
verbose: boolean;
}
/**
 * Baseline configuration: REAL mode, service addresses taken from the
 * VAULT_ADDR / DRAGONFLY_ADDR environment variables with localhost fallbacks,
 * and the absolute paths that must exist for validation to pass.
 */
const DEFAULT_CONFIG: TestConfig = {
mode: TestMode.REAL, // Default: require real services
// `||` (not `??`) means an empty-string env var also falls back to the default.
vaultAddr: process.env.VAULT_ADDR || "https://127.0.0.1:8200",
dragonflyAddr: process.env.DRAGONFLY_ADDR || "redis://127.0.0.1:6379",
requiredAgents: [
"/opt/agent-governance/agents/llm-planner-ts/index.ts",
"/opt/agent-governance/agents/multi-agent/orchestrator.ts",
],
requiredFiles: [
"/opt/agent-governance/runtime",
"/opt/agent-governance/pipeline",
],
verbose: false,
};
// Global config, set by CLI
// Starts as a shallow copy of DEFAULT_CONFIG, so the array fields are shared with it.
// NOTE(review): the CLI code that reassigns this is not visible in this section — confirm.
let globalConfig: TestConfig = { ...DEFAULT_CONFIG };
// =============================================================================
// Service Validation
// =============================================================================
/** Outcome of probing a single backing service. */
export interface ValidationResult {
/** Display name of the probed service ("Vault" or "DragonflyDB" in this file). */
service: string;
/** True when the probe considered the service reachable. */
available: boolean;
/** Message of the error that made the probe fail, when one was caught. */
error?: string;
/** Wall-clock milliseconds between the start of the probe and its result. */
latencyMs?: number;
}
/** Aggregate result produced by validateServices. */
export interface ValidationReport {
/** ISO-8601 time captured before the probes ran. */
timestamp: string;
/** Mode of the config the report was generated for. */
mode: TestMode;
/** True only when every probe succeeded AND no required path is missing. */
allServicesAvailable: boolean;
/** One entry per probed service, in probe order. */
results: ValidationResult[];
/** Required agent/file paths that do not exist on disk. */
missingFiles: string[];
/** Names of unavailable services, recorded only when the mode is not REAL. */
mocksFallback: string[];
}
/**
 * Probes Vault and DragonflyDB (in that order, one after the other) and checks
 * that every required agent/file path exists.
 *
 * @param config Configuration to validate; defaults to the module-level config.
 * @returns A report listing each probe result, the missing paths, and - when
 *          the mode is not REAL - the services that were unavailable.
 */
export async function validateServices(config: TestConfig = globalConfig): Promise<ValidationReport> {
  const timestamp = new Date().toISOString();
  const mocksPermitted = config.mode !== TestMode.REAL;

  // Probes are declared as thunks so they still run strictly in sequence.
  const probes: Array<[string, () => Promise<ValidationResult>]> = [
    ["Vault", () => checkVault(config.vaultAddr)],
    ["DragonflyDB", () => checkDragonfly(config.dragonflyAddr)],
  ];

  const results: ValidationResult[] = [];
  const mocksFallback: string[] = [];
  for (const [label, runProbe] of probes) {
    const outcome = await runProbe();
    results.push(outcome);
    if (mocksPermitted && !outcome.available) {
      mocksFallback.push(label);
    }
  }

  const requiredPaths = config.requiredAgents.concat(config.requiredFiles);
  const missingFiles = requiredPaths.filter((path) => !existsSync(path));

  const everyProbePassed = results.every((outcome) => outcome.available);
  return {
    timestamp,
    mode: config.mode,
    allServicesAvailable: everyProbePassed && missingFiles.length === 0,
    results,
    missingFiles,
    mocksFallback,
  };
}
/**
 * Probes Vault's `/v1/sys/health` endpoint and reports whether it is reachable.
 *
 * The first attempt passes Bun's `tls.rejectUnauthorized: false` option so that
 * local self-signed certificates are accepted. If that attempt throws, a second
 * attempt is made with NODE_TLS_REJECT_UNAUTHORIZED temporarily set to "0".
 * The environment variable is always restored (or removed, if it was unset),
 * even when the second attempt fails, so TLS verification is never left
 * disabled for the rest of the process.
 *
 * @param addr Base URL of the Vault server.
 * @returns A result whose `available` flag is true for the health status codes
 *          200, 429, 472 and 473; on a double failure, `error` carries the
 *          message of the first error.
 */
async function checkVault(addr: string): Promise<ValidationResult> {
  const start = Date.now();
  const healthUrl = `${addr}/v1/sys/health`;

  // Vault health codes: 200 active, 429 standby, 472 DR secondary,
  // 473 performance standby. All mean "a Vault node answered and is unsealed".
  const toResult = (response: Response): ValidationResult => ({
    service: "Vault",
    available:
      response.status === 200 ||
      response.status === 429 ||
      response.status === 472 ||
      response.status === 473,
    latencyMs: Date.now() - start,
  });

  try {
    // Try to reach Vault health endpoint
    const response = await fetch(healthUrl, {
      method: "GET",
      signal: AbortSignal.timeout(5000),
      // @ts-ignore - Bun supports this for self-signed certs
      tls: { rejectUnauthorized: false },
    });
    return toResult(response);
  } catch (error: unknown) {
    // Fallback: try with NODE_TLS_REJECT_UNAUTHORIZED workaround
    const previousTls = process.env.NODE_TLS_REJECT_UNAUTHORIZED;
    process.env.NODE_TLS_REJECT_UNAUTHORIZED = "0";
    try {
      const response = await fetch(healthUrl, {
        method: "GET",
        signal: AbortSignal.timeout(5000),
      });
      return toResult(response);
    } catch {
      // Report the first failure: it describes the primary attempt.
      return {
        service: "Vault",
        available: false,
        error: error instanceof Error ? error.message : String(error),
        latencyMs: Date.now() - start,
      };
    } finally {
      // Assigning `undefined` to process.env would store the string "undefined",
      // so an originally-unset variable has to be deleted instead.
      if (previousTls === undefined) {
        delete process.env.NODE_TLS_REJECT_UNAUTHORIZED;
      } else {
        process.env.NODE_TLS_REJECT_UNAUTHORIZED = previousTls;
      }
    }
  }
}
/**
 * Probes DragonflyDB by connecting, sending PING and disconnecting.
 *
 * The password comes from DRAGONFLY_PASSWORD, falling back to a built-in
 * default. If any step after the client was opened fails, the client is closed
 * on a best-effort basis so a failed probe does not leave a socket behind.
 *
 * @param addr Connection URL passed to the Redis client.
 * @returns A result with `available: true` when connect, ping and disconnect
 *          all succeeded; otherwise `available: false` and the error message.
 */
async function checkDragonfly(addr: string): Promise<ValidationResult> {
  const start = Date.now();
  let client: ReturnType<typeof createClient> | undefined;
  try {
    client = createClient({
      url: addr,
      password: process.env.DRAGONFLY_PASSWORD || "governance2026",
    });
    await client.connect();
    await client.ping();
    await client.disconnect();
    return {
      service: "DragonflyDB",
      available: true,
      latencyMs: Date.now() - start,
    };
  } catch (error: any) {
    return {
      service: "DragonflyDB",
      available: false,
      error: error.message,
      latencyMs: Date.now() - start,
    };
  } finally {
    // Only reached with an open client when a step above threw part-way through.
    if (client?.isOpen) {
      try {
        await client.disconnect();
      } catch {
        // Best effort: the probe result has already been decided.
      }
    }
  }
}
// =============================================================================
// Type Definitions
// =============================================================================
/** Everything a scenario receives when it runs: identifiers plus service handles. */
export interface TestContext {
/** Task identifier; createTestContext generates a random "test-task-…" value. */
taskId: string;
/** Agent identifier; createTestContext generates a random "test-agent-…" value. */
agentId: string;
/** Epoch milliseconds captured when the context was created. */
startTime: number;
/** Mode the context was created under. */
mode: TestMode;
/** Vault handle (real or mock — check `isMock()`). */
vault: IVault;
/** DragonflyDB handle (real or mock — check `isMock()`). */
dragonfly: IDragonfly;
/** LLM handle (real or mock — check `isMock()`). */
llm: ILLM;
_usingMocks: string[]; // Track which services are mocked
}
/**
 * A single test case for TestHarness. The harness invokes the hooks in the
 * order setup → execute → assertions → cleanup.
 */
export interface TestScenario {
/** Label used in log lines and as the key of the harness result map. */
name: string;
/** Human-readable summary of what the scenario covers. */
description: string;
// When non-empty, the harness skips this scenario in MOCK mode.
requiresReal?: string[]; // List of services that must be real
/** Runs before execute; receives no context. */
setup: () => Promise<void>;
/** Performs the scenario's actions against the supplied context. */
execute: (ctx: TestContext) => Promise<void>;
/** Verifies the outcome; a thrown error marks the scenario as failed. */
assertions: (ctx: TestContext) => Promise<void>;
/** Runs after assertions, and also after a failure; receives no context. */
cleanup: () => Promise<void>;
}
/** Summary returned by TestHarness.runAll. */
export interface TestMetrics {
/** Number of scenarios that completed without throwing. */
passed: number;
/** Number of scenarios that threw. */
failed: number;
/** Number of scenarios skipped because they require real services. */
skipped: number;
/** Total wall-clock milliseconds for the whole run. */
duration: number;
/** Pass rate in percent over the non-skipped scenarios (not code coverage). */
coverage: number;
/** Distinct names of mocked services seen across all scenarios. */
mocksUsed: string[];
}
// =============================================================================
// Service Interfaces (shared by real and mock implementations)
// =============================================================================
/** Vault operations the framework relies on; implemented by MockVault and RealVault. */
export interface IVault {
/** Resolves to the secret stored at `path`, or null when it cannot be read. */
getSecret(path: string): Promise<any>;
/** Optional synchronous write; only MockVault implements it in this file. */
setSecret?(path: string, value: any): void;
/** Issues a token bound to `policy`; `ttl` is in seconds. */
createToken(policy: string, ttl?: number): Promise<string>;
/** Resolves to true when the token is currently accepted. */
validateToken(token: string): Promise<boolean>;
/** Invalidates the token. */
revokeToken(token: string): Promise<void>;
/** True for in-memory test doubles, false for clients of a real server. */
isMock(): boolean;
}
/**
 * Key-value operations the framework relies on; implemented by MockDragonfly
 * and RealDragonfly. Method names follow the Redis client used by RealDragonfly.
 */
export interface IDragonfly {
/** Stores a string value. EX is a TTL in seconds; NX writes only if the key is absent (null when refused). */
set(key: string, value: any, options?: { EX?: number; NX?: boolean }): Promise<string | null>;
/** Reads a string value. */
get(key: string): Promise<any>;
/** Deletes a key; resolves to the number of keys removed. */
del(key: string): Promise<number>;
/** Resolves to 1 when the key exists, 0 otherwise. */
exists(key: string): Promise<number>;
/** Sets one hash field. */
hSet(key: string, field: string, value: any): Promise<number>;
/** Reads one hash field. */
hGet(key: string, field: string): Promise<any>;
/** Reads every field of a hash as a plain object. */
hGetAll(key: string): Promise<Record<string, any>>;
/** Adds `increment` to an integer hash field and resolves to the new value. */
hIncrBy(key: string, field: string, increment: number): Promise<number>;
/** Appends values to a list and resolves to the new list length. */
rPush(key: string, ...values: any[]): Promise<number>;
/** Reads a slice of a list. */
lRange(key: string, start: number, stop: number): Promise<any[]>;
/** True for in-memory test doubles, false for clients of a real server. */
isMock(): boolean;
/** Optional: closes the underlying connection; only RealDragonfly implements it here. */
disconnect?(): Promise<void>;
}
/** Minimal text-completion interface; MockLLM is the only implementation in this section. */
export interface ILLM {
/** Resolves to the completion text produced for `prompt`. */
complete(prompt: string, options?: { maxTokens?: number; temperature?: number }): Promise<string>;
/** True for in-memory test doubles. */
isMock(): boolean;
}
// =============================================================================
// Mock Vault
// =============================================================================
/**
 * In-memory stand-in for Vault.
 *
 * Holds secrets, named policies (lists of glob-style access rules) and issued
 * tokens in Maps, and records every secret read in an access log. A fresh
 * instance is seeded with two test secrets and the `t0-observer` /
 * `t1-operator` policies.
 */
export class MockVault implements IVault {
  private secrets: Map<string, any> = new Map();
  private policies: Map<string, string[]> = new Map();
  private tokens: Map<string, { policy: string; ttl: number; created: number }> = new Map();
  private accessLog: Array<{ path: string; action: string; timestamp: number }> = [];

  constructor() {
    // Initialize with default test secrets
    this.secrets.set("api-keys/openrouter", { api_key: "test-key" });
    this.secrets.set("services/dragonfly", {
      host: "127.0.0.1",
      port: 6379,
      password: "test-password",
    });
    // Default policies
    this.policies.set("t0-observer", ["read:secret/data/docs/*", "read:secret/data/inventory/*"]);
    this.policies.set("t1-operator", ["read:ssh/creds/sandbox-*", "read:proxmox/creds/sandbox"]);
  }

  isMock(): boolean {
    return true;
  }

  /**
   * Returns the secret stored at `path`, or null when none exists. The read is
   * appended to the access log. Falsy secrets (0, "", false) are returned
   * as stored rather than being mistaken for "missing".
   */
  async getSecret(path: string): Promise<any> {
    this.accessLog.push({ path, action: "read", timestamp: Date.now() });
    return this.secrets.get(path) ?? null;
  }

  /** Stores (or overwrites) the secret at `path`. Not recorded in the access log. */
  setSecret(path: string, value: any): void {
    this.secrets.set(path, value);
  }

  /**
   * Issues a random `hvs.test-…` token bound to `policy`.
   * @param ttl Lifetime in seconds (default 3600).
   */
  async createToken(policy: string, ttl: number = 3600): Promise<string> {
    const token = "hvs.test-" + Math.random().toString(36).slice(2);
    this.tokens.set(token, { policy, ttl, created: Date.now() });
    return token;
  }

  /** True when the token was issued by this instance, not revoked, and younger than its TTL. */
  async validateToken(token: string): Promise<boolean> {
    const tokenData = this.tokens.get(token);
    if (!tokenData) return false;
    const elapsedSeconds = (Date.now() - tokenData.created) / 1000;
    return elapsedSeconds < tokenData.ttl;
  }

  /** Forgets the token; later validateToken/checkAccess calls return false. */
  async revokeToken(token: string): Promise<void> {
    this.tokens.delete(token);
  }

  /**
   * Checks whether `path` matches one of the rules of the token's policy.
   *
   * Rules are globs in which `*` matches any run of characters. The whole path
   * must match the rule: every `*` is honoured, other characters are matched
   * literally, and a path that merely contains an allowed pattern is rejected.
   * Token expiry is not considered here; use validateToken for that.
   */
  checkAccess(token: string, path: string): boolean {
    const tokenData = this.tokens.get(token);
    if (!tokenData) return false;
    const allowedRules = this.policies.get(tokenData.policy) ?? [];
    return allowedRules.some((rule) => MockVault.globToRegExp(rule).test(path));
  }

  /** Returns the live access-log array (not a copy). */
  getAccessLog(): Array<{ path: string; action: string; timestamp: number }> {
    return this.accessLog;
  }

  /** Clears the access log and all tokens. Secrets and policies are kept. */
  reset(): void {
    this.accessLog = [];
    this.tokens.clear();
  }

  /** Converts a `*` glob into an anchored RegExp with all other characters escaped. */
  private static globToRegExp(glob: string): RegExp {
    const body = glob
      .split("*")
      .map((literal) => literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
      .join(".*");
    return new RegExp(`^${body}$`);
  }
}
// =============================================================================
// Real Vault Client
// =============================================================================
/**
 * HTTP client for a real Vault server.
 *
 * Every request goes through the private `fetch` wrapper, which prefixes the
 * server address, sends the VAULT_TOKEN environment variable as
 * `X-Vault-Token` (callers may override the header per request) and disables
 * certificate verification via Bun's `tls` option for self-signed setups.
 */
export class RealVault implements IVault {
  // Health status codes that mean "a Vault node answered and is unsealed":
  // 200 active, 429 standby, 472 DR secondary, 473 performance standby.
  private static readonly REACHABLE_HEALTH_STATUSES: ReadonlySet<number> = new Set([200, 429, 472, 473]);

  private addr: string;
  private token: string | null = null;

  constructor(addr: string) {
    this.addr = addr;
  }

  isMock(): boolean {
    return false;
  }

  /** Issues a request against `addr + path` with the default Vault headers applied first. */
  private async fetch(path: string, options: RequestInit = {}): Promise<Response> {
    const token = this.token || process.env.VAULT_TOKEN;
    return fetch(`${this.addr}${path}`, {
      ...options,
      headers: {
        "X-Vault-Token": token || "",
        "Content-Type": "application/json",
        // Spread last so per-call headers (e.g. a specific token) win.
        ...options.headers,
      },
      // @ts-ignore - Bun supports this for self-signed certs
      tls: { rejectUnauthorized: false },
    });
  }

  /**
   * Reads `/v1/secret/data/<path>` and returns the nested `data.data` payload.
   * Returns null on any non-OK response, missing payload, or thrown error.
   */
  async getSecret(path: string): Promise<any> {
    try {
      const response = await this.fetch(`/v1/secret/data/${path}`);
      if (!response.ok) return null;
      const data = await response.json();
      return data?.data?.data || null;
    } catch {
      return null;
    }
  }

  /**
   * Requests a new token for `policy` with a TTL of `ttl` seconds.
   * Resolves to an empty string when the response carries no client token.
   */
  async createToken(policy: string, ttl: number = 3600): Promise<string> {
    const response = await this.fetch("/v1/auth/token/create", {
      method: "POST",
      body: JSON.stringify({
        policies: [policy],
        ttl: `${ttl}s`,
      }),
    });
    const data = await response.json();
    return data?.auth?.client_token || "";
  }

  /** True when Vault accepts `token` for a self-lookup; false on rejection or error. */
  async validateToken(token: string): Promise<boolean> {
    try {
      const response = await this.fetch("/v1/auth/token/lookup-self", {
        headers: { "X-Vault-Token": token },
      });
      return response.ok;
    } catch {
      return false;
    }
  }

  /** Asks Vault to revoke `token` by calling revoke-self with it. The response is not inspected. */
  async revokeToken(token: string): Promise<void> {
    await this.fetch("/v1/auth/token/revoke-self", {
      method: "POST",
      headers: { "X-Vault-Token": token },
    });
  }

  /**
   * Probes `/v1/sys/health`.
   *
   * Accepts the same status codes as the service-validation probe in this
   * file (200, 429, 472, 473), so a standby node that passes validation is
   * not rejected here. Returns false on any other status or thrown error.
   */
  async testConnection(): Promise<boolean> {
    try {
      const response = await this.fetch("/v1/sys/health");
      return RealVault.REACHABLE_HEALTH_STATUSES.has(response.status);
    } catch {
      return false;
    }
  }
}
// =============================================================================
// Mock DragonflyDB
// =============================================================================
/**
 * In-memory stand-in for DragonflyDB.
 *
 * Strings, hashes and lists live in separate Maps; TTLs are tracked for string
 * keys only. Values are stored as given (no serialization). Pub/sub handlers
 * are invoked synchronously from publish().
 */
export class MockDragonfly implements IDragonfly {
  private store: Map<string, any> = new Map();
  private hashes: Map<string, Map<string, any>> = new Map();
  private lists: Map<string, any[]> = new Map();
  private expirations: Map<string, number> = new Map();
  private subscribers: Map<string, Array<(msg: string) => void>> = new Map();

  isMock(): boolean {
    return true;
  }

  // String operations

  /**
   * Stores `value` under `key`.
   * - `NX`: refuse (resolve to null) when a live key already exists; an
   *   expired key does not block the write.
   * - `EX`: TTL in seconds. Without `EX`, any previous TTL is discarded, as a
   *   plain Redis SET does.
   */
  async set(key: string, value: any, options?: { EX?: number; NX?: boolean }): Promise<string | null> {
    this.purgeIfExpired(key);
    if (options?.NX && this.store.has(key)) return null;
    this.store.set(key, value);
    if (options?.EX) {
      this.expirations.set(key, Date.now() + options.EX * 1000);
    } else {
      this.expirations.delete(key);
    }
    return "OK";
  }

  /** Resolves to the stored value (falsy values included) or null when absent/expired. */
  async get(key: string): Promise<any> {
    this.purgeIfExpired(key);
    return this.store.get(key) ?? null;
  }

  /** Removes `key` whatever its type (string, hash or list); resolves to 1 if anything was removed. */
  async del(key: string): Promise<number> {
    this.purgeIfExpired(key);
    const removedString = this.store.delete(key);
    const removedHash = this.hashes.delete(key);
    const removedList = this.lists.delete(key);
    this.expirations.delete(key);
    return removedString || removedHash || removedList ? 1 : 0;
  }

  /** Resolves to 1 when a live string, hash or list exists under `key`. */
  async exists(key: string): Promise<number> {
    this.purgeIfExpired(key);
    return this.store.has(key) || this.hashes.has(key) || this.lists.has(key) ? 1 : 0;
  }

  /** Sets a TTL (seconds) on an existing string key; false when the key is absent. */
  async expire(key: string, seconds: number): Promise<boolean> {
    this.purgeIfExpired(key);
    if (!this.store.has(key)) return false;
    this.expirations.set(key, Date.now() + seconds * 1000);
    return true;
  }

  // Hash operations

  /** Sets one field; resolves to 1 when the field is new, 0 when it was overwritten. */
  async hSet(key: string, field: string, value: any): Promise<number> {
    const hash = this.hashFor(key);
    const existed = hash.has(field);
    hash.set(field, value);
    return existed ? 0 : 1;
  }

  /** Resolves to the field value (falsy values included) or null when absent. */
  async hGet(key: string, field: string): Promise<any> {
    return this.hashes.get(key)?.get(field) ?? null;
  }

  /** Resolves to a plain-object copy of the hash, or `{}` when it does not exist. */
  async hGetAll(key: string): Promise<Record<string, any>> {
    const hash = this.hashes.get(key);
    if (!hash) return {};
    const result: Record<string, any> = {};
    for (const [k, v] of hash) {
      result[k] = v;
    }
    return result;
  }

  /** Adds `increment` to the field (missing fields count as 0); the new value is stored as a string. */
  async hIncrBy(key: string, field: string, increment: number): Promise<number> {
    const hash = this.hashFor(key);
    const current = parseInt(hash.get(field) || "0", 10);
    const newValue = current + increment;
    hash.set(field, newValue.toString());
    return newValue;
  }

  // List operations

  /** Appends values and resolves to the new list length. */
  async rPush(key: string, ...values: any[]): Promise<number> {
    if (!this.lists.has(key)) this.lists.set(key, []);
    const list = this.lists.get(key)!;
    list.push(...values);
    return list.length;
  }

  /**
   * Returns the elements from `start` to `stop`, both inclusive, as Redis
   * LRANGE does. Negative indexes count from the end (-1 is the last
   * element); out-of-range indexes are clamped.
   */
  async lRange(key: string, start: number, stop: number): Promise<any[]> {
    const list = this.lists.get(key) ?? [];
    const length = list.length;
    const from = start < 0 ? Math.max(length + start, 0) : start;
    const to = stop < 0 ? length + stop : Math.min(stop, length - 1);
    if (from > to || from >= length) return [];
    return list.slice(from, to + 1);
  }

  // Pub/Sub

  /** Registers `handler` for messages published on `channel`. */
  async subscribe(channel: string, handler: (msg: string) => void): Promise<void> {
    if (!this.subscribers.has(channel)) this.subscribers.set(channel, []);
    this.subscribers.get(channel)!.push(handler);
  }

  /** Calls every handler of `channel` synchronously; resolves to the handler count. */
  async publish(channel: string, message: string): Promise<number> {
    const handlers = this.subscribers.get(channel) || [];
    handlers.forEach(h => h(message));
    return handlers.length;
  }

  /** Drops the handlers of one channel, or of every channel when none is given. */
  async unsubscribe(channel?: string): Promise<void> {
    if (channel) {
      this.subscribers.delete(channel);
    } else {
      this.subscribers.clear();
    }
  }

  // Utility

  private isExpired(key: string): boolean {
    const expiry = this.expirations.get(key);
    if (!expiry) return false;
    return Date.now() > expiry;
  }

  /** Removes a string key (and its TTL entry) once its TTL has elapsed. */
  private purgeIfExpired(key: string): void {
    if (this.isExpired(key)) {
      this.store.delete(key);
      this.expirations.delete(key);
    }
  }

  /** Returns the hash stored under `key`, creating an empty one on first use. */
  private hashFor(key: string): Map<string, any> {
    let hash = this.hashes.get(key);
    if (!hash) {
      hash = new Map();
      this.hashes.set(key, hash);
    }
    return hash;
  }

  /** Clears all data, TTLs and subscriptions. */
  reset(): void {
    this.store.clear();
    this.hashes.clear();
    this.lists.clear();
    this.expirations.clear();
    this.subscribers.clear();
  }

  // Test helpers

  /** Exposes the live internal Maps (not copies) for inspection in tests. */
  getState(): { store: Map<string, any>; hashes: Map<string, Map<string, any>>; lists: Map<string, any[]> } {
    return { store: this.store, hashes: this.hashes, lists: this.lists };
  }
}
// =============================================================================
// Real DragonflyDB Client
// =============================================================================
/**
 * IDragonfly implementation backed by a real Redis-protocol client.
 *
 * The connection is opened lazily: every command first awaits connect(),
 * which is a no-op once the client is connected. Non-string values written
 * through set/hSet/rPush are JSON-encoded; reads return whatever the client
 * returns, without decoding.
 */
export class RealDragonfly implements IDragonfly {
  private client: RedisClientType;
  private connected = false;

  /**
   * @param url Connection URL for the client.
   * @param password Optional password; falls back to DRAGONFLY_PASSWORD, then to a built-in default.
   */
  constructor(url: string, password?: string) {
    const effectivePassword = password || process.env.DRAGONFLY_PASSWORD || "governance2026";
    this.client = createClient({
      url,
      password: effectivePassword,
    });
  }

  isMock(): boolean {
    return false;
  }

  /** Opens the connection unless this instance already did. */
  async connect(): Promise<void> {
    if (this.connected) return;
    await this.client.connect();
    this.connected = true;
  }

  /** Closes the connection if this instance opened it. */
  async disconnect(): Promise<void> {
    if (!this.connected) return;
    await this.client.disconnect();
    this.connected = false;
  }

  /** Ensures the connection is open and hands back the client. */
  private async ready(): Promise<RedisClientType> {
    await this.connect();
    return this.client;
  }

  /** Strings pass through untouched; everything else is JSON-encoded. */
  private static encode(value: any): string {
    if (typeof value === "string") return value;
    return JSON.stringify(value);
  }

  async set(key: string, value: any, options?: { EX?: number; NX?: boolean }): Promise<string | null> {
    const redis = await this.ready();
    const payload = RealDragonfly.encode(value);
    // Only forward the flags that were actually requested.
    const flags: any = {
      ...(options?.EX ? { EX: options.EX } : {}),
      ...(options?.NX ? { NX: true } : {}),
    };
    return redis.set(key, payload, flags);
  }

  async get(key: string): Promise<any> {
    const redis = await this.ready();
    return redis.get(key);
  }

  async del(key: string): Promise<number> {
    const redis = await this.ready();
    return redis.del(key);
  }

  async exists(key: string): Promise<number> {
    const redis = await this.ready();
    return redis.exists(key);
  }

  async hSet(key: string, field: string, value: any): Promise<number> {
    const redis = await this.ready();
    return redis.hSet(key, field, RealDragonfly.encode(value));
  }

  async hGet(key: string, field: string): Promise<any> {
    const redis = await this.ready();
    return redis.hGet(key, field);
  }

  async hGetAll(key: string): Promise<Record<string, any>> {
    const redis = await this.ready();
    return redis.hGetAll(key);
  }

  async hIncrBy(key: string, field: string, increment: number): Promise<number> {
    const redis = await this.ready();
    return redis.hIncrBy(key, field, increment);
  }

  async rPush(key: string, ...values: any[]): Promise<number> {
    const redis = await this.ready();
    const encoded = values.map((item) => RealDragonfly.encode(item));
    return redis.rPush(key, encoded);
  }

  async lRange(key: string, start: number, stop: number): Promise<any[]> {
    const redis = await this.ready();
    return redis.lRange(key, start, stop);
  }
}
// =============================================================================
// Mock LLM
// =============================================================================
/**
 * Scriptable LLM double.
 *
 * Canned responses are keyed by a pattern; a prompt selects the first
 * registered pattern it contains (case-insensitive), otherwise the default
 * response. Latency and a random failure rate can be simulated, and every
 * successful call is logged.
 */
export class MockLLM implements ILLM {
  private responses = new Map<string, string>();
  private callLog: Array<{ prompt: string; response: string; timestamp: number }> = [];
  private defaultResponse = '{"confidence": 0.5, "steps": [], "assumptions": []}';
  private latencyMs = 0;
  private failureRate = 0;

  isMock(): boolean {
    return true;
  }

  /** Registers (or replaces) the response returned for prompts containing `pattern`. */
  setResponse(pattern: string, response: string): void {
    this.responses.set(pattern, response);
  }

  /** Replaces the response used when no pattern matches. */
  setDefaultResponse(response: string): void {
    this.defaultResponse = response;
  }

  /** Delay, in milliseconds, applied before each completion. */
  setLatency(ms: number): void {
    this.latencyMs = ms;
  }

  /** Probability in [0, 1] that a completion throws instead of answering. */
  setFailureRate(rate: number): void {
    this.failureRate = rate;
  }

  /**
   * Produces the canned response for `prompt`.
   * @throws Error("LLM_ERROR: Simulated failure") according to the failure rate;
   *         failed calls are not logged.
   */
  async complete(prompt: string, options?: { maxTokens?: number; temperature?: number }): Promise<string> {
    // Simulated latency comes first, then the failure roll.
    if (this.latencyMs > 0) {
      await new Promise(resolve => setTimeout(resolve, this.latencyMs));
    }
    const roll = Math.random();
    if (roll < this.failureRate) {
      throw new Error("LLM_ERROR: Simulated failure");
    }

    // First registered pattern contained in the prompt wins.
    const loweredPrompt = prompt.toLowerCase();
    const match = Array.from(this.responses).find(
      ([pattern]) => loweredPrompt.includes(pattern.toLowerCase())
    );
    const response = match ? match[1] : this.defaultResponse;

    this.callLog.push({ prompt, response, timestamp: Date.now() });
    return response;
  }

  /** Returns the live call-log array (not a copy). */
  getCallLog(): Array<{ prompt: string; response: string; timestamp: number }> {
    return this.callLog;
  }

  getCallCount(): number {
    return this.callLog.length;
  }

  /** Clears the call log and the latency/failure settings; registered responses are kept. */
  reset(): void {
    this.callLog = [];
    this.latencyMs = 0;
    this.failureRate = 0;
  }
}
// =============================================================================
// Test Utilities
// =============================================================================
/**
 * Throws when REAL mode cannot be honoured: runs the service validation
 * against the global config and lists every unavailable service and missing
 * path in the error message.
 */
async function assertRealServicesAvailable(): Promise<void> {
  const report = await validateServices(globalConfig);
  if (report.allServicesAvailable) return;

  const problems: string[] = [];
  for (const r of report.results) {
    if (!r.available) {
      problems.push(`${r.service}: ${r.error || "unavailable"}`);
    }
  }
  for (const f of report.missingFiles) {
    problems.push(`Missing file: ${f}`);
  }
  throw new Error(
    `REAL mode requires all services. Missing:\n - ${problems.join("\n - ")}\n\n` +
      `Use --use-mocks to explicitly enable mock mode.`
  );
}

/** Builds the Vault handle for `mode`: a mock, or a RealVault that passed its connection test. */
async function buildVaultForMode(mode: TestMode): Promise<IVault> {
  if (mode === TestMode.MOCK) return new MockVault();

  // Without a token a real client cannot authenticate, so a mock is used.
  // This applies to HYBRID and (existing behaviour) to REAL mode as well;
  // the substitution is visible to callers through `_usingMocks`.
  if (!process.env.VAULT_TOKEN) return new MockVault();

  try {
    const realVault = new RealVault(globalConfig.vaultAddr);
    if (!(await realVault.testConnection())) {
      throw new Error("Vault connection test failed");
    }
    return realVault;
  } catch (e) {
    if (mode === TestMode.HYBRID) return new MockVault();
    throw e;
  }
}

/** Builds the DragonflyDB handle for `mode`: a mock, or a connected RealDragonfly. */
async function buildDragonflyForMode(mode: TestMode): Promise<IDragonfly> {
  if (mode === TestMode.MOCK) return new MockDragonfly();

  try {
    const realDragonfly = new RealDragonfly(globalConfig.dragonflyAddr);
    await realDragonfly.connect();
    return realDragonfly;
  } catch (e) {
    if (mode === TestMode.HYBRID) return new MockDragonfly();
    throw e;
  }
}

/**
 * Create a test context. By default, requires real services.
 * Pass mode: TestMode.MOCK to explicitly use mocks.
 *
 * Behaviour by mode:
 * - REAL: validates all services first and throws when any is unavailable or
 *   a required path is missing.
 * - HYBRID: uses real services where they can be reached, mocks otherwise.
 * - MOCK: uses mocks only.
 * The LLM is always a MockLLM unless the caller supplies one.
 *
 * Any field supplied in `options` replaces the generated one. A supplied
 * `vault` or `dragonfly` is used as-is, and no connection is opened for a
 * service the caller provides. Fields that are absent or explicitly
 * `undefined` fall back to the generated value.
 *
 * @param options Optional overrides, including the mode to use instead of the global one.
 * @returns The assembled context; `_usingMocks` names every service whose handle reports `isMock()`.
 * @throws Error in REAL mode when validation fails, or when a real service cannot be reached.
 */
export async function createTestContext(
  options?: Partial<TestContext> & { mode?: TestMode }
): Promise<TestContext> {
  const mode = options?.mode ?? globalConfig.mode;

  // Validate services if not in mock mode
  if (mode === TestMode.REAL) {
    await assertRealServicesAvailable();
  }

  const usingMocks: string[] = [];
  const track = <T extends { isMock(): boolean }>(label: string, service: T): T => {
    if (service.isMock()) usingMocks.push(label);
    return service;
  };

  // Only build (and connect) the services the caller did not provide.
  const vault = track("Vault", options?.vault ?? (await buildVaultForMode(mode)));
  const dragonfly = track("DragonflyDB", options?.dragonfly ?? (await buildDragonflyForMode(mode)));
  const llm = track("LLM", options?.llm ?? new MockLLM());

  return {
    taskId: options?.taskId ?? "test-task-" + Math.random().toString(36).slice(2, 8),
    agentId: options?.agentId ?? "test-agent-" + Math.random().toString(36).slice(2, 8),
    startTime: options?.startTime ?? Date.now(),
    mode,
    vault,
    dragonfly,
    llm,
    _usingMocks: options?._usingMocks ?? usingMocks,
  };
}
/**
 * Races `promise` against a timer.
 *
 * @param promise The operation to wait for.
 * @param ms Milliseconds to wait before giving up.
 * @param message Message of the Error thrown on timeout (default "Timeout").
 * @returns The value `promise` resolves to.
 * @throws Error(message) when the timer fires first; otherwise whatever
 *         `promise` rejects with.
 */
export async function withTimeout<T>(promise: Promise<T>, ms: number, message: string = "Timeout"): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error(message)), ms);
  });
  try {
    return await Promise.race([promise, timeout]);
  } finally {
    // Without this the pending timer keeps the event loop alive for up to
    // `ms` after the race has already been decided.
    clearTimeout(timer);
  }
}
/**
 * Builds a fresh instruction packet for a test agent.
 *
 * Every call returns newly created nested objects, so callers may mutate the
 * result (for example to tighten the error budget) without affecting others.
 *
 * @param taskId Stored as `task_id`.
 * @param agentId Stored as `agent_id`.
 * @param objective Stored as `objective`.
 * @returns The packet, stamped with the current time in `created_at` (ISO-8601).
 */
export function generateInstructionPacket(taskId: string, agentId: string, objective: string) {
  const constraints = {
    scope: ["sandbox only"],
    forbidden: ["no prod access"],
    required_steps: ["plan before execute"],
  };
  const errorBudget = {
    max_total_errors: 5,
    max_same_error_repeats: 2,
    max_procedure_violations: 1,
  };
  const createdAt = new Date().toISOString();

  return {
    agent_id: agentId,
    task_id: taskId,
    created_for: "Test Task",
    objective,
    deliverables: ["plan", "report"],
    constraints,
    success_criteria: ["plan generated"],
    error_budget: errorBudget,
    escalation_rules: ["If confidence < 0.7 -> escalate"],
    created_at: createdAt,
  };
}
// =============================================================================
// Test Harness
// =============================================================================
/**
 * Runs registered scenarios one after another and collects per-scenario
 * results plus aggregate metrics.
 *
 * For each scenario a fresh context is created in the harness's mode, then
 * setup → execute → assertions → cleanup are invoked. A thrown error marks the
 * scenario as failed. Scenarios that declare `requiresReal` are skipped in
 * MOCK mode.
 */
export class TestHarness {
  private scenarios: TestScenario[] = [];
  private results: Map<string, { passed: boolean; error?: string; duration: number; mocks: string[] }> = new Map();
  private config: TestConfig;

  /** @param config Overrides applied on top of the module-level config. */
  constructor(config?: Partial<TestConfig>) {
    this.config = { ...globalConfig, ...config };
  }

  /** Queues a scenario; scenarios run in insertion order. */
  addScenario(scenario: TestScenario): void {
    this.scenarios.push(scenario);
  }

  /**
   * Runs every queued scenario.
   *
   * Guarantees per scenario: `cleanup` is invoked at most once (also after a
   * failure, where its own errors are ignored), and the context's DragonflyDB
   * connection is closed whether the scenario passed or failed.
   *
   * @returns Counts, total duration, the pass rate over executed scenarios
   *          (0 when nothing was executed) and the mocked services seen.
   */
  async runAll(): Promise<TestMetrics> {
    const startTime = Date.now();
    let passed = 0;
    let failed = 0;
    let skipped = 0;
    const allMocksUsed: Set<string> = new Set();

    // Print mode banner
    this.printModeBanner();

    for (const scenario of this.scenarios) {
      const scenarioStart = Date.now();

      // Check if scenario requires real services
      if (scenario.requiresReal && scenario.requiresReal.length > 0 && this.config.mode === TestMode.MOCK) {
        console.log(`\n[SKIP] ${scenario.name} (requires real: ${scenario.requiresReal.join(", ")})`);
        skipped++;
        continue;
      }

      let ctx: TestContext | undefined;
      let cleanupAttempted = false;
      try {
        console.log(`\n[TEST] Running: ${scenario.name}`);
        ctx = await createTestContext({ mode: this.config.mode });

        // Log mocks in use for this test
        if (ctx._usingMocks.length > 0) {
          console.log(` [MOCKS: ${ctx._usingMocks.join(", ")}]`);
          ctx._usingMocks.forEach(m => allMocksUsed.add(m));
        }

        await scenario.setup();
        await scenario.execute(ctx);
        await scenario.assertions(ctx);
        cleanupAttempted = true;
        await scenario.cleanup();

        this.results.set(scenario.name, {
          passed: true,
          duration: Date.now() - scenarioStart,
          mocks: ctx._usingMocks,
        });
        passed++;
        console.log(`[PASS] ${scenario.name} (${Date.now() - scenarioStart}ms)`);
      } catch (error: any) {
        this.results.set(scenario.name, {
          passed: false,
          error: error.message,
          duration: Date.now() - scenarioStart,
          mocks: ctx?._usingMocks ?? [],
        });
        failed++;
        console.log(`[FAIL] ${scenario.name}: ${error.message}`);
        // Skip the retry when cleanup itself was the step that threw.
        if (!cleanupAttempted) {
          try {
            await scenario.cleanup();
          } catch {
            // Best effort: the scenario is already recorded as failed.
          }
        }
      } finally {
        // Cleanup real connections, on success and on failure alike.
        if (ctx?.dragonfly.disconnect) {
          try {
            await ctx.dragonfly.disconnect();
          } catch (disconnectError: any) {
            console.log(` [WARN] Failed to disconnect DragonflyDB: ${disconnectError?.message}`);
          }
        }
      }
    }

    const executed = passed + failed;
    return {
      passed,
      failed,
      skipped,
      duration: Date.now() - startTime,
      // Guard against 0/0 when every scenario was skipped.
      coverage: executed > 0 ? (passed / executed) * 100 : 0,
      mocksUsed: Array.from(allMocksUsed),
    };
  }

  /** Prints a banner describing the mode the harness runs in. */
  private printModeBanner(): void {
    console.log("\n" + "=".repeat(60));
    if (this.config.mode === TestMode.MOCK) {
      console.log("⚠️ MOCK MODE ENABLED");
      console.log(" Tests are running against MOCK services.");
      console.log(" Results may not reflect real system behavior.");
      console.log(" Remove --use-mocks to test against real services.");
    } else if (this.config.mode === TestMode.HYBRID) {
      console.log("⚠️ HYBRID MODE");
      console.log(" Using real services where available, mocks otherwise.");
      console.log(" Check individual test output for mock usage.");
    } else {
      console.log("✅ REAL MODE");
      console.log(" Tests are running against REAL services.");
    }
    console.log("=".repeat(60));
  }

  /** Returns the live per-scenario result map, keyed by scenario name. */
  getResults(): Map<string, { passed: boolean; error?: string; duration: number; mocks: string[] }> {
    return this.results;
  }
}
// =============================================================================
// Pre-built Test Scenarios
// =============================================================================
/**
 * Factories for ready-made scenarios.
 *
 * Each factory returns a TestScenario with no-op setup/cleanup. The scenarios
 * seed state through the context's DragonflyDB handle and then inspect it;
 * none of them starts an agent. Assertions that read an agent state key are
 * only evaluated when that key exists.
 */
export const CommonScenarios = {
  /**
   * Happy path - agent completes successfully.
   * `AgentClass` is accepted but not used by this factory.
   */
  happyPath(AgentClass: any): TestScenario {
    return {
      name: "Happy Path - Successful Completion",
      description: "Agent completes all phases without errors",
      setup: async () => {},
      execute: async (ctx) => {
        // Only works with mocks - log warning if real
        if (!ctx.llm.isMock()) {
          console.log(" [WARN] This scenario requires mock LLM to control responses");
        }
        // Set up successful LLM responses
        if (ctx.llm.isMock()) {
          const cannedPlan = {
            title: "Test Plan",
            confidence: 0.85,
            steps: [{ step: 1, action: "Test action" }],
          };
          (ctx.llm as MockLLM).setResponse("plan", JSON.stringify(cannedPlan));
        }
        // Create instruction packet
        const packet = generateInstructionPacket(ctx.taskId, ctx.agentId, "Test objective");
        const packetKey = `agent:${ctx.agentId}:packet`;
        await ctx.dragonfly.set(packetKey, JSON.stringify(packet));
      },
      assertions: async (ctx) => {
        // Check state reached EXIT (only when a state record exists)
        const rawState = await ctx.dragonfly.get(`agent:${ctx.agentId}:state`);
        if (!rawState) return;
        const state = JSON.parse(rawState);
        expect(state.phase).toBe("EXIT");
        expect(state.status).toBe("COMPLETED");
      },
      cleanup: async () => {},
    };
  },

  /**
   * Error budget exceeded.
   * `AgentClass` is accepted but not used by this factory.
   */
  errorBudgetExceeded(AgentClass: any): TestScenario {
    return {
      name: "Error Budget Exceeded - Revocation",
      description: "Agent is revoked when error budget is exceeded",
      setup: async () => {},
      execute: async (ctx) => {
        if (ctx.llm.isMock()) {
          (ctx.llm as MockLLM).setFailureRate(1.0); // All LLM calls fail
        }
        const packet = generateInstructionPacket(ctx.taskId, ctx.agentId, "Test objective");
        packet.error_budget.max_total_errors = 2;
        await ctx.dragonfly.set(`agent:${ctx.agentId}:packet`, JSON.stringify(packet));
        // Simulate errors: one more than the budget of 2 allows
        const errorsKey = `agent:${ctx.agentId}:errors`;
        await ctx.dragonfly.hIncrBy(errorsKey, "total_errors", 3);
      },
      assertions: async (ctx) => {
        const rawState = await ctx.dragonfly.get(`agent:${ctx.agentId}:state`);
        if (!rawState) return;
        const state = JSON.parse(rawState);
        expect(state.status).toBe("REVOKED");
      },
      cleanup: async () => {},
    };
  },

  /** Stuck detection. */
  stuckDetection(): TestScenario {
    return {
      name: "Stuck Detection - GAMMA Spawn",
      description: "GAMMA is spawned when agents are stuck",
      setup: async () => {},
      execute: async (ctx) => {
        // Set up agent state as stuck (old last_activity)
        const sixtySecondsAgo = new Date(Date.now() - 60000).toISOString();
        const stuckState = {
          agent_id: ctx.agentId,
          role: "ALPHA",
          status: "WORKING",
          last_activity: sixtySecondsAgo,
        };
        await ctx.dragonfly.hSet(`agents:${ctx.taskId}`, "ALPHA", JSON.stringify(stuckState));
      },
      assertions: async (ctx) => {
        // Check that stuck would be detected
        const rawState = await ctx.dragonfly.hGet(`agents:${ctx.taskId}`, "ALPHA");
        if (!rawState) return;
        const state = JSON.parse(rawState);
        const lastActivityMs = new Date(state.last_activity).getTime();
        const inactivity = (Date.now() - lastActivityMs) / 1000;
        expect(inactivity).toBeGreaterThan(30);
      },
      cleanup: async () => {},
    };
  },

  /** Conflict resolution. */
  conflictResolution(): TestScenario {
    return {
      name: "Conflict Resolution",
      description: "Multiple proposals lead to conflict detection",
      setup: async () => {},
      execute: async (ctx) => {
        const solutionsKey = `blackboard:${ctx.taskId}:solutions`;
        // Simulate conflicting proposals
        const firstProposal = {
          author: "ALPHA",
          value: { approach: "Approach A", confidence: 0.8 },
        };
        const secondProposal = {
          author: "ALPHA",
          value: { approach: "Approach B", confidence: 0.7 },
        };
        await ctx.dragonfly.hSet(solutionsKey, "proposal_1", JSON.stringify(firstProposal));
        await ctx.dragonfly.hSet(solutionsKey, "proposal_2", JSON.stringify(secondProposal));
        // BETA rejects proposal_2
        const rejection = {
          accepted: false,
          score: 0.5,
        };
        await ctx.dragonfly.hSet(`blackboard:${ctx.taskId}:progress`, "eval_proposal_2", JSON.stringify(rejection));
        await ctx.dragonfly.hIncrBy(`metrics:${ctx.taskId}`, "conflicts_detected", 1);
      },
      assertions: async (ctx) => {
        const conflicts = await ctx.dragonfly.hGet(`metrics:${ctx.taskId}`, "conflicts_detected");
        const conflictCount = parseInt(conflicts || "0");
        expect(conflictCount).toBeGreaterThan(0);
      },
      cleanup: async () => {},
    };
  },

  /** Real service connectivity test. */
  realServiceConnectivity(): TestScenario {
    return {
      name: "Real Service Connectivity",
      description: "Verify connection to real Vault and DragonflyDB",
      requiresReal: ["Vault", "DragonflyDB"],
      setup: async () => {},
      execute: async (ctx) => {
        if (ctx.vault.isMock() || ctx.dragonfly.isMock()) {
          throw new Error("This test requires real services, but mocks are in use");
        }
        // Test DragonflyDB with a write / read / delete round trip
        const testKey = `test:connectivity:${Date.now()}`;
        await ctx.dragonfly.set(testKey, "test-value");
        const readBack = await ctx.dragonfly.get(testKey);
        await ctx.dragonfly.del(testKey);
        if (readBack !== "test-value") {
          throw new Error(`DragonflyDB read/write failed: expected 'test-value', got '${readBack}'`);
        }
      },
      assertions: async (ctx) => {
        // If we got here, services are working
        expect(ctx.vault.isMock()).toBe(false);
        expect(ctx.dragonfly.isMock()).toBe(false);
      },
      cleanup: async () => {},
    };
  },
};
// =============================================================================
// Example Test Suite
// =============================================================================
/**
 * Registers the example `bun:test` suite that exercises the mock
 * implementations (vault, dragonfly, LLM) and the instruction-packet
 * generator.
 *
 * Calling this function only declares the suite via `describe`/`it`; a fresh
 * mock-mode test context is created before every test case.
 */
export function runExampleTests() {
  describe("Agent Governance Tests", () => {
    let ctx: TestContext;

    // Each test gets its own context so state cannot leak between cases.
    beforeEach(async () => {
      ctx = await createTestContext({ mode: TestMode.MOCK });
    });

    describe("MockVault", () => {
      it("should store and retrieve secrets", async () => {
        (ctx.vault as MockVault).setSecret("test/secret", { key: "value" });
        const secret = await ctx.vault.getSecret("test/secret");
        expect(secret.key).toBe("value");
      });

      it("should create and validate tokens", async () => {
        const token = await ctx.vault.createToken("t0-observer", 60);
        expect(token).toStartWith("hvs.test-");
        expect(await ctx.vault.validateToken(token)).toBe(true);
      });

      it("should revoke tokens", async () => {
        const token = await ctx.vault.createToken("t0-observer");
        await ctx.vault.revokeToken(token);
        expect(await ctx.vault.validateToken(token)).toBe(false);
      });
    });

    describe("MockDragonfly", () => {
      it("should handle string operations", async () => {
        await ctx.dragonfly.set("key", "value");
        expect(await ctx.dragonfly.get("key")).toBe("value");
      });

      it("should handle hash operations", async () => {
        await ctx.dragonfly.hSet("hash", "field", "value");
        expect(await ctx.dragonfly.hGet("hash", "field")).toBe("value");
      });

      it("should handle list operations", async () => {
        await (ctx.dragonfly as MockDragonfly).rPush("list", "a", "b", "c");
        const items = await ctx.dragonfly.lRange("list", 0, -1);
        expect(items).toEqual(["a", "b", "c"]);
      });

      it("should handle NX option", async () => {
        await ctx.dragonfly.set("existing", "first");
        const result = await ctx.dragonfly.set("existing", "second", { NX: true });
        expect(result).toBeNull();
        expect(await ctx.dragonfly.get("existing")).toBe("first");
      });
    });

    describe("MockLLM", () => {
      it("should return default response", async () => {
        const response = await ctx.llm.complete("test prompt");
        expect(response).toContain("confidence");
      });

      it("should match patterns", async () => {
        (ctx.llm as MockLLM).setResponse("terraform", '{"tool": "terraform"}');
        const response = await ctx.llm.complete("Create a terraform plan");
        expect(response).toContain("terraform");
      });

      it("should simulate failures", async () => {
        (ctx.llm as MockLLM).setFailureRate(1.0);
        // `.rejects` yields a promise; it must be awaited, otherwise the test
        // can finish before the assertion settles and a failure goes unnoticed.
        await expect(ctx.llm.complete("test")).rejects.toThrow("LLM_ERROR");
      });

      it("should track call count", async () => {
        await ctx.llm.complete("prompt 1");
        await ctx.llm.complete("prompt 2");
        expect((ctx.llm as MockLLM).getCallCount()).toBe(2);
      });
    });

    describe("Instruction Packets", () => {
      it("should generate valid packets", () => {
        const packet = generateInstructionPacket("task-1", "agent-1", "Test objective");
        expect(packet.agent_id).toBe("agent-1");
        expect(packet.task_id).toBe("task-1");
        expect(packet.error_budget.max_total_errors).toBe(5);
      });
    });
  });
}
// =============================================================================
// CLI
// =============================================================================
/**
 * Reads the command-line flags that follow the script path in `process.argv`.
 *
 * Recognised flags:
 *  - `--use-mocks`      selects `TestMode.MOCK`
 *  - `--hybrid`         selects `TestMode.HYBRID`
 *  - `--validate-only`  sets `validateOnly`
 *  - `-v`, `--verbose`  sets `verbose`
 *
 * The mode starts as `TestMode.REAL`; when several mode flags are given the
 * last one wins. Unrecognised arguments are ignored.
 *
 * @returns The selected mode plus the `validateOnly` and `verbose` switches.
 */
function parseArgs(): { mode: TestMode; validateOnly: boolean; verbose: boolean } {
  const parsed: { mode: TestMode; validateOnly: boolean; verbose: boolean } = {
    mode: TestMode.REAL,
    validateOnly: false,
    verbose: false,
  };

  process.argv.slice(2).forEach((flag) => {
    switch (flag) {
      case "--use-mocks":
        parsed.mode = TestMode.MOCK;
        break;
      case "--hybrid":
        parsed.mode = TestMode.HYBRID;
        break;
      case "--validate-only":
        parsed.validateOnly = true;
        break;
      case "-v":
      case "--verbose":
        parsed.verbose = true;
        break;
      default:
        // Anything else is silently skipped.
        break;
    }
  });

  return parsed;
}
// CLI entry point. Guarded by `import.meta.main` so the block below is skipped
// unless that flag is set (i.e. when the module is merely imported).
if (import.meta.main) {
  const { mode, validateOnly, verbose } = parseArgs();
  globalConfig.mode = mode;
  globalConfig.verbose = verbose;

  console.log("Agent Testing Framework");
  console.log("=======================\n");

  // Rejections are not guaranteed to be Error instances; produce printable
  // text either way instead of reading `.message` off an unknown value.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  if (validateOnly) {
    // Just validate services
    console.log("Validating services...\n");
    validateServices(globalConfig)
      .then(report => {
        console.log(`Timestamp: ${report.timestamp}`);
        console.log(`Mode: ${report.mode}\n`);

        for (const r of report.results) {
          const icon = r.available ? "✅" : "❌";
          console.log(`${icon} ${r.service}: ${r.available ? "available" : r.error} (${r.latencyMs}ms)`);
        }

        if (report.missingFiles.length > 0) {
          console.log("\n❌ Missing files:");
          report.missingFiles.forEach(f => console.log(` - ${f}`));
        }

        console.log(`\n${report.allServicesAvailable ? "✅ All services available" : "❌ Some services unavailable"}`);

        // Only real mode treats unavailable services as a failure.
        if (!report.allServicesAvailable && mode === TestMode.REAL) {
          console.log("\n⚠ Use --use-mocks to run tests with mock services");
          process.exit(1);
        }
      })
      .catch((error: unknown) => {
        // Mirror the test-run branch: report the failure and exit non-zero
        // rather than leaving the rejection unhandled.
        console.error("\n❌ Service validation failed:", describeError(error));
        process.exit(1);
      });
  } else {
    // Run tests
    const harness = new TestHarness({ mode });

    // Add pre-built scenarios
    harness.addScenario(CommonScenarios.happyPath(null));
    harness.addScenario(CommonScenarios.errorBudgetExceeded(null));
    harness.addScenario(CommonScenarios.stuckDetection());
    harness.addScenario(CommonScenarios.conflictResolution());

    // Add real service test if in real mode
    if (mode === TestMode.REAL) {
      harness.addScenario(CommonScenarios.realServiceConnectivity());
    }

    harness.runAll().then(metrics => {
      console.log("\n" + "=".repeat(60));
      console.log("TEST RESULTS");
      console.log("=".repeat(60));
      console.log(`Mode: ${mode.toUpperCase()}`);
      console.log(`Passed: ${metrics.passed}`);
      console.log(`Failed: ${metrics.failed}`);
      console.log(`Skipped: ${metrics.skipped}`);
      console.log(`Duration: ${metrics.duration}ms`);
      console.log(`Coverage: ${metrics.coverage.toFixed(1)}%`);
      if (metrics.mocksUsed.length > 0) {
        console.log(`\n⚠ Mocks used: ${metrics.mocksUsed.join(", ")}`);
      }
      console.log("=".repeat(60));
      // Exit status reflects whether any scenario failed.
      process.exit(metrics.failed > 0 ? 1 : 0);
    }).catch((error: unknown) => {
      console.error("\n❌ Test harness failed:", describeError(error));
      if (mode === TestMode.REAL) {
        console.log("\n💡 Tip: Use --use-mocks to run with mock services");
      }
      process.exit(1);
    });
  }
}