// Commit notes for this revision
// -------------------------------
// Major additions:
//   - marketplace/: Agent template registry with FTS5 search, ratings, versioning
//   - observability/: Prometheus metrics, distributed tracing, structured logging
//   - ledger/migrations/: Database migration scripts for multi-tenant support
//   - tests/governance/: 15 new test files for phases 6-12 (295 total tests)
//   - bin/validate-phases: Full 12-phase validation script
//
// New features:
//   - Multi-tenant support with tenant isolation and quota enforcement
//   - Agent marketplace with semantic versioning and search
//   - Observability with metrics, tracing, and log correlation
//   - Tier-1 agent bootstrap scripts
//
// Updated components:
//   - ledger/api.py: Extended API for tenants, marketplace, observability
//   - ledger/schema.sql: Added tenant, project, marketplace tables
//   - testing/framework.ts: Enhanced test framework
//   - checkpoint/checkpoint.py: Improved checkpoint management
//
// Archived:
//   - External integrations (Slack/GitHub/PagerDuty) moved to .archive/
//   - Old checkpoint files cleaned up
//
// Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
//
// File info: 1281 lines, 40 KiB, TypeScript
/**
 * Agent Testing Framework
 * =======================
 * Provides mocks, utilities, and test harnesses for agent development.
 *
 * IMPORTANT: By default, tests require real services (Vault, DragonflyDB).
 * Use the --use-mocks flag to explicitly enable mock mode.
 *
 * Usage:
 *   bun run framework.ts                  # Requires real services
 *   bun run framework.ts --use-mocks      # Explicitly use mocks
 *   bun run framework.ts --validate-only  # Check services without running tests
 */
|
||
|
||
import { describe, it, expect, beforeAll, afterAll, beforeEach, mock } from "bun:test";
|
||
import { existsSync } from "fs";
|
||
import { createClient, type RedisClientType } from "redis";
|
||
|
||
// =============================================================================
|
||
// Test Mode Configuration
|
||
// =============================================================================
|
||
|
||
/**
 * Selects which backing services a test run is allowed to talk to.
 */
export enum TestMode {
  /** Requires real services, fails if unavailable. */
  REAL = "real",
  /** Uses mocks, clearly logged. */
  MOCK = "mock",
  /** Uses real where available, mocks otherwise (logged). */
  HYBRID = "hybrid",
}
|
||
|
||
/**
 * Settings that drive service validation and test-context creation.
 */
export interface TestConfig {
  /** Which services (real, mock, or a mix) the run may use. */
  mode: TestMode;
  /** Base URL of the Vault server; the health probe appends `/v1/sys/health`. */
  vaultAddr: string;
  /** Connection URL handed to the Redis client for DragonflyDB. */
  dragonflyAddr: string;
  /** Agent entry-point paths that must exist on disk for validation to pass. */
  requiredAgents: Array<string>;
  /** Additional filesystem paths that must exist for validation to pass. */
  requiredFiles: Array<string>;
  /** Verbosity switch. NOTE(review): not read by the code visible here — confirm usage. */
  verbose: boolean;
}
|
||
|
||
/**
 * Baseline configuration: REAL mode, service addresses taken from the
 * environment (with local defaults), and the paths that validation expects
 * to find on disk.
 */
const DEFAULT_CONFIG: TestConfig = {
  // Default: require real services
  mode: TestMode.REAL,
  vaultAddr: process.env.VAULT_ADDR || "https://127.0.0.1:8200",
  dragonflyAddr: process.env.DRAGONFLY_ADDR || "redis://127.0.0.1:6379",
  requiredAgents: [
    "/opt/agent-governance/agents/llm-planner-ts/index.ts",
    "/opt/agent-governance/agents/multi-agent/orchestrator.ts",
  ],
  requiredFiles: ["/opt/agent-governance/runtime", "/opt/agent-governance/pipeline"],
  verbose: false,
};

/**
 * Global config, set by CLI. Starts as a shallow copy of DEFAULT_CONFIG, so
 * the path arrays are shared with it.
 */
let globalConfig: TestConfig = Object.assign({}, DEFAULT_CONFIG);
|
||
|
||
// =============================================================================
|
||
// Service Validation
|
||
// =============================================================================
|
||
|
||
/**
 * Outcome of probing a single backing service.
 */
export interface ValidationResult {
  /** Display name of the probed service (e.g. "Vault", "DragonflyDB"). */
  service: string;
  /** True when the probe considered the service usable. */
  available: boolean;
  /** Failure message, present when the probe caught an error. */
  error?: string;
  /** Wall-clock duration of the probe in milliseconds. */
  latencyMs?: number;
}
|
||
|
||
/**
 * Aggregate result of a validation pass over all services and required paths.
 */
export interface ValidationReport {
  /** ISO-8601 time at which validation started. */
  timestamp: string;
  /** Mode the validation was run under. */
  mode: TestMode;
  /** True only when every probe succeeded and no required path is missing. */
  allServicesAvailable: boolean;
  /** One entry per probed service. */
  results: Array<ValidationResult>;
  /** Required agent/file paths that were not found on disk. */
  missingFiles: Array<string>;
  /** Unavailable services recorded as mock fallbacks (only filled outside REAL mode). */
  mocksFallback: Array<string>;
}
|
||
|
||
/**
 * Probe Vault and DragonflyDB (in that order, one after the other) and check
 * that every required agent/file path exists.
 *
 * @param config Configuration to validate against; defaults to the global config.
 * @returns A report listing per-service results, missing paths, and the
 *          services recorded as mock fallbacks (never filled in REAL mode).
 */
export async function validateServices(config: TestConfig = globalConfig): Promise<ValidationReport> {
  const timestamp = new Date().toISOString();
  const results: ValidationResult[] = [];
  const mocksFallback: string[] = [];

  // Each probe is paired with the label recorded when it is unavailable.
  const probes: Array<[string, () => Promise<ValidationResult>]> = [
    ["Vault", () => checkVault(config.vaultAddr)],
    ["DragonflyDB", () => checkDragonfly(config.dragonflyAddr)],
  ];

  for (const [label, runProbe] of probes) {
    const outcome = await runProbe();
    results.push(outcome);
    const fallbackAllowed = config.mode !== TestMode.REAL;
    if (fallbackAllowed && !outcome.available) {
      mocksFallback.push(label);
    }
  }

  // Agents first, then other required paths; keep only those that are absent.
  const missingFiles = [...config.requiredAgents, ...config.requiredFiles].filter(
    candidate => !existsSync(candidate)
  );

  const allServicesAvailable = missingFiles.length === 0 && results.every(entry => entry.available);

  return { timestamp, mode: config.mode, allServicesAvailable, results, missingFiles, mocksFallback };
}
|
||
|
||
/**
 * Probe the Vault health endpoint (`GET {addr}/v1/sys/health`).
 *
 * The first attempt passes Bun's per-request `tls` option so self-signed
 * certificates are accepted. If that attempt throws, a second attempt is made
 * with `NODE_TLS_REJECT_UNAUTHORIZED=0`; the variable is always restored to
 * its previous state afterwards (or removed if it was unset), so TLS
 * verification is not left disabled for the rest of the process.
 *
 * HTTP 200, 429, 472 and 473 are treated as "available"; per the Vault health
 * API these are the default codes for active, standby, DR-secondary and
 * performance-standby nodes.
 *
 * @param addr Base URL of the Vault server.
 * @returns Probe result; on failure `error` carries the message of the FIRST
 *          attempt's error.
 */
async function checkVault(addr: string): Promise<ValidationResult> {
  const start = Date.now();
  const healthUrl = `${addr}/v1/sys/health`;

  const toResult = (status: number): ValidationResult => ({
    service: "Vault",
    available: status === 200 || status === 429 || status === 472 || status === 473,
    latencyMs: Date.now() - start,
  });

  try {
    // Try to reach Vault health endpoint, tolerating local self-signed certs.
    const response = await fetch(healthUrl, {
      method: "GET",
      signal: AbortSignal.timeout(5000),
      // @ts-ignore - Bun supports this for self-signed certs
      tls: { rejectUnauthorized: false },
    });
    return toResult(response.status);
  } catch (error: unknown) {
    // Fallback: retry with the NODE_TLS_REJECT_UNAUTHORIZED workaround.
    const previousTls = process.env.NODE_TLS_REJECT_UNAUTHORIZED;
    process.env.NODE_TLS_REJECT_UNAUTHORIZED = "0";
    try {
      const response = await fetch(healthUrl, {
        method: "GET",
        signal: AbortSignal.timeout(5000),
      });
      return toResult(response.status);
    } catch {
      return {
        service: "Vault",
        available: false,
        error: error instanceof Error ? error.message : String(error),
        latencyMs: Date.now() - start,
      };
    } finally {
      // Assigning `undefined` to an env var stores the string "undefined",
      // so an originally-unset variable must be deleted instead.
      if (previousTls === undefined) {
        delete process.env.NODE_TLS_REJECT_UNAUTHORIZED;
      } else {
        process.env.NODE_TLS_REJECT_UNAUTHORIZED = previousTls;
      }
    }
  }
}
|
||
|
||
/**
 * Probe DragonflyDB by connecting, issuing PING, and disconnecting.
 *
 * The client is always closed again, including when PING fails after a
 * successful connect. An `error` listener is registered because node-redis
 * throws client errors as uncaught exceptions when no listener is attached;
 * failures still surface here through the rejected connect/ping promise.
 *
 * @param addr Redis-protocol URL of the DragonflyDB server.
 * @returns Probe result; `error` holds the failure message when unavailable.
 */
async function checkDragonfly(addr: string): Promise<ValidationResult> {
  const start = Date.now();
  let client: ReturnType<typeof createClient> | undefined;
  try {
    client = createClient({
      url: addr,
      // NOTE(review): hard-coded fallback credential — prefer requiring DRAGONFLY_PASSWORD.
      password: process.env.DRAGONFLY_PASSWORD || "governance2026",
    });
    // Errors are reported via the awaited promises below; the listener only
    // prevents an unhandled 'error' event from crashing the process.
    client.on("error", () => {});
    await client.connect();
    await client.ping();
    return {
      service: "DragonflyDB",
      available: true,
      latencyMs: Date.now() - start,
    };
  } catch (error: unknown) {
    return {
      service: "DragonflyDB",
      available: false,
      error: error instanceof Error ? error.message : String(error),
      latencyMs: Date.now() - start,
    };
  } finally {
    if (client?.isOpen) {
      try {
        await client.disconnect();
      } catch {
        // Best-effort close: the probe outcome was already decided above.
      }
    }
  }
}
|
||
|
||
// =============================================================================
|
||
// Type Definitions
|
||
// =============================================================================
|
||
|
||
/**
 * Everything a scenario needs while it runs: identifiers, timing, and the
 * service clients (real or mock) it should talk to.
 */
export interface TestContext {
  /** Identifier of the task under test. */
  taskId: string;
  /** Identifier of the agent under test. */
  agentId: string;
  /** Epoch milliseconds at which the context was created. */
  startTime: number;
  /** Mode the context was created for. */
  mode: TestMode;
  /** Vault client (real or mock). */
  vault: IVault;
  /** DragonflyDB client (real or mock). */
  dragonfly: IDragonfly;
  /** LLM client (real or mock). */
  llm: ILLM;
  /** Track which services are mocked. */
  _usingMocks: Array<string>;
}
|
||
|
||
/**
 * A single runnable scenario. The harness calls the hooks in the order
 * setup → execute → assertions → cleanup.
 */
export interface TestScenario {
  /** Name used in log lines and as the key in the harness result map. */
  name: string;
  /** Human-readable summary of what the scenario verifies. */
  description: string;
  /** List of services that must be real; a non-empty list makes the harness skip the scenario in MOCK mode. */
  requiresReal?: Array<string>;
  /** Prepares the scenario before execution. */
  setup(): Promise<void>;
  /** Performs the actions under test. */
  execute(ctx: TestContext): Promise<void>;
  /** Verifies the outcome; expected to throw on failure. */
  assertions(ctx: TestContext): Promise<void>;
  /** Releases whatever the scenario set up. */
  cleanup(): Promise<void>;
}
|
||
|
||
/**
 * Summary figures returned after a harness run.
 */
export interface TestMetrics {
  /** Number of scenarios that completed without throwing. */
  passed: number;
  /** Number of scenarios that threw. */
  failed: number;
  /** Number of scenarios not run. */
  skipped: number;
  /** Total run time in milliseconds. */
  duration: number;
  /** Percentage of executed (non-skipped) scenarios that passed. */
  coverage: number;
  /** Names of every service that was mocked in at least one scenario. */
  mocksUsed: Array<string>;
}
|
||
|
||
// =============================================================================
|
||
// Service Interfaces (shared by real and mock implementations)
|
||
// =============================================================================
|
||
|
||
/**
 * Vault operations shared by the real client and the mock.
 */
export interface IVault {
  /** Read the secret stored at `path`. */
  getSecret(path: string): Promise<any>;
  /** Store a secret; optional because not every implementation provides it. */
  setSecret?(path: string, value: any): void;
  /** Issue a token bound to `policy`, optionally with a TTL. */
  createToken(policy: string, ttl?: number): Promise<string>;
  /** Report whether `token` is currently valid. */
  validateToken(token: string): Promise<boolean>;
  /** Invalidate `token`. */
  revokeToken(token: string): Promise<void>;
  /** True when this implementation is a mock. */
  isMock(): boolean;
}
|
||
|
||
/**
 * Key/value, hash and list operations shared by the real DragonflyDB client
 * and the in-memory mock.
 */
export interface IDragonfly {
  /** Store `value` under `key`; `EX` sets a TTL in seconds, `NX` only sets when absent. */
  set(key: string, value: any, options?: { EX?: number; NX?: boolean }): Promise<string | null>;
  /** Read the value stored under `key`. */
  get(key: string): Promise<any>;
  /** Delete `key`; resolves to the number of keys removed. */
  del(key: string): Promise<number>;
  /** Resolves to 1 when `key` exists, otherwise 0. */
  exists(key: string): Promise<number>;
  /** Set one field of the hash at `key`. */
  hSet(key: string, field: string, value: any): Promise<number>;
  /** Read one field of the hash at `key`. */
  hGet(key: string, field: string): Promise<any>;
  /** Read every field of the hash at `key`. */
  hGetAll(key: string): Promise<Record<string, any>>;
  /** Add `increment` to a hash field and resolve to the new value. */
  hIncrBy(key: string, field: string, increment: number): Promise<number>;
  /** Append values to the list at `key`; resolves to the new list length. */
  rPush(key: string, ...values: any[]): Promise<number>;
  /** Read a range of the list at `key`. */
  lRange(key: string, start: number, stop: number): Promise<any[]>;
  /** True when this implementation is a mock. */
  isMock(): boolean;
  /** Close the underlying connection, where there is one. */
  disconnect?(): Promise<void>;
}
|
||
|
||
/**
 * Minimal text-completion contract shared by real and mock LLM clients.
 */
export interface ILLM {
  /** Produce a completion for `prompt`; `options` carries optional sampling limits. */
  complete(prompt: string, options?: { maxTokens?: number; temperature?: number }): Promise<string>;
  /** True when this implementation is a mock. */
  isMock(): boolean;
}
|
||
|
||
// =============================================================================
|
||
// Mock Vault
|
||
// =============================================================================
|
||
|
||
/**
 * In-memory stand-in for Vault.
 *
 * Holds secrets, named policies (lists of allowed path patterns), issued
 * tokens, and a log of secret reads. Seeded with a couple of test secrets and
 * the `t0-observer` / `t1-operator` policies.
 */
export class MockVault implements IVault {
  private secrets: Map<string, any> = new Map();
  private policies: Map<string, string[]> = new Map();
  private tokens: Map<string, { policy: string; ttl: number; created: number }> = new Map();
  private accessLog: Array<{ path: string; action: string; timestamp: number }> = [];

  constructor() {
    // Initialize with default test secrets
    this.secrets.set("api-keys/openrouter", { api_key: "test-key" });
    this.secrets.set("services/dragonfly", {
      host: "127.0.0.1",
      port: 6379,
      password: "test-password",
    });

    // Default policies
    this.policies.set("t0-observer", ["read:secret/data/docs/*", "read:secret/data/inventory/*"]);
    this.policies.set("t1-operator", ["read:ssh/creds/sandbox-*", "read:proxmox/creds/sandbox"]);
  }

  isMock(): boolean {
    return true;
  }

  /**
   * Read a secret and record the access.
   * @returns The stored value, or null when nothing is stored at `path`.
   *          Falsy stored values (0, "", false) are returned as-is.
   */
  async getSecret(path: string): Promise<any> {
    this.accessLog.push({ path, action: "read", timestamp: Date.now() });
    return this.secrets.get(path) ?? null;
  }

  /** Store (or replace) the secret at `path`. Not recorded in the access log. */
  setSecret(path: string, value: any): void {
    this.secrets.set(path, value);
  }

  /**
   * Issue a random `hvs.test-…` token bound to `policy`.
   * @param ttl Lifetime in seconds (default 3600).
   */
  async createToken(policy: string, ttl: number = 3600): Promise<string> {
    const token = "hvs.test-" + Math.random().toString(36).slice(2);
    this.tokens.set(token, { policy, ttl, created: Date.now() });
    return token;
  }

  /** True when the token was issued here, not revoked, and younger than its TTL. */
  async validateToken(token: string): Promise<boolean> {
    const tokenData = this.tokens.get(token);
    if (!tokenData) return false;
    const elapsedSeconds = (Date.now() - tokenData.created) / 1000;
    return elapsedSeconds < tokenData.ttl;
  }

  /** Forget the token; unknown tokens are ignored. */
  async revokeToken(token: string): Promise<void> {
    this.tokens.delete(token);
  }

  /**
   * Check whether the policy attached to `token` allows `path`.
   *
   * Each policy entry is a literal pattern in which `*` matches any run of
   * characters. The whole path must match: every `*` is honoured, other regex
   * metacharacters are treated literally, and partial matches do not count.
   * Token TTL is not considered here.
   *
   * @returns false for unknown tokens or policies without a matching entry.
   */
  checkAccess(token: string, path: string): boolean {
    const tokenData = this.tokens.get(token);
    if (!tokenData) return false;
    const allowedPatterns = this.policies.get(tokenData.policy) ?? [];
    return allowedPatterns.some(pattern => MockVault.patternToRegExp(pattern).test(path));
  }

  /** Translate a `*`-wildcard pattern into an anchored regular expression. */
  private static patternToRegExp(pattern: string): RegExp {
    const body = pattern
      .split("*")
      .map(literal => literal.replace(/[.+?^${}()|[\]\\]/g, "\\$&"))
      .join(".*");
    return new RegExp(`^${body}$`);
  }

  /** Live reference to the recorded secret reads (not a copy). */
  getAccessLog(): Array<{ path: string; action: string; timestamp: number }> {
    return this.accessLog;
  }

  /** Clear the access log and all issued tokens; secrets and policies are kept. */
  reset(): void {
    this.accessLog = [];
    this.tokens.clear();
  }
}
|
||
|
||
// =============================================================================
|
||
// Real Vault Client
|
||
// =============================================================================
|
||
|
||
/**
 * Thin HTTP client for a real Vault server.
 *
 * Requests are authenticated with the instance token if one is set, otherwise
 * with `process.env.VAULT_TOKEN`.
 *
 * SECURITY NOTE(review): every request disables TLS certificate verification
 * (`tls.rejectUnauthorized = false`) to accept self-signed certificates. That
 * is only appropriate against local/test Vault instances.
 */
export class RealVault implements IVault {
  /** Health-endpoint status codes treated as "reachable" (same set the service validator accepts). */
  private static readonly REACHABLE_HEALTH_STATUSES: ReadonlySet<number> = new Set([200, 429, 472, 473]);

  private addr: string;
  private token: string | null = null;

  /** @param addr Base URL of the Vault server (no trailing slash expected). */
  constructor(addr: string) {
    this.addr = addr;
  }

  isMock(): boolean {
    return false;
  }

  /**
   * Issue a request against `${addr}${path}` with the Vault token and JSON
   * content type. Headers supplied in `options.headers` (as a plain object)
   * override the defaults.
   */
  private async fetch(path: string, options: RequestInit = {}): Promise<Response> {
    const token = this.token || process.env.VAULT_TOKEN;
    return fetch(`${this.addr}${path}`, {
      ...options,
      headers: {
        "X-Vault-Token": token || "",
        "Content-Type": "application/json",
        ...options.headers,
      },
      // @ts-ignore - Bun supports this for self-signed certs
      tls: { rejectUnauthorized: false },
    });
  }

  /**
   * Read `/v1/secret/data/{path}`.
   * @returns The nested `data.data` payload, or null on any non-OK response or error.
   */
  async getSecret(path: string): Promise<any> {
    try {
      const response = await this.fetch(`/v1/secret/data/${path}`);
      if (!response.ok) return null;
      const data = await response.json();
      return data?.data?.data || null;
    } catch {
      return null;
    }
  }

  /**
   * Create a token via `/v1/auth/token/create`.
   * @param ttl Lifetime in seconds (default 3600).
   * @returns The client token, or "" when the response carries none.
   */
  async createToken(policy: string, ttl: number = 3600): Promise<string> {
    const response = await this.fetch("/v1/auth/token/create", {
      method: "POST",
      body: JSON.stringify({
        policies: [policy],
        ttl: `${ttl}s`,
      }),
    });
    const data = await response.json();
    return data?.auth?.client_token || "";
  }

  /** True when looking the token up against itself succeeds; false on any error. */
  async validateToken(token: string): Promise<boolean> {
    try {
      const response = await this.fetch("/v1/auth/token/lookup-self", {
        headers: { "X-Vault-Token": token },
      });
      return response.ok;
    } catch {
      return false;
    }
  }

  /** Ask Vault to revoke `token` using the token itself; the response is not inspected. */
  async revokeToken(token: string): Promise<void> {
    await this.fetch("/v1/auth/token/revoke-self", {
      method: "POST",
      headers: { "X-Vault-Token": token },
    });
  }

  /**
   * Check that the server answers its health endpoint.
   *
   * Standby-type nodes answer `/v1/sys/health` with non-2xx codes (429, 472,
   * 473), so those are accepted alongside 200 instead of relying on
   * `response.ok`.
   *
   * @returns false on any other status or on a network error.
   */
  async testConnection(): Promise<boolean> {
    try {
      const response = await this.fetch("/v1/sys/health");
      return RealVault.REACHABLE_HEALTH_STATUSES.has(response.status);
    } catch {
      return false;
    }
  }
}
|
||
|
||
// =============================================================================
|
||
// Mock DragonflyDB
|
||
// =============================================================================
|
||
|
||
/**
 * In-memory stand-in for DragonflyDB.
 *
 * Strings, hashes and lists live in separate maps; TTLs apply to string keys.
 * Values are stored as given (no serialisation). Behaviour follows the Redis
 * commands of the same name where it matters to callers:
 *   - a plain `set` clears any earlier TTL, and `NX` ignores expired keys;
 *   - `get` / `hGet` return stored falsy values (0, "", false) instead of null;
 *   - `del` / `exists` see string, hash and list keys;
 *   - `lRange` treats `stop` as inclusive and supports negative indices.
 */
export class MockDragonfly implements IDragonfly {
  private store: Map<string, any> = new Map();
  private hashes: Map<string, Map<string, any>> = new Map();
  private lists: Map<string, any[]> = new Map();
  private expirations: Map<string, number> = new Map();
  private subscribers: Map<string, Array<(msg: string) => void>> = new Map();

  isMock(): boolean {
    return true;
  }

  // String operations

  /**
   * Store `value` under `key`.
   * @param options `EX`: TTL in seconds; `NX`: only set when the key is absent.
   * @returns "OK", or null when `NX` was requested and a live value exists.
   */
  async set(key: string, value: any, options?: { EX?: number; NX?: boolean }): Promise<string | null> {
    this.evictIfExpired(key);
    if (options?.NX && this.store.has(key)) return null;
    this.store.set(key, value);
    if (options?.EX) {
      this.expirations.set(key, Date.now() + options.EX * 1000);
    } else {
      // Overwriting without EX must not inherit the previous value's TTL.
      this.expirations.delete(key);
    }
    return "OK";
  }

  /** @returns The stored value, or null when the key is missing or expired. */
  async get(key: string): Promise<any> {
    this.evictIfExpired(key);
    return this.store.get(key) ?? null;
  }

  /** Remove `key` whatever its type. @returns 1 if something was removed, else 0. */
  async del(key: string): Promise<number> {
    this.evictIfExpired(key);
    // Evaluate all three deletes; `||` on the calls themselves would short-circuit.
    const removedString = this.store.delete(key);
    const removedHash = this.hashes.delete(key);
    const removedList = this.lists.delete(key);
    this.expirations.delete(key);
    return removedString || removedHash || removedList ? 1 : 0;
  }

  /** @returns 1 when a live string, hash or list exists under `key`, else 0. */
  async exists(key: string): Promise<number> {
    this.evictIfExpired(key);
    return this.store.has(key) || this.hashes.has(key) || this.lists.has(key) ? 1 : 0;
  }

  /** Set a TTL (seconds) on an existing string key. @returns false when the key is absent. */
  async expire(key: string, seconds: number): Promise<boolean> {
    this.evictIfExpired(key);
    if (!this.store.has(key)) return false;
    this.expirations.set(key, Date.now() + seconds * 1000);
    return true;
  }

  // Hash operations

  /** @returns 1 when the field was newly created, 0 when it was overwritten. */
  async hSet(key: string, field: string, value: any): Promise<number> {
    const hash = this.hashFor(key);
    const existed = hash.has(field);
    hash.set(field, value);
    return existed ? 0 : 1;
  }

  /** @returns The field value, or null when the hash or field is missing. */
  async hGet(key: string, field: string): Promise<any> {
    return this.hashes.get(key)?.get(field) ?? null;
  }

  /** @returns A plain-object copy of the hash; `{}` when the key is missing. */
  async hGetAll(key: string): Promise<Record<string, any>> {
    const hash = this.hashes.get(key);
    return hash ? Object.fromEntries(hash) : {};
  }

  /**
   * Add `increment` to an integer field (missing fields count as 0). The new
   * value is stored as a string, as a real server would return it.
   */
  async hIncrBy(key: string, field: string, increment: number): Promise<number> {
    const hash = this.hashFor(key);
    const current = parseInt(hash.get(field) || "0");
    const next = current + increment;
    hash.set(field, next.toString());
    return next;
  }

  // List operations

  /** Append values to the list at `key`. @returns The new list length. */
  async rPush(key: string, ...values: any[]): Promise<number> {
    let list = this.lists.get(key);
    if (!list) {
      list = [];
      this.lists.set(key, list);
    }
    list.push(...values);
    return list.length;
  }

  /**
   * Return the elements from `start` to `stop`, both inclusive. Negative
   * indices count from the end (-1 is the last element); out-of-range indices
   * are clamped, and an empty array is returned when the range is empty.
   */
  async lRange(key: string, start: number, stop: number): Promise<any[]> {
    const list = this.lists.get(key) ?? [];
    const length = list.length;
    const from = start < 0 ? Math.max(length + start, 0) : start;
    const to = stop < 0 ? length + stop : Math.min(stop, length - 1);
    if (from > to) return [];
    return list.slice(from, to + 1);
  }

  // Pub/Sub

  /** Register `handler` for messages published on `channel`. */
  async subscribe(channel: string, handler: (msg: string) => void): Promise<void> {
    const handlers = this.subscribers.get(channel);
    if (handlers) {
      handlers.push(handler);
    } else {
      this.subscribers.set(channel, [handler]);
    }
  }

  /** Deliver `message` synchronously to each subscriber. @returns The number of handlers called. */
  async publish(channel: string, message: string): Promise<number> {
    const handlers = this.subscribers.get(channel) ?? [];
    for (const handler of handlers) handler(message);
    return handlers.length;
  }

  /** Drop the handlers of one channel, or of every channel when none is given. */
  async unsubscribe(channel?: string): Promise<void> {
    if (channel) {
      this.subscribers.delete(channel);
    } else {
      this.subscribers.clear();
    }
  }

  // Utility

  /** True when `key` has a TTL that lies in the past. */
  private isExpired(key: string): boolean {
    const expiry = this.expirations.get(key);
    if (!expiry) return false;
    return Date.now() > expiry;
  }

  /** Drop an expired string key together with its TTL entry. */
  private evictIfExpired(key: string): void {
    if (this.isExpired(key)) {
      this.store.delete(key);
      this.expirations.delete(key);
    }
  }

  /** Return the hash stored at `key`, creating an empty one if needed. */
  private hashFor(key: string): Map<string, any> {
    let hash = this.hashes.get(key);
    if (!hash) {
      hash = new Map();
      this.hashes.set(key, hash);
    }
    return hash;
  }

  /** Wipe all data, TTLs and subscriptions. */
  reset(): void {
    this.store.clear();
    this.hashes.clear();
    this.lists.clear();
    this.expirations.clear();
    this.subscribers.clear();
  }

  // Test helpers

  /** Expose the live internal maps (not copies) for assertions. */
  getState(): { store: Map<string, any>; hashes: Map<string, Map<string, any>>; lists: Map<string, any[]> } {
    return { store: this.store, hashes: this.hashes, lists: this.lists };
  }
}
|
||
|
||
// =============================================================================
|
||
// Real DragonflyDB Client
|
||
// =============================================================================
|
||
|
||
/**
 * DragonflyDB client backed by the `redis` package.
 *
 * Every operation connects lazily. Non-string values are JSON-serialised on
 * write (`set`, `hSet`, `rPush`); reads return whatever the server sends back
 * and are NOT parsed.
 */
export class RealDragonfly implements IDragonfly {
  private client: RedisClientType;
  private connected: boolean = false;
  /** In-flight connect attempt, shared so concurrent callers open only one socket. */
  private connecting: Promise<void> | null = null;

  /**
   * @param url Redis-protocol URL of the server.
   * @param password Optional password; falls back to `DRAGONFLY_PASSWORD`,
   *        then to a built-in default.
   *        NOTE(review): the hard-coded fallback credential should be removed
   *        once every environment sets DRAGONFLY_PASSWORD.
   */
  constructor(url: string, password?: string) {
    this.client = createClient({
      url,
      password: password || process.env.DRAGONFLY_PASSWORD || "governance2026",
    });
    // node-redis throws client errors as uncaught exceptions when no 'error'
    // listener is attached. Failures still reach callers through the rejected
    // promise of the operation that hit them.
    this.client.on("error", () => {});
  }

  isMock(): boolean {
    return false;
  }

  /**
   * Open the connection if it is not open yet. Concurrent callers await the
   * same attempt instead of each calling `client.connect()`.
   */
  async connect(): Promise<void> {
    if (this.connected) return;
    if (!this.connecting) {
      this.connecting = (async () => {
        await this.client.connect();
        this.connected = true;
      })();
    }
    const attempt = this.connecting;
    try {
      await attempt;
    } finally {
      // Clear only our own attempt so a newer retry is not discarded.
      if (this.connecting === attempt) this.connecting = null;
    }
  }

  /** Close the connection if it is open. */
  async disconnect(): Promise<void> {
    if (this.connected) {
      await this.client.disconnect();
      this.connected = false;
    }
  }

  /** SET with optional `EX` (seconds) / `NX`; non-string values are JSON-serialised. */
  async set(key: string, value: any, options?: { EX?: number; NX?: boolean }): Promise<string | null> {
    await this.connect();
    const payload = typeof value === "string" ? value : JSON.stringify(value);
    const opts: any = {};
    if (options?.EX) opts.EX = options.EX;
    if (options?.NX) opts.NX = true;
    return this.client.set(key, payload, opts);
  }

  /** GET; returns the raw stored string (or null). */
  async get(key: string): Promise<any> {
    await this.connect();
    return this.client.get(key);
  }

  /** DEL; resolves to the number of keys removed. */
  async del(key: string): Promise<number> {
    await this.connect();
    return this.client.del(key);
  }

  /** EXISTS; resolves to the number of matching keys. */
  async exists(key: string): Promise<number> {
    await this.connect();
    return this.client.exists(key);
  }

  /** HSET; non-string values are JSON-serialised. */
  async hSet(key: string, field: string, value: any): Promise<number> {
    await this.connect();
    const payload = typeof value === "string" ? value : JSON.stringify(value);
    return this.client.hSet(key, field, payload);
  }

  /** HGET; returns the raw stored string. */
  async hGet(key: string, field: string): Promise<any> {
    await this.connect();
    return this.client.hGet(key, field);
  }

  /** HGETALL; returns the raw field/value map. */
  async hGetAll(key: string): Promise<Record<string, any>> {
    await this.connect();
    return this.client.hGetAll(key);
  }

  /** HINCRBY; resolves to the new field value. */
  async hIncrBy(key: string, field: string, increment: number): Promise<number> {
    await this.connect();
    return this.client.hIncrBy(key, field, increment);
  }

  /** RPUSH; non-string values are JSON-serialised. Resolves to the new list length. */
  async rPush(key: string, ...values: any[]): Promise<number> {
    await this.connect();
    const payloads = values.map(v => (typeof v === "string" ? v : JSON.stringify(v)));
    return this.client.rPush(key, payloads);
  }

  /** LRANGE; returns the raw stored strings. */
  async lRange(key: string, start: number, stop: number): Promise<any[]> {
    await this.connect();
    return this.client.lRange(key, start, stop);
  }
}
|
||
|
||
// =============================================================================
|
||
// Mock LLM
|
||
// =============================================================================
|
||
|
||
/**
 * Scriptable LLM stand-in.
 *
 * Canned responses are chosen by case-insensitive substring match against the
 * prompt (first registered pattern wins); otherwise the default response is
 * returned. Latency and random failures can be simulated.
 */
export class MockLLM implements ILLM {
  private responses = new Map<string, string>();
  private callLog: Array<{ prompt: string; response: string; timestamp: number }> = [];
  private defaultResponse = '{"confidence": 0.5, "steps": [], "assumptions": []}';
  private latencyMs = 0;
  private failureRate = 0;

  isMock(): boolean {
    return true;
  }

  /** Register `response` for prompts containing `pattern` (case-insensitive). */
  setResponse(pattern: string, response: string): void {
    this.responses.set(pattern, response);
  }

  /** Replace the response used when no pattern matches. */
  setDefaultResponse(response: string): void {
    this.defaultResponse = response;
  }

  /** Delay, in milliseconds, applied before each completion. */
  setLatency(ms: number): void {
    this.latencyMs = ms;
  }

  /** Probability (0..1) that a completion throws instead of answering. */
  setFailureRate(rate: number): void {
    this.failureRate = rate;
  }

  /**
   * Produce the canned completion for `prompt`.
   * Only successful calls are appended to the call log.
   * @throws Error("LLM_ERROR: Simulated failure") according to the failure rate.
   */
  async complete(prompt: string, options?: { maxTokens?: number; temperature?: number }): Promise<string> {
    // Simulate latency
    if (this.latencyMs > 0) {
      const delay = this.latencyMs;
      await new Promise(resolve => setTimeout(resolve, delay));
    }

    // Simulate failures
    const roll = Math.random();
    if (roll < this.failureRate) {
      throw new Error("LLM_ERROR: Simulated failure");
    }

    // Find matching response
    const needleSource = prompt.toLowerCase();
    const hit = Array.from(this.responses.entries()).find(([pattern]) =>
      needleSource.includes(pattern.toLowerCase())
    );
    const response = hit === undefined ? this.defaultResponse : hit[1];

    this.callLog.push({ prompt, response, timestamp: Date.now() });
    return response;
  }

  /** Live reference to the list of successful calls. */
  getCallLog(): Array<{ prompt: string; response: string; timestamp: number }> {
    return this.callLog;
  }

  /** Number of successful calls recorded so far. */
  getCallCount(): number {
    return this.callLog.length;
  }

  /** Clear the call log and disable latency/failure simulation; canned responses are kept. */
  reset(): void {
    this.callLog = [];
    this.latencyMs = 0;
    this.failureRate = 0;
  }
}
|
||
|
||
// =============================================================================
|
||
// Test Utilities
|
||
// =============================================================================
|
||
|
||
/**
 * Create a test context. By default, requires real services.
 * Pass mode: TestMode.MOCK to explicitly use mocks.
 *
 * Service selection:
 *  - A `vault`, `dragonfly` or `llm` supplied in `options` is used as-is; no
 *    real client is created (or left connected) for a service the caller
 *    already provided.
 *  - MOCK mode: mocks for everything.
 *  - HYBRID mode: real Vault/DragonflyDB when reachable, mocks otherwise.
 *  - REAL mode: services are validated first and an error is thrown if any
 *    is unavailable or a required path is missing. Vault still falls back to
 *    a mock when `VAULT_TOKEN` is not set (recorded in `_usingMocks`).
 *  - The LLM is always a mock unless the caller supplies one.
 *
 * Option fields whose value is `undefined` are ignored rather than
 * overwriting the generated defaults.
 *
 * @param options Overrides for any context field, plus the mode to use
 *        (defaults to the global config's mode).
 * @returns The assembled context; `_usingMocks` lists the mocked services.
 * @throws Error in REAL mode when validation fails, and in REAL mode when a
 *         real Vault/DragonflyDB connection cannot be established.
 */
export async function createTestContext(
  options?: Partial<TestContext> & { mode?: TestMode }
): Promise<TestContext> {
  const mode = options?.mode ?? globalConfig.mode;
  const usingMocks: string[] = [];

  // Validate services if not in mock mode
  if (mode === TestMode.REAL) {
    const report = await validateServices(globalConfig);
    if (!report.allServicesAvailable) {
      const errors: string[] = [];
      for (const r of report.results) {
        if (!r.available) {
          errors.push(`${r.service}: ${r.error || "unavailable"}`);
        }
      }
      for (const f of report.missingFiles) {
        errors.push(`Missing file: ${f}`);
      }
      throw new Error(
        `REAL mode requires all services. Missing:\n - ${errors.join("\n - ")}\n\n` +
        `Use --use-mocks to explicitly enable mock mode.`
      );
    }
  }

  // Create Vault (caller-supplied, real, or mock)
  let vault: IVault;
  if (options?.vault) {
    vault = options.vault;
    if (vault.isMock()) usingMocks.push("Vault");
  } else if (mode === TestMode.MOCK || !process.env.VAULT_TOKEN) {
    // MOCK mode, or no token available: HYBRID falls back to the mock, and
    // REAL deliberately does the same but the fallback is recorded.
    vault = new MockVault();
    usingMocks.push("Vault");
  } else {
    try {
      const realVault = new RealVault(globalConfig.vaultAddr);
      if (!(await realVault.testConnection())) {
        throw new Error("Vault connection test failed");
      }
      vault = realVault;
    } catch (e) {
      if (mode !== TestMode.HYBRID) throw e;
      vault = new MockVault();
      usingMocks.push("Vault");
    }
  }

  // Create Dragonfly (caller-supplied, real, or mock)
  let dragonfly: IDragonfly;
  if (options?.dragonfly) {
    dragonfly = options.dragonfly;
    if (dragonfly.isMock()) usingMocks.push("DragonflyDB");
  } else if (mode === TestMode.MOCK) {
    dragonfly = new MockDragonfly();
    usingMocks.push("DragonflyDB");
  } else {
    try {
      const realDragonfly = new RealDragonfly(globalConfig.dragonflyAddr);
      await realDragonfly.connect();
      dragonfly = realDragonfly;
    } catch (e) {
      if (mode !== TestMode.HYBRID) throw e;
      dragonfly = new MockDragonfly();
      usingMocks.push("DragonflyDB");
    }
  }

  // LLM is always mocked unless the caller supplies one
  const llm: ILLM = options?.llm ?? new MockLLM();
  if (llm.isMock()) usingMocks.push("LLM");

  // Drop explicit `undefined` overrides so they cannot blank out defaults.
  const overrides = Object.fromEntries(
    Object.entries(options ?? {}).filter(([, value]) => value !== undefined)
  ) as Partial<TestContext>;

  return {
    taskId: "test-task-" + Math.random().toString(36).slice(2, 8),
    agentId: "test-agent-" + Math.random().toString(36).slice(2, 8),
    startTime: Date.now(),
    mode,
    vault,
    dragonfly,
    llm,
    _usingMocks: usingMocks,
    ...overrides,
  };
}
|
||
|
||
/**
 * Race `promise` against a timer.
 *
 * The timer is cleared as soon as the race settles, so a finished call does
 * not leave a pending timeout keeping the event loop alive. The wrapped
 * promise itself is not cancelled when the timeout wins.
 *
 * @param promise Work to wait for.
 * @param ms Maximum time to wait, in milliseconds.
 * @param message Message of the Error thrown on timeout (default "Timeout").
 * @returns The value `promise` resolves to.
 * @throws Error(message) when `ms` elapses first; otherwise whatever
 *         `promise` rejects with.
 */
export async function withTimeout<T>(promise: Promise<T>, ms: number, message: string = "Timeout"): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error(message)), ms);
  });
  try {
    return await Promise.race([promise, timeout]);
  } finally {
    clearTimeout(timer);
  }
}
|
||
|
||
/**
 * Build a sample instruction packet for tests.
 *
 * Only the identifiers, the objective and the creation timestamp vary; all
 * other fields are fixed test values (sandbox-only constraints, an error
 * budget of 5 total / 2 repeated / 1 procedure violation, and one
 * escalation rule).
 *
 * @param taskId Stored as `task_id`.
 * @param agentId Stored as `agent_id`.
 * @param objective Stored as `objective`.
 * @returns A fresh, mutable packet object.
 */
export function generateInstructionPacket(taskId: string, agentId: string, objective: string) {
  const constraints = {
    scope: ["sandbox only"],
    forbidden: ["no prod access"],
    required_steps: ["plan before execute"],
  };

  const errorBudget = {
    max_total_errors: 5,
    max_same_error_repeats: 2,
    max_procedure_violations: 1,
  };

  const packet = {
    agent_id: agentId,
    task_id: taskId,
    created_for: "Test Task",
    objective: objective,
    deliverables: ["plan", "report"],
    constraints: constraints,
    success_criteria: ["plan generated"],
    error_budget: errorBudget,
    escalation_rules: ["If confidence < 0.7 -> escalate"],
    created_at: new Date().toISOString(),
  };
  return packet;
}
|
||
|
||
// =============================================================================
|
||
// Test Harness
|
||
// =============================================================================
|
||
|
||
/**
 * Runs registered scenarios one after another and collects results.
 *
 * For each scenario a fresh test context is created, then the hooks run in
 * the order setup → execute → assertions → cleanup. Any thrown error marks
 * the scenario as failed; the remaining scenarios still run.
 */
export class TestHarness {
  private scenarios: TestScenario[] = [];
  private results: Map<string, { passed: boolean; error?: string; duration: number; mocks: string[] }> = new Map();
  private config: TestConfig;

  /** @param config Overrides applied on top of the global configuration. */
  constructor(config?: Partial<TestConfig>) {
    this.config = { ...globalConfig, ...config };
  }

  /** Queue a scenario; scenarios run in insertion order. */
  addScenario(scenario: TestScenario): void {
    this.scenarios.push(scenario);
  }

  /**
   * Run every queued scenario.
   *
   * - Scenarios declaring `requiresReal` are skipped in MOCK mode.
   * - `cleanup` runs exactly once per executed scenario, whether it passed or
   *   failed; a cleanup error after a failure is logged, not rethrown.
   * - The context's Dragonfly connection is closed after every scenario,
   *   including failed ones; a failure to close is logged as a warning.
   *
   * @returns Counters, total duration, the pass percentage of executed
   *          scenarios (0 when none were executed), and all mocks used.
   */
  async runAll(): Promise<TestMetrics> {
    const startTime = Date.now();
    let passed = 0;
    let failed = 0;
    let skipped = 0;
    const allMocksUsed: Set<string> = new Set();

    // Print mode banner
    this.printModeBanner();

    for (const scenario of this.scenarios) {
      const scenarioStart = Date.now();

      // Check if scenario requires real services
      if (scenario.requiresReal && scenario.requiresReal.length > 0 && this.config.mode === TestMode.MOCK) {
        console.log(`\n[SKIP] ${scenario.name} (requires real: ${scenario.requiresReal.join(", ")})`);
        skipped++;
        continue;
      }

      let ctx: TestContext | undefined;
      let cleanupAttempted = false;

      try {
        console.log(`\n[TEST] Running: ${scenario.name}`);

        ctx = await createTestContext({ mode: this.config.mode });

        // Log mocks in use for this test
        if (ctx._usingMocks.length > 0) {
          console.log(` [MOCKS: ${ctx._usingMocks.join(", ")}]`);
          ctx._usingMocks.forEach(m => allMocksUsed.add(m));
        }

        await scenario.setup();
        await scenario.execute(ctx);
        await scenario.assertions(ctx);

        // Flag first: if cleanup itself throws, it must not be run a second time.
        cleanupAttempted = true;
        await scenario.cleanup();

        this.results.set(scenario.name, {
          passed: true,
          duration: Date.now() - scenarioStart,
          mocks: ctx._usingMocks,
        });
        passed++;
        console.log(`[PASS] ${scenario.name} (${Date.now() - scenarioStart}ms)`);
      } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        this.results.set(scenario.name, {
          passed: false,
          error: message,
          duration: Date.now() - scenarioStart,
          mocks: ctx?._usingMocks ?? [],
        });
        failed++;
        console.log(`[FAIL] ${scenario.name}: ${message}`);

        if (!cleanupAttempted) {
          try {
            await scenario.cleanup();
          } catch (cleanupError: unknown) {
            const detail = cleanupError instanceof Error ? cleanupError.message : String(cleanupError);
            console.log(` [WARN] cleanup failed for ${scenario.name}: ${detail}`);
          }
        }
      } finally {
        // Cleanup real connections, also after a failed scenario.
        if (ctx?.dragonfly.disconnect) {
          try {
            await ctx.dragonfly.disconnect();
          } catch (disconnectError: unknown) {
            const detail = disconnectError instanceof Error ? disconnectError.message : String(disconnectError);
            console.log(` [WARN] disconnect failed for ${scenario.name}: ${detail}`);
          }
        }
      }
    }

    // Guard the division: with every scenario skipped there is nothing to rate.
    const executed = this.scenarios.length - skipped;

    return {
      passed,
      failed,
      skipped,
      duration: Date.now() - startTime,
      coverage: executed > 0 ? (passed / executed) * 100 : 0,
      mocksUsed: Array.from(allMocksUsed),
    };
  }

  /** Print a banner describing the active test mode. */
  private printModeBanner(): void {
    console.log("\n" + "=".repeat(60));
    if (this.config.mode === TestMode.MOCK) {
      console.log("⚠️ MOCK MODE ENABLED");
      console.log(" Tests are running against MOCK services.");
      console.log(" Results may not reflect real system behavior.");
      console.log(" Remove --use-mocks to test against real services.");
    } else if (this.config.mode === TestMode.HYBRID) {
      console.log("⚠️ HYBRID MODE");
      console.log(" Using real services where available, mocks otherwise.");
      console.log(" Check individual test output for mock usage.");
    } else {
      console.log("✅ REAL MODE");
      console.log(" Tests are running against REAL services.");
    }
    console.log("=".repeat(60));
  }

  /** Live map of per-scenario results, keyed by scenario name. */
  getResults(): Map<string, { passed: boolean; error?: string; duration: number; mocks: string[] }> {
    return this.results;
  }
}
|
||
|
||
// =============================================================================
|
||
// Pre-built Test Scenarios
|
||
// =============================================================================
|
||
|
||
/**
 * Factory functions for ready-made test scenarios.
 *
 * Each factory returns a fresh `TestScenario` whose `execute` step seeds
 * state through the context's DragonflyDB client and whose `assertions`
 * step reads it back.
 *
 * NOTE(review): the `happyPath`, `errorBudgetExceeded` and `stuckDetection`
 * assertions are guarded by `if (stateStr)`, so they pass vacuously when the
 * expected key was never written. Confirm that this leniency is intended.
 */
export const CommonScenarios = {
  /**
   * Happy path: the agent is expected to finish in phase EXIT with status
   * COMPLETED.
   *
   * @param AgentClass Accepted for API compatibility; not used by the scenario.
   */
  happyPath: (AgentClass: any): TestScenario => ({
    name: "Happy Path - Successful Completion",
    description: "Agent completes all phases without errors",
    setup: async () => {},
    execute: async (ctx) => {
      if (ctx.llm.isMock()) {
        // Canned successful "plan" response; only a mock LLM can be scripted.
        (ctx.llm as MockLLM).setResponse("plan", JSON.stringify({
          title: "Test Plan",
          confidence: 0.85,
          steps: [{ step: 1, action: "Test action" }],
        }));
      } else {
        console.log(" [WARN] This scenario requires mock LLM to control responses");
      }

      // Publish the instruction packet under the agent's packet key.
      const packet = generateInstructionPacket(ctx.taskId, ctx.agentId, "Test objective");
      await ctx.dragonfly.set(`agent:${ctx.agentId}:packet`, JSON.stringify(packet));
    },
    assertions: async (ctx) => {
      // Only checked when a state record exists for the agent.
      const stateStr = await ctx.dragonfly.get(`agent:${ctx.agentId}:state`);
      if (stateStr) {
        const state = JSON.parse(stateStr);
        expect(state.phase).toBe("EXIT");
        expect(state.status).toBe("COMPLETED");
      }
    },
    cleanup: async () => {},
  }),

  /**
   * Error budget exceeded: the agent is expected to end up REVOKED once its
   * recorded errors (3) exceed the packet's `max_total_errors` (2).
   *
   * @param AgentClass Accepted for API compatibility; not used by the scenario.
   */
  errorBudgetExceeded: (AgentClass: any): TestScenario => ({
    name: "Error Budget Exceeded - Revocation",
    description: "Agent is revoked when error budget is exceeded",
    setup: async () => {},
    execute: async (ctx) => {
      if (ctx.llm.isMock()) {
        (ctx.llm as MockLLM).setFailureRate(1.0); // All LLM calls fail
      }

      const packet = generateInstructionPacket(ctx.taskId, ctx.agentId, "Test objective");
      packet.error_budget.max_total_errors = 2;
      await ctx.dragonfly.set(`agent:${ctx.agentId}:packet`, JSON.stringify(packet));

      // Simulate errors: push the counter past the budget configured above.
      await ctx.dragonfly.hIncrBy(`agent:${ctx.agentId}:errors`, "total_errors", 3);
    },
    assertions: async (ctx) => {
      // Only checked when a state record exists for the agent.
      const stateStr = await ctx.dragonfly.get(`agent:${ctx.agentId}:state`);
      if (stateStr) {
        const state = JSON.parse(stateStr);
        expect(state.status).toBe("REVOKED");
      }
    },
    cleanup: async () => {},
  }),

  /**
   * Stuck detection: records an ALPHA agent whose `last_activity` is 60
   * seconds old and asserts that its inactivity exceeds 30 seconds.
   */
  stuckDetection: (): TestScenario => ({
    name: "Stuck Detection - GAMMA Spawn",
    description: "GAMMA is spawned when agents are stuck",
    setup: async () => {},
    execute: async (ctx) => {
      // Set up agent state as stuck (old last_activity).
      const stuckState = {
        agent_id: ctx.agentId,
        role: "ALPHA",
        status: "WORKING",
        last_activity: new Date(Date.now() - 60000).toISOString(), // 60 seconds ago
      };
      await ctx.dragonfly.hSet(`agents:${ctx.taskId}`, "ALPHA", JSON.stringify(stuckState));
    },
    assertions: async (ctx) => {
      // Check that stuck would be detected (inactivity measured in seconds).
      const stateStr = await ctx.dragonfly.hGet(`agents:${ctx.taskId}`, "ALPHA");
      if (stateStr) {
        const state = JSON.parse(stateStr);
        const inactivity = (Date.now() - new Date(state.last_activity).getTime()) / 1000;
        expect(inactivity).toBeGreaterThan(30);
      }
    },
    cleanup: async () => {},
  }),

  /**
   * Conflict resolution: writes two proposals and a rejected evaluation to
   * the task blackboard, bumps the `conflicts_detected` metric, and asserts
   * that the metric is greater than zero.
   */
  conflictResolution: (): TestScenario => ({
    name: "Conflict Resolution",
    description: "Multiple proposals lead to conflict detection",
    setup: async () => {},
    execute: async (ctx) => {
      // Simulate conflicting proposals.
      await ctx.dragonfly.hSet(`blackboard:${ctx.taskId}:solutions`, "proposal_1", JSON.stringify({
        author: "ALPHA",
        value: { approach: "Approach A", confidence: 0.8 },
      }));
      await ctx.dragonfly.hSet(`blackboard:${ctx.taskId}:solutions`, "proposal_2", JSON.stringify({
        author: "ALPHA",
        value: { approach: "Approach B", confidence: 0.7 },
      }));

      // BETA rejects proposal_2.
      await ctx.dragonfly.hSet(`blackboard:${ctx.taskId}:progress`, "eval_proposal_2", JSON.stringify({
        accepted: false,
        score: 0.5,
      }));

      await ctx.dragonfly.hIncrBy(`metrics:${ctx.taskId}`, "conflicts_detected", 1);
    },
    assertions: async (ctx) => {
      const conflicts = await ctx.dragonfly.hGet(`metrics:${ctx.taskId}`, "conflicts_detected");
      // Explicit radix so the counter is always read as a decimal integer.
      expect(parseInt(conflicts || "0", 10)).toBeGreaterThan(0);
    },
    cleanup: async () => {},
  }),

  /**
   * Real service connectivity: fails if mocks are in use, then performs a
   * set/get/del round trip against DragonflyDB with a timestamped probe key.
   */
  realServiceConnectivity: (): TestScenario => ({
    name: "Real Service Connectivity",
    description: "Verify connection to real Vault and DragonflyDB",
    requiresReal: ["Vault", "DragonflyDB"],
    setup: async () => {},
    execute: async (ctx) => {
      if (ctx.vault.isMock() || ctx.dragonfly.isMock()) {
        throw new Error("This test requires real services, but mocks are in use");
      }

      // Test DragonflyDB.
      const testKey = `test:connectivity:${Date.now()}`;
      await ctx.dragonfly.set(testKey, "test-value");
      try {
        const value = await ctx.dragonfly.get(testKey);
        if (value !== "test-value") {
          throw new Error(`DragonflyDB read/write failed: expected 'test-value', got '${value}'`);
        }
      } finally {
        // Always remove the probe key, even when the read fails, so the
        // real store is not left with stray test data.
        await ctx.dragonfly.del(testKey);
      }
    },
    assertions: async (ctx) => {
      // If we got here, services are working.
      expect(ctx.vault.isMock()).toBe(false);
      expect(ctx.dragonfly.isMock()).toBe(false);
    },
    cleanup: async () => {},
  }),
};
|
||
|
||
// =============================================================================
|
||
// Example Test Suite
|
||
// =============================================================================
|
||
|
||
/**
 * Registers an example `bun:test` suite that exercises the mock services
 * (Vault, DragonflyDB, LLM) and instruction-packet generation.
 *
 * A new MOCK-mode test context is created before every test, so tests do
 * not share the `ctx` they operate on.
 */
export function runExampleTests() {
  describe("Agent Governance Tests", () => {
    let ctx: TestContext;

    beforeEach(async () => {
      ctx = await createTestContext({ mode: TestMode.MOCK });
    });

    describe("MockVault", () => {
      it("should store and retrieve secrets", async () => {
        (ctx.vault as MockVault).setSecret("test/secret", { key: "value" });
        const secret = await ctx.vault.getSecret("test/secret");
        expect(secret.key).toBe("value");
      });

      it("should create and validate tokens", async () => {
        const token = await ctx.vault.createToken("t0-observer", 60);
        expect(token).toStartWith("hvs.test-");
        expect(await ctx.vault.validateToken(token)).toBe(true);
      });

      it("should revoke tokens", async () => {
        const token = await ctx.vault.createToken("t0-observer");
        await ctx.vault.revokeToken(token);
        expect(await ctx.vault.validateToken(token)).toBe(false);
      });
    });

    describe("MockDragonfly", () => {
      it("should handle string operations", async () => {
        await ctx.dragonfly.set("key", "value");
        expect(await ctx.dragonfly.get("key")).toBe("value");
      });

      it("should handle hash operations", async () => {
        await ctx.dragonfly.hSet("hash", "field", "value");
        expect(await ctx.dragonfly.hGet("hash", "field")).toBe("value");
      });

      it("should handle list operations", async () => {
        await (ctx.dragonfly as MockDragonfly).rPush("list", "a", "b", "c");
        const items = await ctx.dragonfly.lRange("list", 0, -1);
        expect(items).toEqual(["a", "b", "c"]);
      });

      it("should handle NX option", async () => {
        // With NX the second write must be rejected and the first value kept.
        await ctx.dragonfly.set("existing", "first");
        const result = await ctx.dragonfly.set("existing", "second", { NX: true });
        expect(result).toBeNull();
        expect(await ctx.dragonfly.get("existing")).toBe("first");
      });
    });

    describe("MockLLM", () => {
      it("should return default response", async () => {
        const response = await ctx.llm.complete("test prompt");
        expect(response).toContain("confidence");
      });

      it("should match patterns", async () => {
        (ctx.llm as MockLLM).setResponse("terraform", '{"tool": "terraform"}');
        const response = await ctx.llm.complete("Create a terraform plan");
        expect(response).toContain("terraform");
      });

      it("should simulate failures", async () => {
        (ctx.llm as MockLLM).setFailureRate(1.0);
        // `.rejects` produces a promise; it must be awaited, otherwise the
        // test can finish before the rejection is actually checked.
        await expect(ctx.llm.complete("test")).rejects.toThrow("LLM_ERROR");
      });

      it("should track call count", async () => {
        await ctx.llm.complete("prompt 1");
        await ctx.llm.complete("prompt 2");
        expect((ctx.llm as MockLLM).getCallCount()).toBe(2);
      });
    });

    describe("Instruction Packets", () => {
      it("should generate valid packets", () => {
        const packet = generateInstructionPacket("task-1", "agent-1", "Test objective");
        expect(packet.agent_id).toBe("agent-1");
        expect(packet.task_id).toBe("task-1");
        expect(packet.error_budget.max_total_errors).toBe(5);
      });
    });
  });
}
|
||
|
||
// =============================================================================
|
||
// CLI
|
||
// =============================================================================
|
||
|
||
/**
 * Reads the command-line flags from `process.argv` (skipping the first two
 * entries).
 *
 * Recognized flags:
 *  - `--use-mocks`      selects MOCK mode
 *  - `--hybrid`         selects HYBRID mode
 *  - `--validate-only`  sets `validateOnly`
 *  - `-v`, `--verbose`  sets `verbose`
 *
 * The mode defaults to REAL. When several mode flags are given, the last one
 * wins. Unrecognized arguments are ignored.
 *
 * @returns The selected mode plus the `validateOnly` and `verbose` switches.
 */
function parseArgs(): { mode: TestMode; validateOnly: boolean; verbose: boolean } {
  const parsed = { mode: TestMode.REAL, validateOnly: false, verbose: false };

  process.argv.slice(2).forEach((flag) => {
    switch (flag) {
      case "--use-mocks":
        parsed.mode = TestMode.MOCK;
        break;
      case "--hybrid":
        parsed.mode = TestMode.HYBRID;
        break;
      case "--validate-only":
        parsed.validateOnly = true;
        break;
      case "-v":
      case "--verbose":
        parsed.verbose = true;
        break;
      default:
        // Unknown arguments are deliberately left untouched.
        break;
    }
  });

  return parsed;
}
|
||
|
||
// CLI entry point: runs only when this file is executed directly.
// Applies the parsed flags to the global configuration, then either validates
// the backing services (--validate-only) or runs the pre-built scenarios
// through a TestHarness and exits non-zero on any failure.
if (import.meta.main) {
  const { mode, validateOnly, verbose } = parseArgs();
  globalConfig.mode = mode;
  globalConfig.verbose = verbose;

  console.log("Agent Testing Framework");
  console.log("=======================\n");

  if (validateOnly) {
    // Just validate services.
    console.log("Validating services...\n");
    validateServices(globalConfig)
      .then(report => {
        console.log(`Timestamp: ${report.timestamp}`);
        console.log(`Mode: ${report.mode}\n`);

        for (const r of report.results) {
          const icon = r.available ? "✅" : "❌";
          console.log(`${icon} ${r.service}: ${r.available ? "available" : r.error} (${r.latencyMs}ms)`);
        }

        if (report.missingFiles.length > 0) {
          console.log("\n❌ Missing files:");
          for (const f of report.missingFiles) {
            console.log(` - ${f}`);
          }
        }

        console.log(`\n${report.allServicesAvailable ? "✅ All services available" : "❌ Some services unavailable"}`);

        // Unavailable services are only fatal when real services were requested.
        if (!report.allServicesAvailable && mode === TestMode.REAL) {
          console.log("\n⚠️ Use --use-mocks to run tests with mock services");
          process.exit(1);
        }
      })
      .catch((error: unknown) => {
        // Without this handler a rejected validation would surface only as
        // an unhandled promise rejection.
        const reason = error instanceof Error ? error.message : String(error);
        console.error("\n❌ Service validation failed:", reason);
        process.exit(1);
      });
  } else {
    // Run tests.
    const harness = new TestHarness({ mode });

    // Add pre-built scenarios.
    harness.addScenario(CommonScenarios.happyPath(null));
    harness.addScenario(CommonScenarios.errorBudgetExceeded(null));
    harness.addScenario(CommonScenarios.stuckDetection());
    harness.addScenario(CommonScenarios.conflictResolution());

    // Add real service test if in real mode.
    if (mode === TestMode.REAL) {
      harness.addScenario(CommonScenarios.realServiceConnectivity());
    }

    harness
      .runAll()
      .then(metrics => {
        console.log("\n" + "=".repeat(60));
        console.log("TEST RESULTS");
        console.log("=".repeat(60));
        console.log(`Mode: ${mode.toUpperCase()}`);
        console.log(`Passed: ${metrics.passed}`);
        console.log(`Failed: ${metrics.failed}`);
        console.log(`Skipped: ${metrics.skipped}`);
        console.log(`Duration: ${metrics.duration}ms`);
        console.log(`Coverage: ${metrics.coverage.toFixed(1)}%`);

        if (metrics.mocksUsed.length > 0) {
          console.log(`\n⚠️ Mocks used: ${metrics.mocksUsed.join(", ")}`);
        }

        console.log("=".repeat(60));

        process.exit(metrics.failed > 0 ? 1 : 0);
      })
      .catch((error: unknown) => {
        // A rejection is not guaranteed to be an Error instance; fall back to
        // its string form instead of printing `undefined`.
        const reason = error instanceof Error ? error.message : String(error);
        console.error("\n❌ Test harness failed:", reason);
        if (mode === TestMode.REAL) {
          console.log("\n💡 Tip: Use --use-mocks to run with mock services");
        }
        process.exit(1);
      });
  }
}
|