#!/usr/bin/env python3
"""
Real End-to-End Test for Agent Governance System

Tests the full agent lifecycle using actual infrastructure:
- HashiCorp Vault (authentication, policies)
- DragonflyDB (state management)
- SQLite Ledger (audit trail)
"""

import os
import sys
import json
import sqlite3
import subprocess
import time
from contextlib import closing
from datetime import datetime, timezone
from pathlib import Path

import redis
import requests

# Configuration
# Every setting can be overridden through the environment; the defaults are
# the values this script has always used, so an unconfigured run is unchanged.
VAULT_ADDR = os.environ.get("VAULT_ADDR", "https://127.0.0.1:8200")
VAULT_TOKEN_FILE = os.environ.get("VAULT_TOKEN_FILE", "/opt/vault/init-keys.json")
REDIS_HOST = os.environ.get("REDIS_HOST", "127.0.0.1")
REDIS_PORT = int(os.environ.get("REDIS_PORT", "6379"))
# NOTE(review): the fallback password is kept only for backward compatibility;
# prefer supplying REDIS_PASSWORD via the environment.
REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD", "governance2026")
LEDGER_PATH = os.environ.get(
    "LEDGER_PATH", "/opt/agent-governance/ledger/governance.db"
)

# How long the final "EXIT" state written by cleanup stays in DragonflyDB.
# Each run uses a unique agent ID, so without a TTL one hash leaks per run.
FINAL_STATE_TTL_SECONDS = 3600

# Test agent configuration
# A single timestamp is shared so both IDs always carry the same suffix.
_RUN_STAMP = int(time.time())
TEST_AGENT_ID = f"e2e-test-agent-{_RUN_STAMP}"
TEST_TASK_ID = f"e2e-task-{_RUN_STAMP}"


def _utc_now_iso():
    """Return the current UTC time as a naive ISO-8601 string.

    ``datetime.utcnow()`` is deprecated since Python 3.12.  This helper uses
    the timezone-aware replacement and then drops the tzinfo so the string
    has the same format (no ``+00:00`` suffix) that ``utcnow().isoformat()``
    produced.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()


class Colors:
    """ANSI escape sequences used to colorize console output."""

    GREEN = '\033[92m'
    RED = '\033[91m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'
    RESET = '\033[0m'
    BOLD = '\033[1m'


def log(msg, status="info"):
    """Print ``msg`` with a colored status prefix.

    Args:
        msg: Text to print.
        status: One of ``"ok"``, ``"fail"``, ``"info"``, ``"warn"``.  Any
            other value prints the message with no color and no prefix
            symbol.
    """
    colors = {
        "ok": Colors.GREEN,
        "fail": Colors.RED,
        "info": Colors.BLUE,
        "warn": Colors.YELLOW,
    }
    prefix = {"ok": "✓", "fail": "✗", "info": "→", "warn": "!"}
    print(f"{colors.get(status, '')}{prefix.get(status, '')} {msg}{Colors.RESET}")


def get_vault_token():
    """Get root token from init file.

    Returns:
        The value stored under ``"root_token"`` in ``VAULT_TOKEN_FILE``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the JSON document has no ``"root_token"`` key.
    """
    with open(VAULT_TOKEN_FILE, encoding="utf-8") as f:
        data = json.load(f)
    return data["root_token"]


def vault_request(method, path, token, data=None):
    """Make authenticated Vault request.

    Args:
        method: HTTP method name (e.g. ``"GET"``, ``"POST"``).
        path: API path below ``/v1/``.
        token: Vault token sent in the ``X-Vault-Token`` header.
        data: Optional JSON-serializable request body.

    Returns:
        The ``requests.Response``.  The status code is NOT checked here;
        callers inspect it themselves.
    """
    url = f"{VAULT_ADDR}/v1/{path}"
    headers = {"X-Vault-Token": token}
    # NOTE(review): verify=False disables TLS certificate validation
    # (presumably because the local Vault uses a self-signed certificate).
    resp = requests.request(
        method,
        url,
        headers=headers,
        json=data,
        verify=False,
        timeout=10,
    )
    return resp


class RealE2ETest:
    """Runs the ordered end-to-end checks against live infrastructure.

    The tests are stateful and order-dependent: later steps rely on data
    (agent token, Redis keys, ledger row) produced by earlier ones.
    """

    def __init__(self):
        """Load the Vault root token and create the DragonflyDB client."""
        self.vault_token = get_vault_token()
        self.redis = redis.Redis(
            host=REDIS_HOST,
            port=REDIS_PORT,
            password=REDIS_PASSWORD,
            decode_responses=True,
        )
        # Set by test_approle_auth; stays None if that step fails.
        self.agent_token = None
        # List of (test name, passed, error message or None).
        self.results = []

    def _require_agent_token(self):
        """Fail fast when the AppRole login step did not yield a token.

        Without this guard the policy tests would send a request with no
        usable token; Vault would presumably reject it, letting the "deny"
        test pass for the wrong reason.
        """
        assert self.agent_token, "No agent token (AppRole authentication did not succeed)"
        return self.agent_token

    def run_all(self):
        """Run all tests.

        Each test is executed in order; a failure is recorded and the run
        continues with the next test.

        Returns:
            True if every test passed, False otherwise.
        """
        print(f"\n{Colors.BOLD}{'='*60}")
        print("REAL END-TO-END TEST")
        print(f"{'='*60}{Colors.RESET}\n")
        print(f"Agent ID: {TEST_AGENT_ID}")
        print(f"Task ID: {TEST_TASK_ID}")
        print(f"Timestamp: {_utc_now_iso()}\n")

        tests = [
            ("Vault Connection", self.test_vault_connection),
            ("DragonflyDB Connection", self.test_redis_connection),
            ("Ledger Connection", self.test_ledger_connection),
            ("Register Test Agent", self.test_register_agent),
            ("AppRole Authentication", self.test_approle_auth),
            ("Policy Enforcement (Allow)", self.test_policy_allow),
            ("Policy Enforcement (Deny)", self.test_policy_deny),
            ("Create Instruction Packet", self.test_create_packet),
            ("Acquire Execution Lock", self.test_acquire_lock),
            ("Update Agent State", self.test_update_state),
            ("Record Heartbeat", self.test_heartbeat),
            ("Record Action in Ledger", self.test_record_action),
            ("Track Error Budget", self.test_error_budget),
            ("Release Lock", self.test_release_lock),
            ("Verify Ledger Entry", self.test_verify_ledger),
            ("Cleanup", self.test_cleanup),
        ]

        passed = 0
        failed = 0

        for name, test_func in tests:
            try:
                test_func()
                log(name, "ok")
                self.results.append((name, True, None))
                passed += 1
            except AssertionError as e:
                # Assertion failures carry their own message; no type prefix.
                log(f"{name}: {e}", "fail")
                self.results.append((name, False, str(e)))
                failed += 1
            except Exception as e:
                # Top-level boundary: any other error is reported as a
                # failed test so the remaining tests still run.
                log(f"{name}: {type(e).__name__}: {e}", "fail")
                self.results.append((name, False, str(e)))
                failed += 1

        print(f"\n{Colors.BOLD}{'='*60}")
        print(f"RESULTS: {passed}/{passed+failed} passed")
        print(f"{'='*60}{Colors.RESET}\n")

        return failed == 0

    def test_vault_connection(self):
        """Test Vault is accessible"""
        resp = vault_request("GET", "sys/health", self.vault_token)
        assert resp.status_code == 200, f"Vault unhealthy: {resp.status_code}"
        data = resp.json()
        assert not data.get("sealed"), "Vault is sealed"

    def test_redis_connection(self):
        """Test DragonflyDB is accessible"""
        assert self.redis.ping(), "Redis ping failed"

    def test_ledger_connection(self):
        """Test SQLite ledger is accessible"""
        # closing() guarantees the connection is released even if the query
        # raises (sqlite3's own context manager only manages transactions).
        with closing(sqlite3.connect(LEDGER_PATH)) as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT COUNT(*) FROM agent_actions")
            count = cursor.fetchone()[0]
        assert count >= 0, "Ledger query failed"

    def test_register_agent(self):
        """Register test agent in Vault"""
        agent_data = {
            "agent_id": TEST_AGENT_ID,
            "agent_role": "operator",
            "tier": 1,
            "owner": "e2e-test",
            "version": "1.0.0",
            "allowed_side_effects": '["read_docs","generate_plan","ssh_sandbox"]',
            "forbidden_actions": '["ssh_prod","terraform_apply_prod"]',
            "status": "registered",
            "created_at": _utc_now_iso(),
        }

        resp = vault_request(
            "POST",
            f"secret/data/agents/{TEST_AGENT_ID}",
            self.vault_token,
            {"data": agent_data},
        )
        assert resp.status_code in [200, 204], f"Failed to register agent: {resp.text}"

    def test_approle_auth(self):
        """Authenticate using AppRole.

        On success stores the issued client token in ``self.agent_token``
        for the policy tests that follow.
        """
        # Get role-id
        resp = vault_request(
            "GET", "auth/approle/role/tier1-agent/role-id", self.vault_token
        )
        assert resp.status_code == 200, f"Failed to get role-id: {resp.text}"
        role_id = resp.json()["data"]["role_id"]

        # Generate secret-id
        resp = vault_request(
            "POST", "auth/approle/role/tier1-agent/secret-id", self.vault_token
        )
        assert resp.status_code == 200, f"Failed to generate secret-id: {resp.text}"
        secret_id = resp.json()["data"]["secret_id"]

        # Login (sent without a token header, hence not via vault_request)
        resp = requests.post(
            f"{VAULT_ADDR}/v1/auth/approle/login",
            json={"role_id": role_id, "secret_id": secret_id},
            verify=False,
            timeout=10,
        )
        assert resp.status_code == 200, f"AppRole login failed: {resp.text}"

        auth_data = resp.json()["auth"]
        self.agent_token = auth_data["client_token"]
        assert "t1-operator" in auth_data["policies"], "Missing t1-operator policy"

    def test_policy_allow(self):
        """Test that allowed operations work"""
        token = self._require_agent_token()
        # T1 operator should read inventory
        resp = vault_request("GET", "secret/data/inventory/proxmox", token)
        # May be 200 (exists) or 404 (doesn't exist yet) - both are valid policy responses
        assert resp.status_code in [200, 404], f"Policy check failed: {resp.status_code}"

    def test_policy_deny(self):
        """Test that forbidden operations are denied"""
        token = self._require_agent_token()
        # T1 operator should NOT access governance secrets
        resp = vault_request("GET", "secret/data/governance/policies", token)
        assert resp.status_code == 403, f"Should be denied, got: {resp.status_code}"

    def test_create_packet(self):
        """Create instruction packet in DragonflyDB"""
        packet = {
            "agent_id": TEST_AGENT_ID,
            "task_id": TEST_TASK_ID,
            "objective": "E2E test execution",
            "deliverables": ["test_output"],
            "constraints": {
                "scope": ["sandbox"],
                "forbidden": ["prod_access"],
                "required_steps": ["preflight", "plan", "execute"],
            },
            "success_criteria": ["all_tests_pass"],
            "error_budget": {
                "max_total_errors": 3,
                "max_same_error_repeats": 2,
                "max_procedure_violations": 1,
            },
            "created_at": _utc_now_iso(),
        }

        packet_key = f"agent:{TEST_AGENT_ID}:packet"
        self.redis.set(packet_key, json.dumps(packet))

        # Verify
        stored = self.redis.get(packet_key)
        assert stored is not None, "Packet not stored"
        assert json.loads(stored)["task_id"] == TEST_TASK_ID

    def test_acquire_lock(self):
        """Acquire execution lock"""
        lock_key = f"agent:{TEST_AGENT_ID}:lock"

        # Set lock with TTL; nx=True makes the SET succeed only if the key
        # does not exist yet.
        result = self.redis.set(lock_key, TEST_AGENT_ID, ex=300, nx=True)
        assert result, "Failed to acquire lock"

        # Verify lock
        owner = self.redis.get(lock_key)
        assert owner == TEST_AGENT_ID, f"Lock owner mismatch: {owner}"

    def test_update_state(self):
        """Update agent state through lifecycle phases"""
        state_key = f"agent:{TEST_AGENT_ID}:state"
        phases = ["BOOTSTRAP", "PREFLIGHT", "PLAN", "EXECUTE", "VERIFY"]

        for phase in phases:
            self.redis.hset(state_key, mapping={
                "phase": phase,
                "step": "1",
                "status": "running",
                "updated_at": _utc_now_iso(),
            })
            time.sleep(0.1)  # Small delay between phases

        # Verify final state
        state = self.redis.hgetall(state_key)
        assert state["phase"] == "VERIFY", f"Phase mismatch: {state['phase']}"

    def test_heartbeat(self):
        """Record heartbeat"""
        heartbeat_key = f"agent:{TEST_AGENT_ID}:heartbeat"
        self.redis.set(heartbeat_key, _utc_now_iso(), ex=60)

        # Verify
        hb = self.redis.get(heartbeat_key)
        assert hb is not None, "Heartbeat not recorded"

    def test_record_action(self):
        """Record action in governance ledger"""
        # closing() releases the connection even when the INSERT fails.
        with closing(sqlite3.connect(LEDGER_PATH)) as conn:
            cursor = conn.cursor()
            cursor.execute("""
                INSERT INTO agent_actions
                (timestamp, agent_id, agent_version, tier, action, decision,
                 confidence, target, success, session_id, created_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                _utc_now_iso(),
                TEST_AGENT_ID,
                "1.0.0",
                1,
                "e2e_test_action",
                "EXECUTE",
                0.95,
                "sandbox-test",
                1,
                TEST_TASK_ID,
                _utc_now_iso(),
            ))
            conn.commit()

    def test_error_budget(self):
        """Test error budget tracking"""
        error_key = f"agent:{TEST_AGENT_ID}:errors"

        # Initialize
        self.redis.hset(error_key, mapping={
            "total_errors": "0",
            "same_error_count": "0",
            "procedure_violations": "0",
        })

        # Simulate an error
        self.redis.hincrby(error_key, "total_errors", 1)

        # Check budget
        errors = self.redis.hgetall(error_key)
        total = int(errors["total_errors"])

        # Budget check (max 3 for this test)
        within_budget = total < 3
        assert within_budget, f"Error budget exceeded: {total}"

    def test_release_lock(self):
        """Release execution lock"""
        lock_key = f"agent:{TEST_AGENT_ID}:lock"

        # Only release if we own it.
        # NOTE(review): GET followed by DELETE is not atomic; acceptable here
        # because the lock key is unique to this test run.
        owner = self.redis.get(lock_key)
        if owner == TEST_AGENT_ID:
            self.redis.delete(lock_key)

        # Verify released
        assert self.redis.get(lock_key) is None, "Lock not released"

    def test_verify_ledger(self):
        """Verify action was recorded in ledger"""
        with closing(sqlite3.connect(LEDGER_PATH)) as conn:
            cursor = conn.cursor()
            cursor.execute("""
                SELECT action, decision, success
                FROM agent_actions
                WHERE agent_id = ? AND session_id = ?
            """, (TEST_AGENT_ID, TEST_TASK_ID))
            row = cursor.fetchone()

        assert row is not None, "Action not found in ledger"
        assert row[0] == "e2e_test_action"
        assert row[1] == "EXECUTE"
        assert row[2] == 1

    def test_cleanup(self):
        """Clean up test data.

        Deletes this run's DragonflyDB keys, revokes the agent token on a
        best-effort basis, then records a final ``EXIT`` state that expires
        after ``FINAL_STATE_TTL_SECONDS``.
        """
        # Clean DragonflyDB.  SCAN iterates incrementally instead of blocking
        # the server the way KEYS does on a large keyspace.
        keys = list(self.redis.scan_iter(match=f"agent:{TEST_AGENT_ID}:*"))
        if keys:
            self.redis.delete(*keys)

        # Revoke agent token if we have one
        if self.agent_token:
            try:
                vault_request("POST", "auth/token/revoke-self", self.agent_token)
            except requests.RequestException as exc:
                # Best effort: a transport failure must not fail cleanup, but
                # it should not vanish silently either.
                log(f"Token revocation skipped: {exc}", "warn")

        # Update state to completed
        state_key = f"agent:{TEST_AGENT_ID}:state"
        self.redis.hset(state_key, mapping={
            "phase": "EXIT",
            "status": "completed",
            "completed_at": _utc_now_iso(),
        })
        # The key is written after the delete above, so give it a TTL to
        # avoid leaking one hash per test run.
        self.redis.expire(state_key, FINAL_STATE_TTL_SECONDS)


def main():
    """Run the full suite and print a per-test summary.

    Returns:
        Process exit code: 0 when all tests passed, 1 otherwise.
    """
    # Suppress SSL warnings
    import urllib3
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    test = RealE2ETest()
    success = test.run_all()

    # Print detailed results
    print("\nDetailed Results:")
    print("-" * 40)
    for name, passed, error in test.results:
        status = "PASS" if passed else "FAIL"
        print(f" {status}: {name}")
        if error:
            # Truncated so one long error cannot flood the summary.
            print(f" {error[:60]}")

    return 0 if success else 1


if __name__ == "__main__":
    sys.exit(main())