agent-governance/tests/real_e2e_test.py
profit 77655c298c Initial commit: Agent Governance System Phase 8
Phase 8 Production Hardening with complete governance infrastructure:

- Vault integration with tiered policies (T0-T4)
- DragonflyDB state management
- SQLite audit ledger
- Pipeline DSL and templates
- Promotion/revocation engine
- Checkpoint system for session persistence
- Health manager and circuit breaker for fault tolerance
- GitHub/Slack integrations
- Architectural test pipeline with bug watcher, suggestion engine, council review
- Multi-agent chaos testing framework

Test Results:
- Governance tests: 68/68 passing
- E2E workflow: 16/16 passing
- Phase 2 Vault: 14/14 passing
- Integration tests: 27/27 passing

Coverage: 57.6% average across 12 phases

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 22:07:06 -05:00

409 lines
13 KiB
Python

#!/usr/bin/env python3
"""
Real End-to-End Test for Agent Governance System
Tests the full agent lifecycle using actual infrastructure:
- HashiCorp Vault (authentication, policies)
- DragonflyDB (state management)
- SQLite Ledger (audit trail)
"""
import os
import sys
import json
import sqlite3
import subprocess
import time
from datetime import datetime
from pathlib import Path
import redis
import requests
# Configuration
# Every connection setting can be overridden through an environment variable
# of the same name (the pattern VAULT_ADDR already used); the defaults are the
# values this script was originally hard-coded with.
VAULT_ADDR = os.environ.get("VAULT_ADDR", "https://127.0.0.1:8200")
VAULT_TOKEN_FILE = os.environ.get("VAULT_TOKEN_FILE", "/opt/vault/init-keys.json")
REDIS_HOST = os.environ.get("REDIS_HOST", "127.0.0.1")
REDIS_PORT = int(os.environ.get("REDIS_PORT", "6379"))
# NOTE(review): a credential default in source is a smell; prefer exporting
# REDIS_PASSWORD in the environment running this test.
REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD", "governance2026")
LEDGER_PATH = os.environ.get(
    "LEDGER_PATH", "/opt/agent-governance/ledger/governance.db"
)

# Test agent configuration
# One timestamp feeds both identifiers so their numeric suffixes always match,
# even when the module is imported right on a second boundary.
_RUN_STAMP = int(time.time())
TEST_AGENT_ID = f"e2e-test-agent-{_RUN_STAMP}"
TEST_TASK_ID = f"e2e-task-{_RUN_STAMP}"
class Colors:
    """ANSI escape sequences used to decorate terminal output."""

    # Text attributes
    BOLD = '\033[1m'
    RESET = '\033[0m'

    # Bright foreground colors
    RED = '\033[91m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'
def log(msg, status="info"):
    """Print *msg* in the color associated with *status*.

    Recognized statuses are "ok", "fail", "info" and "warn"; any other value
    prints the message without a color or marker. The color is always reset at
    the end of the line.
    """
    palette = {
        "ok": Colors.GREEN,
        "fail": Colors.RED,
        "info": Colors.BLUE,
        "warn": Colors.YELLOW,
    }
    markers = {"ok": "", "fail": "", "info": "", "warn": "!"}

    color = palette.get(status, '')
    marker = markers.get(status, '')
    print(f"{color}{marker} {msg}{Colors.RESET}")
def get_vault_token():
    """Return the "root_token" entry of the JSON file at VAULT_TOKEN_FILE."""
    with open(VAULT_TOKEN_FILE) as handle:
        return json.load(handle)["root_token"]
def vault_request(method, path, token, data=None):
    """Send an authenticated request to the Vault HTTP API.

    *path* is appended to ``{VAULT_ADDR}/v1/`` and *token* is sent in the
    ``X-Vault-Token`` header. *data*, when given, is sent as the JSON body.
    TLS certificate verification is disabled and the request times out after
    10 seconds. The ``requests`` response object is returned as-is; no status
    checking happens here.
    """
    return requests.request(
        method,
        f"{VAULT_ADDR}/v1/{path}",
        headers={"X-Vault-Token": token},
        json=data,
        verify=False,
        timeout=10,
    )
class RealE2ETest:
    """Sequential end-to-end check of the agent lifecycle against live services.

    The steps share state (``self.agent_token`` plus the keys and rows created
    for ``TEST_AGENT_ID``), so they are meant to run in the order listed in
    :meth:`run_all`.
    """

    def __init__(self):
        """Load the Vault root token and build the DragonflyDB client."""
        self.vault_token = get_vault_token()
        self.redis = redis.Redis(
            host=REDIS_HOST,
            port=REDIS_PORT,
            password=REDIS_PASSWORD,
            decode_responses=True
        )
        # Populated by test_approle_auth; the policy steps depend on it.
        self.agent_token = None
        # One (name, passed, error-message-or-None) tuple per executed step.
        self.results = []

    def run_all(self):
        """Run every step in order and print a summary.

        A failing step is recorded and logged but does not stop the run, so
        the final "Cleanup" step is always attempted.

        Returns:
            True when no step failed, False otherwise.
        """
        print(f"\n{Colors.BOLD}{'='*60}")
        print("REAL END-TO-END TEST")
        print(f"{'='*60}{Colors.RESET}\n")
        print(f"Agent ID: {TEST_AGENT_ID}")
        print(f"Task ID: {TEST_TASK_ID}")
        print(f"Timestamp: {datetime.utcnow().isoformat()}\n")

        tests = [
            ("Vault Connection", self.test_vault_connection),
            ("DragonflyDB Connection", self.test_redis_connection),
            ("Ledger Connection", self.test_ledger_connection),
            ("Register Test Agent", self.test_register_agent),
            ("AppRole Authentication", self.test_approle_auth),
            ("Policy Enforcement (Allow)", self.test_policy_allow),
            ("Policy Enforcement (Deny)", self.test_policy_deny),
            ("Create Instruction Packet", self.test_create_packet),
            ("Acquire Execution Lock", self.test_acquire_lock),
            ("Update Agent State", self.test_update_state),
            ("Record Heartbeat", self.test_heartbeat),
            ("Record Action in Ledger", self.test_record_action),
            ("Track Error Budget", self.test_error_budget),
            ("Release Lock", self.test_release_lock),
            ("Verify Ledger Entry", self.test_verify_ledger),
            ("Cleanup", self.test_cleanup),
        ]

        passed = 0
        failed = 0
        for name, test_func in tests:
            try:
                test_func()
                log(name, "ok")
                self.results.append((name, True, None))
                passed += 1
            except AssertionError as e:
                # Expectation failures: the assertion message is the detail.
                log(f"{name}: {e}", "fail")
                self.results.append((name, False, str(e)))
                failed += 1
            except Exception as e:
                # Anything else (connection errors, KeyError on a response,
                # ...) also counts as a failure; include the exception type.
                log(f"{name}: {type(e).__name__}: {e}", "fail")
                self.results.append((name, False, str(e)))
                failed += 1

        print(f"\n{Colors.BOLD}{'='*60}")
        print(f"RESULTS: {passed}/{passed+failed} passed")
        print(f"{'='*60}{Colors.RESET}\n")
        return failed == 0

    def _require_agent_token(self):
        """Return the agent token, failing the step if it was never obtained.

        Without this guard a policy step would issue its request with a
        ``None`` token, and the resulting status code would say nothing about
        the agent's policy.
        """
        assert self.agent_token is not None, (
            "No agent token available (AppRole authentication must succeed first)"
        )
        return self.agent_token

    def test_vault_connection(self):
        """Test Vault is accessible"""
        resp = vault_request("GET", "sys/health", self.vault_token)
        assert resp.status_code == 200, f"Vault unhealthy: {resp.status_code}"
        data = resp.json()
        assert not data.get("sealed"), "Vault is sealed"

    def test_redis_connection(self):
        """Test DragonflyDB is accessible"""
        assert self.redis.ping(), "Redis ping failed"

    def test_ledger_connection(self):
        """Test SQLite ledger is accessible"""
        conn = sqlite3.connect(LEDGER_PATH)
        try:
            cursor = conn.cursor()
            cursor.execute("SELECT COUNT(*) FROM agent_actions")
            count = cursor.fetchone()[0]
        finally:
            # Close even when the query raises (e.g. missing table).
            conn.close()
        assert count >= 0, "Ledger query failed"

    def test_register_agent(self):
        """Register test agent in Vault"""
        agent_data = {
            "agent_id": TEST_AGENT_ID,
            "agent_role": "operator",
            "tier": 1,
            "owner": "e2e-test",
            "version": "1.0.0",
            "allowed_side_effects": '["read_docs","generate_plan","ssh_sandbox"]',
            "forbidden_actions": '["ssh_prod","terraform_apply_prod"]',
            "status": "registered",
            "created_at": datetime.utcnow().isoformat()
        }
        resp = vault_request(
            "POST",
            f"secret/data/agents/{TEST_AGENT_ID}",
            self.vault_token,
            {"data": agent_data}
        )
        assert resp.status_code in [200, 204], f"Failed to register agent: {resp.text}"

    def test_approle_auth(self):
        """Authenticate using AppRole and remember the resulting client token"""
        # Get role-id
        resp = vault_request("GET", "auth/approle/role/tier1-agent/role-id", self.vault_token)
        assert resp.status_code == 200, f"Failed to get role-id: {resp.text}"
        role_id = resp.json()["data"]["role_id"]

        # Generate secret-id
        resp = vault_request("POST", "auth/approle/role/tier1-agent/secret-id", self.vault_token)
        assert resp.status_code == 200, f"Failed to generate secret-id: {resp.text}"
        secret_id = resp.json()["data"]["secret_id"]

        # Login (unauthenticated endpoint, so no X-Vault-Token header here)
        resp = requests.post(
            f"{VAULT_ADDR}/v1/auth/approle/login",
            json={"role_id": role_id, "secret_id": secret_id},
            verify=False,
            timeout=10
        )
        assert resp.status_code == 200, f"AppRole login failed: {resp.text}"
        auth_data = resp.json()["auth"]
        self.agent_token = auth_data["client_token"]
        assert "t1-operator" in auth_data["policies"], "Missing t1-operator policy"

    def test_policy_allow(self):
        """Test that allowed operations work"""
        token = self._require_agent_token()
        # T1 operator should read inventory
        resp = vault_request("GET", "secret/data/inventory/proxmox", token)
        # May be 200 (exists) or 404 (doesn't exist yet) - both are valid policy responses
        assert resp.status_code in [200, 404], f"Policy check failed: {resp.status_code}"

    def test_policy_deny(self):
        """Test that forbidden operations are denied"""
        token = self._require_agent_token()
        # T1 operator should NOT access governance secrets
        resp = vault_request("GET", "secret/data/governance/policies", token)
        assert resp.status_code == 403, f"Should be denied, got: {resp.status_code}"

    def test_create_packet(self):
        """Create instruction packet in DragonflyDB"""
        packet = {
            "agent_id": TEST_AGENT_ID,
            "task_id": TEST_TASK_ID,
            "objective": "E2E test execution",
            "deliverables": ["test_output"],
            "constraints": {
                "scope": ["sandbox"],
                "forbidden": ["prod_access"],
                "required_steps": ["preflight", "plan", "execute"]
            },
            "success_criteria": ["all_tests_pass"],
            "error_budget": {
                "max_total_errors": 3,
                "max_same_error_repeats": 2,
                "max_procedure_violations": 1
            },
            "created_at": datetime.utcnow().isoformat()
        }
        self.redis.set(
            f"agent:{TEST_AGENT_ID}:packet",
            json.dumps(packet)
        )

        # Verify
        stored = self.redis.get(f"agent:{TEST_AGENT_ID}:packet")
        assert stored is not None, "Packet not stored"
        assert json.loads(stored)["task_id"] == TEST_TASK_ID

    def test_acquire_lock(self):
        """Acquire execution lock"""
        lock_key = f"agent:{TEST_AGENT_ID}:lock"
        # Set lock with TTL; nx=True makes the SET succeed only if no lock exists
        result = self.redis.set(lock_key, TEST_AGENT_ID, ex=300, nx=True)
        assert result, "Failed to acquire lock"

        # Verify lock
        owner = self.redis.get(lock_key)
        assert owner == TEST_AGENT_ID, f"Lock owner mismatch: {owner}"

    def test_update_state(self):
        """Update agent state through lifecycle phases"""
        phases = ["BOOTSTRAP", "PREFLIGHT", "PLAN", "EXECUTE", "VERIFY"]
        for phase in phases:
            self.redis.hset(f"agent:{TEST_AGENT_ID}:state", mapping={
                "phase": phase,
                "step": "1",
                "status": "running",
                "updated_at": datetime.utcnow().isoformat()
            })
            time.sleep(0.1)  # Small delay between phases

        # Verify final state
        state = self.redis.hgetall(f"agent:{TEST_AGENT_ID}:state")
        assert state["phase"] == "VERIFY", f"Phase mismatch: {state['phase']}"

    def test_heartbeat(self):
        """Record heartbeat"""
        heartbeat_key = f"agent:{TEST_AGENT_ID}:heartbeat"
        self.redis.set(heartbeat_key, datetime.utcnow().isoformat(), ex=60)

        # Verify
        hb = self.redis.get(heartbeat_key)
        assert hb is not None, "Heartbeat not recorded"

    def test_record_action(self):
        """Record action in governance ledger"""
        conn = sqlite3.connect(LEDGER_PATH)
        try:
            cursor = conn.cursor()
            cursor.execute("""
                INSERT INTO agent_actions
                (timestamp, agent_id, agent_version, tier, action, decision,
                confidence, target, success, session_id, created_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                datetime.utcnow().isoformat(),
                TEST_AGENT_ID,
                "1.0.0",
                1,
                "e2e_test_action",
                "EXECUTE",
                0.95,
                "sandbox-test",
                1,
                TEST_TASK_ID,  # stored as session_id; test_verify_ledger looks it up
                datetime.utcnow().isoformat()
            ))
            conn.commit()
        finally:
            # Close even when the insert raises, so the handle is not leaked.
            conn.close()

    def test_error_budget(self):
        """Test error budget tracking"""
        error_key = f"agent:{TEST_AGENT_ID}:errors"

        # Initialize
        self.redis.hset(error_key, mapping={
            "total_errors": "0",
            "same_error_count": "0",
            "procedure_violations": "0"
        })

        # Simulate an error
        self.redis.hincrby(error_key, "total_errors", 1)

        # Check budget
        errors = self.redis.hgetall(error_key)
        total = int(errors["total_errors"])

        # Budget check (max 3 for this test)
        within_budget = total < 3
        assert within_budget, f"Error budget exceeded: {total}"

    def test_release_lock(self):
        """Release execution lock"""
        lock_key = f"agent:{TEST_AGENT_ID}:lock"

        # Only release if we own it
        owner = self.redis.get(lock_key)
        if owner == TEST_AGENT_ID:
            self.redis.delete(lock_key)

        # Verify released
        assert self.redis.get(lock_key) is None, "Lock not released"

    def test_verify_ledger(self):
        """Verify action was recorded in ledger"""
        conn = sqlite3.connect(LEDGER_PATH)
        try:
            cursor = conn.cursor()
            cursor.execute("""
                SELECT action, decision, success
                FROM agent_actions
                WHERE agent_id = ? AND session_id = ?
            """, (TEST_AGENT_ID, TEST_TASK_ID))
            row = cursor.fetchone()
        finally:
            conn.close()

        assert row is not None, "Action not found in ledger"
        assert row[0] == "e2e_test_action"
        assert row[1] == "EXECUTE"
        assert row[2] == 1

    def test_cleanup(self):
        """Clean up test data"""
        # Clean DragonflyDB. SCAN iterates incrementally instead of blocking
        # the server the way a single KEYS call does.
        keys = list(self.redis.scan_iter(match=f"agent:{TEST_AGENT_ID}:*"))
        if keys:
            self.redis.delete(*keys)

        # Revoke agent token if we have one
        if self.agent_token:
            try:
                vault_request("POST", "auth/token/revoke-self", self.agent_token)
            except requests.RequestException:
                # Best effort: a transport failure here must not fail cleanup.
                pass

        # Update state to completed.
        # NOTE(review): this runs after the key deletion above, so the state
        # hash for this agent is re-created and remains after the run.
        self.redis.hset(f"agent:{TEST_AGENT_ID}:state", mapping={
            "phase": "EXIT",
            "status": "completed",
            "completed_at": datetime.utcnow().isoformat()
        })
def main():
    """Run the end-to-end suite and return a process exit code.

    Returns 0 when every step passed and 1 otherwise. A per-step PASS/FAIL
    listing is printed after the run, with each error message truncated to
    60 characters.
    """
    # The Vault requests are made with verify=False; silence the resulting
    # InsecureRequestWarning noise.
    import urllib3
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    runner = RealE2ETest()
    all_passed = runner.run_all()

    # Print detailed results
    print("\nDetailed Results:")
    print("-" * 40)
    for name, passed, error in runner.results:
        label = "PASS" if passed else "FAIL"
        print(f" {label}: {name}")
        if error:
            print(f" {error[:60]}")

    if all_passed:
        return 0
    return 1
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())