Phase 8 Production Hardening with complete governance infrastructure:

- Vault integration with tiered policies (T0-T4)
- DragonflyDB state management
- SQLite audit ledger
- Pipeline DSL and templates
- Promotion/revocation engine
- Checkpoint system for session persistence
- Health manager and circuit breaker for fault tolerance
- GitHub/Slack integrations
- Architectural test pipeline with bug watcher, suggestion engine, council review
- Multi-agent chaos testing framework

Test Results:

- Governance tests: 68/68 passing
- E2E workflow: 16/16 passing
- Phase 2 Vault: 14/14 passing
- Integration tests: 27/27 passing

Coverage: 57.6% average across 12 phases

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
409 lines
13 KiB
Python
409 lines
13 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Real End-to-End Test for Agent Governance System

Tests the full agent lifecycle using actual infrastructure:
- HashiCorp Vault (authentication, policies)
- DragonflyDB (state management)
- SQLite Ledger (audit trail)
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import json
|
|
import sqlite3
|
|
import subprocess
|
|
import time
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
import redis
|
|
import requests
|
|
|
|
# Configuration
#
# Connection settings may be overridden through environment variables; every
# default below is the value this script previously hard-coded, so running it
# with an unchanged environment behaves exactly as before.
VAULT_ADDR = os.environ.get("VAULT_ADDR", "https://127.0.0.1:8200")
VAULT_TOKEN_FILE = "/opt/vault/init-keys.json"
REDIS_HOST = os.environ.get("REDIS_HOST", "127.0.0.1")
REDIS_PORT = int(os.environ.get("REDIS_PORT", "6379"))
# NOTE(review): the fallback below is a credential committed to source control.
# Prefer supplying REDIS_PASSWORD from the environment and rotating this value.
REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD", "governance2026")
LEDGER_PATH = "/opt/agent-governance/ledger/governance.db"

# Test agent configuration
#
# Both identifiers are derived from a single clock reading so that they always
# carry the same suffix (two separate time.time() calls can straddle a second
# boundary and produce mismatched suffixes).
_RUN_STAMP = int(time.time())
TEST_AGENT_ID = f"e2e-test-agent-{_RUN_STAMP}"
TEST_TASK_ID = f"e2e-task-{_RUN_STAMP}"
|
|
|
|
|
|
class Colors:
    """ANSI escape sequences used to colour and emphasise terminal output."""

    # Foreground colour codes.
    GREEN = "\033[92m"
    RED = "\033[91m"
    YELLOW = "\033[93m"
    BLUE = "\033[94m"

    # Attribute codes: RESET is appended after every coloured message,
    # BOLD is used for the banner lines.
    RESET = "\033[0m"
    BOLD = "\033[1m"
|
|
|
|
|
|
def log(msg, status="info"):
    """Print *msg* with a coloured status symbol in front of it.

    Args:
        msg: Text to print.
        status: One of "ok", "fail", "info" or "warn". Any other value prints
            the message with no colour and no symbol (a leading space and the
            trailing reset code are still emitted).
    """
    # One table holding (colour, symbol) per status keeps the pair in sync.
    styles = {
        "ok": (Colors.GREEN, "✓"),
        "fail": (Colors.RED, "✗"),
        "info": (Colors.BLUE, "→"),
        "warn": (Colors.YELLOW, "!"),
    }
    color, symbol = styles.get(status, ("", ""))
    print(f"{color}{symbol} {msg}{Colors.RESET}")
|
|
|
|
|
|
def get_vault_token():
    """Return the Vault root token stored in the init-keys JSON file.

    Reads the file named by VAULT_TOKEN_FILE, parses it as JSON and returns
    the value under the "root_token" key.

    Raises:
        OSError: If the file cannot be read.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the document has no "root_token" entry.
    """
    init_keys = json.loads(Path(VAULT_TOKEN_FILE).read_text())
    return init_keys["root_token"]
|
|
|
|
|
|
def vault_request(method, path, token, data=None, *, verify=False, timeout=10):
    """Send an authenticated request to the Vault HTTP API.

    Args:
        method: HTTP method name, e.g. "GET" or "POST".
        path: API path below ``/v1/``; a leading slash is tolerated.
        token: Vault token sent in the ``X-Vault-Token`` header.
        data: Optional JSON-serialisable request body.
        verify: Passed to ``requests`` as the TLS verification setting. It may
            be ``True``, ``False`` or a path to a CA bundle. Defaults to
            ``False`` to preserve the previous behaviour.
        timeout: Request timeout in seconds (default 10, as before).

    Returns:
        The ``requests.Response`` object; the status code is NOT checked here,
        callers assert on it themselves.
    """
    # NOTE(review): verify=False disables TLS certificate validation. That is
    # tolerable only against a local Vault with a self-signed certificate;
    # pass a CA bundle path via ``verify`` anywhere else.
    #
    # Normalise the join so a trailing slash on VAULT_ADDR or a leading slash
    # on ``path`` does not produce a "//" in the URL.
    url = f"{VAULT_ADDR.rstrip('/')}/v1/{path.lstrip('/')}"
    return requests.request(
        method,
        url,
        headers={"X-Vault-Token": token},
        json=data,
        verify=verify,
        timeout=timeout,
    )
|
|
|
|
|
|
class RealE2ETest:
    """End-to-end exercise of the agent lifecycle against live services.

    The individual ``test_*`` methods talk to a real Vault server, a real
    Redis-protocol store (DragonflyDB) and the SQLite governance ledger.
    They are order-dependent: ``run_all`` executes them in a fixed sequence
    and later steps rely on state created by earlier ones (for example the
    agent token obtained by ``test_approle_auth``).

    NOTE: the checks use ``assert`` statements, so the script must not be run
    with ``python -O`` (which strips them).
    """

    # Error budget written into the instruction packet. ``max_total_errors``
    # is also the limit that test_error_budget checks against, so the two
    # cannot drift apart.
    ERROR_BUDGET = {
        "max_total_errors": 3,
        "max_same_error_repeats": 2,
        "max_procedure_violations": 1,
    }

    # Lifetime in seconds of the final "completed" state marker written by
    # test_cleanup. Without an expiry every run would leave one key behind.
    FINAL_STATE_TTL = 3600

    def __init__(self):
        """Load the Vault root token and create the Redis client."""
        self.vault_token = get_vault_token()
        self.redis = redis.Redis(
            host=REDIS_HOST,
            port=REDIS_PORT,
            password=REDIS_PASSWORD,
            decode_responses=True,
        )
        # Set by test_approle_auth; stays None if authentication fails.
        self.agent_token = None
        # One (name, passed, error_message_or_None) tuple per executed test.
        self.results = []

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _ledger_execute(self, sql, params=(), *, commit=False):
        """Run one statement against the ledger and return its first row.

        The connection is always closed, even when the statement raises, so a
        failing query does not leak an open SQLite handle.

        Args:
            sql: SQL text, using ``?`` placeholders.
            params: Values bound to the placeholders.
            commit: Commit the transaction after executing (for writes).

        Returns:
            The first result row as a tuple, or ``None`` when the statement
            yields no rows.
        """
        conn = sqlite3.connect(LEDGER_PATH)
        try:
            row = conn.execute(sql, params).fetchone()
            if commit:
                conn.commit()
            return row
        finally:
            conn.close()

    def _require_agent_token(self):
        """Return the agent token, failing the test if none was obtained.

        Without this guard the policy tests would send a request with no
        token at all and judge Vault's reply to an unauthenticated call,
        which says nothing about the t1-operator policy.
        """
        assert self.agent_token, (
            "No agent token available (AppRole authentication did not succeed)"
        )
        return self.agent_token

    # ------------------------------------------------------------------
    # Runner
    # ------------------------------------------------------------------

    def run_all(self):
        """Run every test in order and print a summary.

        Each test's outcome is appended to ``self.results``. A failing test
        does not stop the run.

        Returns:
            True when no test failed, False otherwise.
        """
        print(f"\n{Colors.BOLD}{'='*60}")
        print("REAL END-TO-END TEST")
        print(f"{'='*60}{Colors.RESET}\n")
        print(f"Agent ID: {TEST_AGENT_ID}")
        print(f"Task ID: {TEST_TASK_ID}")
        print(f"Timestamp: {datetime.utcnow().isoformat()}\n")

        tests = [
            ("Vault Connection", self.test_vault_connection),
            ("DragonflyDB Connection", self.test_redis_connection),
            ("Ledger Connection", self.test_ledger_connection),
            ("Register Test Agent", self.test_register_agent),
            ("AppRole Authentication", self.test_approle_auth),
            ("Policy Enforcement (Allow)", self.test_policy_allow),
            ("Policy Enforcement (Deny)", self.test_policy_deny),
            ("Create Instruction Packet", self.test_create_packet),
            ("Acquire Execution Lock", self.test_acquire_lock),
            ("Update Agent State", self.test_update_state),
            ("Record Heartbeat", self.test_heartbeat),
            ("Record Action in Ledger", self.test_record_action),
            ("Track Error Budget", self.test_error_budget),
            ("Release Lock", self.test_release_lock),
            ("Verify Ledger Entry", self.test_verify_ledger),
            ("Cleanup", self.test_cleanup),
        ]

        passed = 0
        failed = 0

        for name, test_func in tests:
            try:
                test_func()
            except AssertionError as e:
                # A check inside the test failed: report its message as-is.
                report = f"{name}: {e}"
                detail = str(e)
            except Exception as e:
                # Anything else (connection error, missing key, ...) is also
                # a failure; include the exception type to tell them apart.
                report = f"{name}: {type(e).__name__}: {e}"
                detail = str(e)
            else:
                log(name, "ok")
                self.results.append((name, True, None))
                passed += 1
                continue

            log(report, "fail")
            self.results.append((name, False, detail))
            failed += 1

        print(f"\n{Colors.BOLD}{'='*60}")
        print(f"RESULTS: {passed}/{passed+failed} passed")
        print(f"{'='*60}{Colors.RESET}\n")

        return failed == 0

    # ------------------------------------------------------------------
    # Connectivity
    # ------------------------------------------------------------------

    def test_vault_connection(self):
        """Test Vault is accessible and unsealed."""
        resp = vault_request("GET", "sys/health", self.vault_token)
        assert resp.status_code == 200, f"Vault unhealthy: {resp.status_code}"
        assert not resp.json().get("sealed"), "Vault is sealed"

    def test_redis_connection(self):
        """Test DragonflyDB is accessible."""
        assert self.redis.ping(), "Redis ping failed"

    def test_ledger_connection(self):
        """Test SQLite ledger is accessible.

        The real check is that the query runs without raising; the count
        itself can never be negative.
        """
        row = self._ledger_execute("SELECT COUNT(*) FROM agent_actions")
        assert row[0] >= 0, "Ledger query failed"

    # ------------------------------------------------------------------
    # Vault: registration, authentication, policy
    # ------------------------------------------------------------------

    def test_register_agent(self):
        """Register the test agent's metadata under secret/data/agents/."""
        agent_data = {
            "agent_id": TEST_AGENT_ID,
            "agent_role": "operator",
            "tier": 1,
            "owner": "e2e-test",
            "version": "1.0.0",
            # These two lists are stored as JSON-encoded strings.
            "allowed_side_effects": '["read_docs","generate_plan","ssh_sandbox"]',
            "forbidden_actions": '["ssh_prod","terraform_apply_prod"]',
            "status": "registered",
            "created_at": datetime.utcnow().isoformat(),
        }

        resp = vault_request(
            "POST",
            f"secret/data/agents/{TEST_AGENT_ID}",
            self.vault_token,
            {"data": agent_data},
        )
        assert resp.status_code in [200, 204], f"Failed to register agent: {resp.text}"

    def test_approle_auth(self):
        """Authenticate using AppRole and store the resulting agent token."""
        # Get role-id
        resp = vault_request(
            "GET", "auth/approle/role/tier1-agent/role-id", self.vault_token
        )
        assert resp.status_code == 200, f"Failed to get role-id: {resp.text}"
        role_id = resp.json()["data"]["role_id"]

        # Generate secret-id
        resp = vault_request(
            "POST", "auth/approle/role/tier1-agent/secret-id", self.vault_token
        )
        assert resp.status_code == 200, f"Failed to generate secret-id: {resp.text}"
        secret_id = resp.json()["data"]["secret_id"]

        # Login. This call carries no token, so it goes through requests
        # directly instead of vault_request.
        resp = requests.post(
            f"{VAULT_ADDR}/v1/auth/approle/login",
            json={"role_id": role_id, "secret_id": secret_id},
            verify=False,
            timeout=10,
        )
        assert resp.status_code == 200, f"AppRole login failed: {resp.text}"

        auth_data = resp.json()["auth"]
        # Stored before the policy check so cleanup can revoke the token even
        # if the check below fails.
        self.agent_token = auth_data["client_token"]

        assert "t1-operator" in auth_data["policies"], "Missing t1-operator policy"

    def test_policy_allow(self):
        """Test that allowed operations work."""
        token = self._require_agent_token()

        # T1 operator should read inventory
        resp = vault_request("GET", "secret/data/inventory/proxmox", token)
        # May be 200 (exists) or 404 (doesn't exist yet) - both are valid
        # policy responses
        assert resp.status_code in [200, 404], f"Policy check failed: {resp.status_code}"

    def test_policy_deny(self):
        """Test that forbidden operations are denied."""
        token = self._require_agent_token()

        # T1 operator should NOT access governance secrets
        resp = vault_request("GET", "secret/data/governance/policies", token)
        assert resp.status_code == 403, f"Should be denied, got: {resp.status_code}"

    # ------------------------------------------------------------------
    # DragonflyDB: packet, lock, state, heartbeat, error budget
    # ------------------------------------------------------------------

    def test_create_packet(self):
        """Create instruction packet in DragonflyDB and read it back."""
        packet_key = f"agent:{TEST_AGENT_ID}:packet"
        packet = {
            "agent_id": TEST_AGENT_ID,
            "task_id": TEST_TASK_ID,
            "objective": "E2E test execution",
            "deliverables": ["test_output"],
            "constraints": {
                "scope": ["sandbox"],
                "forbidden": ["prod_access"],
                "required_steps": ["preflight", "plan", "execute"],
            },
            "success_criteria": ["all_tests_pass"],
            "error_budget": dict(self.ERROR_BUDGET),
            "created_at": datetime.utcnow().isoformat(),
        }

        self.redis.set(packet_key, json.dumps(packet))

        # Verify
        stored = self.redis.get(packet_key)
        assert stored is not None, "Packet not stored"
        assert json.loads(stored)["task_id"] == TEST_TASK_ID

    def test_acquire_lock(self):
        """Acquire execution lock."""
        lock_key = f"agent:{TEST_AGENT_ID}:lock"

        # Set lock with TTL; nx=True makes the call fail if the key exists.
        result = self.redis.set(lock_key, TEST_AGENT_ID, ex=300, nx=True)
        assert result, "Failed to acquire lock"

        # Verify lock
        owner = self.redis.get(lock_key)
        assert owner == TEST_AGENT_ID, f"Lock owner mismatch: {owner}"

    def test_update_state(self):
        """Update agent state through lifecycle phases."""
        state_key = f"agent:{TEST_AGENT_ID}:state"
        phases = ["BOOTSTRAP", "PREFLIGHT", "PLAN", "EXECUTE", "VERIFY"]

        for phase in phases:
            self.redis.hset(state_key, mapping={
                "phase": phase,
                "step": "1",
                "status": "running",
                "updated_at": datetime.utcnow().isoformat(),
            })
            time.sleep(0.1)  # Small delay between phases

        # Verify final state
        state = self.redis.hgetall(state_key)
        assert state["phase"] == "VERIFY", f"Phase mismatch: {state['phase']}"

    def test_heartbeat(self):
        """Record heartbeat with a 60 second expiry."""
        heartbeat_key = f"agent:{TEST_AGENT_ID}:heartbeat"

        self.redis.set(heartbeat_key, datetime.utcnow().isoformat(), ex=60)

        # Verify
        hb = self.redis.get(heartbeat_key)
        assert hb is not None, "Heartbeat not recorded"

    def test_error_budget(self):
        """Test error budget tracking."""
        error_key = f"agent:{TEST_AGENT_ID}:errors"

        # Initialize
        self.redis.hset(error_key, mapping={
            "total_errors": "0",
            "same_error_count": "0",
            "procedure_violations": "0",
        })

        # Simulate an error
        self.redis.hincrby(error_key, "total_errors", 1)

        # Check budget against the same limit that was put in the packet.
        errors = self.redis.hgetall(error_key)
        total = int(errors["total_errors"])
        within_budget = total < self.ERROR_BUDGET["max_total_errors"]
        assert within_budget, f"Error budget exceeded: {total}"

    def test_release_lock(self):
        """Release execution lock."""
        lock_key = f"agent:{TEST_AGENT_ID}:lock"

        # Only release if we own it.
        # NOTE(review): get-then-delete is not atomic. That is acceptable here
        # because the key embeds this run's unique agent ID; a shared lock
        # would need a compare-and-delete script instead.
        owner = self.redis.get(lock_key)
        if owner == TEST_AGENT_ID:
            self.redis.delete(lock_key)

        # Verify released
        assert self.redis.get(lock_key) is None, "Lock not released"

    # ------------------------------------------------------------------
    # Ledger
    # ------------------------------------------------------------------

    def test_record_action(self):
        """Record action in governance ledger."""
        self._ledger_execute(
            """
            INSERT INTO agent_actions
            (timestamp, agent_id, agent_version, tier, action, decision,
            confidence, target, success, session_id, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                datetime.utcnow().isoformat(),
                TEST_AGENT_ID,
                "1.0.0",
                1,
                "e2e_test_action",
                "EXECUTE",
                0.95,
                "sandbox-test",
                1,
                TEST_TASK_ID,  # the task ID doubles as the session ID
                datetime.utcnow().isoformat(),
            ),
            commit=True,
        )

    def test_verify_ledger(self):
        """Verify action was recorded in ledger."""
        row = self._ledger_execute(
            """
            SELECT action, decision, success
            FROM agent_actions
            WHERE agent_id = ? AND session_id = ?
            """,
            (TEST_AGENT_ID, TEST_TASK_ID),
        )

        assert row is not None, "Action not found in ledger"
        assert row[0] == "e2e_test_action"
        assert row[1] == "EXECUTE"
        assert row[2] == 1

    # ------------------------------------------------------------------
    # Cleanup
    # ------------------------------------------------------------------

    def test_cleanup(self):
        """Clean up test data.

        Deletes this run's keys from DragonflyDB, revokes the agent token on
        a best-effort basis, and leaves a short-lived "completed" state
        marker behind. The ledger row and the Vault agent record are not
        removed.
        """
        # Clean DragonflyDB. SCAN iterates incrementally, unlike KEYS, which
        # walks the entire keyspace in a single blocking call.
        keys = list(self.redis.scan_iter(match=f"agent:{TEST_AGENT_ID}:*"))
        if keys:
            self.redis.delete(*keys)

        # Revoke agent token if we have one. Best effort: a transport error
        # must not fail cleanup, but it is reported instead of being hidden.
        if self.agent_token:
            try:
                vault_request("POST", "auth/token/revoke-self", self.agent_token)
            except requests.RequestException as exc:
                log(f"Agent token revocation skipped: {exc}", "warn")

        # Update state to completed. The marker is written after the delete
        # above, so give it an expiry; otherwise each run leaks one key.
        state_key = f"agent:{TEST_AGENT_ID}:state"
        self.redis.hset(state_key, mapping={
            "phase": "EXIT",
            "status": "completed",
            "completed_at": datetime.utcnow().isoformat(),
        })
        self.redis.expire(state_key, self.FINAL_STATE_TTL)
|
|
|
|
|
|
def main():
    """Run the end-to-end suite and print a per-test report.

    Returns:
        Process exit status: 0 when every test passed, 1 otherwise.
    """
    # Suppress SSL warnings (the Vault calls are made with verify=False).
    import urllib3

    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    runner = RealE2ETest()
    all_passed = runner.run_all()

    # Print detailed results
    print("\nDetailed Results:")
    print("-" * 40)
    for test_name, ok, detail in runner.results:
        label = "PASS" if ok else "FAIL"
        print(f" {label}: {test_name}")
        if detail:
            # Only the first 60 characters of the error are shown.
            print(f" {detail[:60]}")

    if all_passed:
        return 0
    return 1
|
|
|
|
|
|
# Script entry point: exit with main()'s status code (0 = all tests passed).
if __name__ == "__main__":
    raise SystemExit(main())
|