agent-governance/tests/real_e2e_test.py

#!/usr/bin/env python3
"""
Real End-to-End Test for Agent Governance System

Tests the full agent lifecycle using actual infrastructure:
- HashiCorp Vault (authentication, policies)
- DragonflyDB (state management)
- SQLite Ledger (audit trail)
"""

import os
import sys
import json
import sqlite3
import subprocess
import time
from datetime import datetime
from pathlib import Path

import redis
import requests

# Configuration
# Every setting can be overridden through the environment so the suite can be
# pointed at another deployment without editing this file.  The defaults are
# the values that used to be hard-coded, so behaviour is unchanged when no
# override is set.
VAULT_ADDR = os.environ.get("VAULT_ADDR", "https://127.0.0.1:8200")
VAULT_TOKEN_FILE = os.environ.get("E2E_VAULT_TOKEN_FILE", "/opt/vault/init-keys.json")
REDIS_HOST = os.environ.get("E2E_REDIS_HOST", "127.0.0.1")
REDIS_PORT = int(os.environ.get("E2E_REDIS_PORT", "6379"))
# NOTE(review): the fallback password is committed to source control; supply
# E2E_REDIS_PASSWORD instead wherever that matters.
REDIS_PASSWORD = os.environ.get("E2E_REDIS_PASSWORD", "governance2026")
LEDGER_PATH = os.environ.get(
    "E2E_LEDGER_PATH", "/opt/agent-governance/ledger/governance.db"
)

# Test agent configuration
# Read the clock once so both identifiers always carry the same suffix; two
# separate time.time() calls could straddle a second boundary.
_RUN_STAMP = int(time.time())
TEST_AGENT_ID = f"e2e-test-agent-{_RUN_STAMP}"
TEST_TASK_ID = f"e2e-task-{_RUN_STAMP}"


class Colors:
    """ANSI escape sequences used to style the console output of this script."""

    # Foreground colours.
    RED = '\033[91m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'

    # Text attributes; RESET is appended after every styled message.
    BOLD = '\033[1m'
    RESET = '\033[0m'


def log(msg, status="info"):
    """Print *msg* prefixed with a status symbol and wrapped in a colour.

    ``status`` is one of ``"ok"``, ``"fail"``, ``"info"`` or ``"warn"``.  Any
    other value prints the message with no colour and no symbol (the leading
    space and the trailing reset sequence are still emitted).
    """
    # One table keyed by status: (colour escape, prefix symbol).
    styles = {
        "ok": (Colors.GREEN, "✓"),
        "fail": (Colors.RED, "✗"),
        "info": (Colors.BLUE, "→"),
        "warn": (Colors.YELLOW, "!"),
    }
    colour, symbol = styles.get(status, ('', ''))
    print(f"{colour}{symbol} {msg}{Colors.RESET}")


def get_vault_token():
    """Return the ``root_token`` entry of the JSON file at ``VAULT_TOKEN_FILE``.

    Errors from opening the file, parsing the JSON or a missing
    ``root_token`` key are not handled here and propagate to the caller.
    """
    with open(VAULT_TOKEN_FILE) as handle:
        init_data = json.loads(handle.read())
    return init_data["root_token"]


def vault_request(method, path, token, data=None):
    """Send one HTTP request to the Vault API and return the raw response.

    Args:
        method: HTTP verb, e.g. ``"GET"`` or ``"POST"``.
        path: API path relative to ``{VAULT_ADDR}/v1/``.
        token: Value sent in the ``X-Vault-Token`` header.
        data: Optional JSON-serialisable request body.

    Returns:
        The ``requests`` response object; the status code is not checked here.
    """
    return requests.request(
        method,
        f"{VAULT_ADDR}/v1/{path}",
        headers={"X-Vault-Token": token},
        json=data,
        # Certificate verification is switched off for this request.
        verify=False,
        timeout=10,
    )


class RealE2ETest:
    """Order-dependent end-to-end checks against Vault, DragonflyDB and the ledger.

    Each ``test_*`` method raises ``AssertionError`` (or lets the underlying
    client exception propagate) on failure.  Several steps rely on state left
    behind by earlier ones -- the AppRole token, the Redis keys, the ledger
    row -- so the intended entry point is :meth:`run_all`.
    """

    # Lifetime in seconds of the final "EXIT" state record written by cleanup.
    FINAL_STATE_TTL = 3600

    def __init__(self):
        """Read the Vault root token and build the DragonflyDB client."""
        self.vault_token = get_vault_token()
        self.redis = redis.Redis(
            host=REDIS_HOST,
            port=REDIS_PORT,
            password=REDIS_PASSWORD,
            decode_responses=True
        )
        # Set by test_approle_auth; stays None until that step succeeds.
        self.agent_token = None
        # One (name, passed, error message or None) tuple per executed step.
        self.results = []

    def _require_agent_token(self):
        """Return the AppRole token, failing the calling step if there is none.

        Without this guard a failed login leaves ``agent_token`` as ``None``
        and the policy checks would be sent without a usable token, so their
        status codes would say nothing about the tier-1 policy.
        """
        assert self.agent_token, (
            "No agent token available (AppRole authentication did not succeed)"
        )
        return self.agent_token

    def _ledger_query(self, sql, params=(), commit=False):
        """Run one statement against the SQLite ledger and return its first row.

        Args:
            sql: Statement text using ``?`` placeholders.
            params: Values bound to the placeholders.
            commit: Commit the transaction after the statement has run.

        Returns:
            The first result row, or ``None`` when the statement yields no rows.

        The connection is closed even when the statement raises.
        """
        conn = sqlite3.connect(LEDGER_PATH)
        try:
            row = conn.execute(sql, params).fetchone()
            if commit:
                conn.commit()
            return row
        finally:
            conn.close()

    def run_all(self):
        """Run every step in order, print a summary and record the outcomes.

        A failing step never aborts the run: its exception is logged and the
        next step is attempted, so the final "Cleanup" step is reached as well.

        Returns:
            ``True`` when no step failed, ``False`` otherwise.  Per-step
            outcomes are appended to ``self.results``.
        """
        print(f"\n{Colors.BOLD}{'='*60}")
        print("REAL END-TO-END TEST")
        print(f"{'='*60}{Colors.RESET}\n")
        print(f"Agent ID: {TEST_AGENT_ID}")
        print(f"Task ID: {TEST_TASK_ID}")
        print(f"Timestamp: {datetime.utcnow().isoformat()}\n")

        tests = [
            ("Vault Connection", self.test_vault_connection),
            ("DragonflyDB Connection", self.test_redis_connection),
            ("Ledger Connection", self.test_ledger_connection),
            ("Register Test Agent", self.test_register_agent),
            ("AppRole Authentication", self.test_approle_auth),
            ("Policy Enforcement (Allow)", self.test_policy_allow),
            ("Policy Enforcement (Deny)", self.test_policy_deny),
            ("Create Instruction Packet", self.test_create_packet),
            ("Acquire Execution Lock", self.test_acquire_lock),
            ("Update Agent State", self.test_update_state),
            ("Record Heartbeat", self.test_heartbeat),
            ("Record Action in Ledger", self.test_record_action),
            ("Track Error Budget", self.test_error_budget),
            ("Release Lock", self.test_release_lock),
            ("Verify Ledger Entry", self.test_verify_ledger),
            ("Cleanup", self.test_cleanup),
        ]

        passed = 0
        failed = 0

        for name, test_func in tests:
            try:
                test_func()
                log(name, "ok")
                self.results.append((name, True, None))
                passed += 1
            except AssertionError as e:
                # A failed expectation: the assertion message is the whole story.
                log(f"{name}: {e}", "fail")
                self.results.append((name, False, str(e)))
                failed += 1
            except Exception as e:
                # Anything else (connection errors, bad JSON, ...): include the
                # exception type so the cause is identifiable from the log.
                log(f"{name}: {type(e).__name__}: {e}", "fail")
                self.results.append((name, False, str(e)))
                failed += 1

        print(f"\n{Colors.BOLD}{'='*60}")
        print(f"RESULTS: {passed}/{passed+failed} passed")
        print(f"{'='*60}{Colors.RESET}\n")

        return failed == 0

    def test_vault_connection(self):
        """Check that Vault answers its health endpoint with 200 and is unsealed."""
        resp = vault_request("GET", "sys/health", self.vault_token)
        assert resp.status_code == 200, f"Vault unhealthy: {resp.status_code}"
        data = resp.json()
        assert not data.get("sealed"), "Vault is sealed"

    def test_redis_connection(self):
        """Check that DragonflyDB answers a PING."""
        assert self.redis.ping(), "Redis ping failed"

    def test_ledger_connection(self):
        """Check that the ledger database opens and ``agent_actions`` is queryable."""
        # The real check is that the query does not raise; COUNT(*) itself can
        # never be negative.
        count = self._ledger_query("SELECT COUNT(*) FROM agent_actions")[0]
        assert count >= 0, "Ledger query failed"

    def test_register_agent(self):
        """Write the test agent's registration record under ``secret/data/agents``."""
        agent_data = {
            "agent_id": TEST_AGENT_ID,
            "agent_role": "operator",
            "tier": 1,
            "owner": "e2e-test",
            "version": "1.0.0",
            "allowed_side_effects": '["read_docs","generate_plan","ssh_sandbox"]',
            "forbidden_actions": '["ssh_prod","terraform_apply_prod"]',
            "status": "registered",
            "created_at": datetime.utcnow().isoformat()
        }

        resp = vault_request(
            "POST",
            f"secret/data/agents/{TEST_AGENT_ID}",
            self.vault_token,
            {"data": agent_data}
        )
        assert resp.status_code in [200, 204], f"Failed to register agent: {resp.text}"

    def test_approle_auth(self):
        """Log in through the ``tier1-agent`` AppRole and keep the client token.

        On success ``self.agent_token`` holds the token used by the policy
        checks, and the granted policies must include ``t1-operator``.
        """
        # Get role-id
        resp = vault_request("GET", "auth/approle/role/tier1-agent/role-id", self.vault_token)
        assert resp.status_code == 200, f"Failed to get role-id: {resp.text}"
        role_id = resp.json()["data"]["role_id"]

        # Generate secret-id
        resp = vault_request("POST", "auth/approle/role/tier1-agent/secret-id", self.vault_token)
        assert resp.status_code == 200, f"Failed to generate secret-id: {resp.text}"
        secret_id = resp.json()["data"]["secret_id"]

        # Login (no token header: the login endpoint is what issues the token)
        resp = requests.post(
            f"{VAULT_ADDR}/v1/auth/approle/login",
            json={"role_id": role_id, "secret_id": secret_id},
            verify=False,
            timeout=10
        )
        assert resp.status_code == 200, f"AppRole login failed: {resp.text}"

        auth_data = resp.json()["auth"]
        self.agent_token = auth_data["client_token"]

        assert "t1-operator" in auth_data["policies"], "Missing t1-operator policy"

    def test_policy_allow(self):
        """Check that the agent token may read the inventory path."""
        token = self._require_agent_token()
        # T1 operator should read inventory
        resp = vault_request("GET", "secret/data/inventory/proxmox", token)
        # May be 200 (exists) or 404 (doesn't exist yet) - both are valid policy responses
        assert resp.status_code in [200, 404], f"Policy check failed: {resp.status_code}"

    def test_policy_deny(self):
        """Check that the agent token is refused on the governance path."""
        # Without a real token a 403 would not prove the policy denied anything.
        token = self._require_agent_token()
        # T1 operator should NOT access governance secrets
        resp = vault_request("GET", "secret/data/governance/policies", token)
        assert resp.status_code == 403, f"Should be denied, got: {resp.status_code}"

    def test_create_packet(self):
        """Store the instruction packet in DragonflyDB and read it back."""
        packet = {
            "agent_id": TEST_AGENT_ID,
            "task_id": TEST_TASK_ID,
            "objective": "E2E test execution",
            "deliverables": ["test_output"],
            "constraints": {
                "scope": ["sandbox"],
                "forbidden": ["prod_access"],
                "required_steps": ["preflight", "plan", "execute"]
            },
            "success_criteria": ["all_tests_pass"],
            "error_budget": {
                "max_total_errors": 3,
                "max_same_error_repeats": 2,
                "max_procedure_violations": 1
            },
            "created_at": datetime.utcnow().isoformat()
        }
        packet_key = f"agent:{TEST_AGENT_ID}:packet"

        self.redis.set(packet_key, json.dumps(packet))

        # Verify
        stored = self.redis.get(packet_key)
        assert stored is not None, "Packet not stored"
        stored_task_id = json.loads(stored)["task_id"]
        assert stored_task_id == TEST_TASK_ID, f"Packet task_id mismatch: {stored_task_id}"

    def test_acquire_lock(self):
        """Take the agent's execution lock and confirm this agent owns it."""
        lock_key = f"agent:{TEST_AGENT_ID}:lock"

        # NX makes the SET succeed only when the key is absent; the TTL keeps a
        # crashed run from holding the lock forever.
        result = self.redis.set(lock_key, TEST_AGENT_ID, ex=300, nx=True)
        assert result, "Failed to acquire lock"

        # Verify lock
        owner = self.redis.get(lock_key)
        assert owner == TEST_AGENT_ID, f"Lock owner mismatch: {owner}"

    def test_update_state(self):
        """Walk the state hash through the lifecycle phases and check the last one."""
        state_key = f"agent:{TEST_AGENT_ID}:state"
        phases = ["BOOTSTRAP", "PREFLIGHT", "PLAN", "EXECUTE", "VERIFY"]

        for phase in phases:
            self.redis.hset(state_key, mapping={
                "phase": phase,
                "step": "1",
                "status": "running",
                "updated_at": datetime.utcnow().isoformat()
            })
            time.sleep(0.1)  # Small delay between phases

        # Verify final state
        state = self.redis.hgetall(state_key)
        assert state["phase"] == "VERIFY", f"Phase mismatch: {state['phase']}"

    def test_heartbeat(self):
        """Write a heartbeat timestamp with a 60 second TTL and read it back."""
        heartbeat_key = f"agent:{TEST_AGENT_ID}:heartbeat"

        self.redis.set(heartbeat_key, datetime.utcnow().isoformat(), ex=60)

        # Verify
        hb = self.redis.get(heartbeat_key)
        assert hb is not None, "Heartbeat not recorded"

    def test_record_action(self):
        """Insert one action row for this run into the governance ledger."""
        self._ledger_query("""
            INSERT INTO agent_actions
            (timestamp, agent_id, agent_version, tier, action, decision,
             confidence, target, success, session_id, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            datetime.utcnow().isoformat(),
            TEST_AGENT_ID,
            "1.0.0",
            1,
            "e2e_test_action",
            "EXECUTE",
            0.95,
            "sandbox-test",
            1,
            TEST_TASK_ID,
            datetime.utcnow().isoformat()
        ), commit=True)

    def test_error_budget(self):
        """Initialise the error counters, record one error and check the budget."""
        error_key = f"agent:{TEST_AGENT_ID}:errors"

        # Initialize
        self.redis.hset(error_key, mapping={
            "total_errors": "0",
            "same_error_count": "0",
            "procedure_violations": "0"
        })

        # Simulate an error
        self.redis.hincrby(error_key, "total_errors", 1)

        # Check budget
        errors = self.redis.hgetall(error_key)
        total = int(errors["total_errors"])

        # Budget check (max 3 for this test)
        within_budget = total < 3
        assert within_budget, f"Error budget exceeded: {total}"

    def test_release_lock(self):
        """Delete the execution lock if this agent holds it, then confirm it is gone."""
        lock_key = f"agent:{TEST_AGENT_ID}:lock"

        # Only release if we own it.  The read and the delete are two separate
        # commands, which is adequate here because the key is unique to this run.
        owner = self.redis.get(lock_key)
        if owner == TEST_AGENT_ID:
            self.redis.delete(lock_key)

        # Verify released
        assert self.redis.get(lock_key) is None, "Lock not released"

    def test_verify_ledger(self):
        """Read back the row written by ``test_record_action`` and check its fields."""
        row = self._ledger_query("""
            SELECT action, decision, success
            FROM agent_actions
            WHERE agent_id = ? AND session_id = ?
        """, (TEST_AGENT_ID, TEST_TASK_ID))

        assert row is not None, "Action not found in ledger"
        action, decision, success = row
        assert action == "e2e_test_action", f"Unexpected action: {action}"
        assert decision == "EXECUTE", f"Unexpected decision: {decision}"
        assert success == 1, f"Unexpected success flag: {success}"

    def test_cleanup(self):
        """Remove this run's keys, revoke the agent token and record an EXIT state.

        NOTE(review): the Vault secret written by ``test_register_agent`` and
        the ledger row written by ``test_record_action`` are not removed here.
        """
        # Clean DragonflyDB.  SCAN-based iteration replaces the single
        # full-keyspace KEYS call.
        keys = list(self.redis.scan_iter(match=f"agent:{TEST_AGENT_ID}:*"))
        if keys:
            self.redis.delete(*keys)

        # Revoke agent token if we have one
        if self.agent_token:
            try:
                vault_request("POST", "auth/token/revoke-self", self.agent_token)
            except requests.RequestException as exc:
                # Best effort: a transport failure must not fail the cleanup
                # step, but it should be visible rather than swallowed.
                log(f"Could not revoke agent token: {exc}", "warn")

        # Update state to completed.  This key is created after the delete
        # above, so give it a TTL; otherwise every run leaves one key behind.
        # NOTE(review): confirm nothing needs this record beyond the TTL.
        state_key = f"agent:{TEST_AGENT_ID}:state"
        self.redis.hset(state_key, mapping={
            "phase": "EXIT",
            "status": "completed",
            "completed_at": datetime.utcnow().isoformat()
        })
        self.redis.expire(state_key, self.FINAL_STATE_TTL)


def main():
    """Run the suite, print a per-step summary and return a process exit code.

    Returns:
        ``0`` when every step passed, ``1`` otherwise.
    """
    # Requests are made with verify=False, which would otherwise emit an
    # InsecureRequestWarning on every call.
    import urllib3
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    suite = RealE2ETest()
    all_passed = suite.run_all()

    # Print detailed results
    print("\nDetailed Results:")
    print("-" * 40)
    for step_name, ok, message in suite.results:
        label = "FAIL"
        if ok:
            label = "PASS"
        print(f"  {label}: {step_name}")
        if message:
            # Keep the summary compact: only the first 60 characters are shown.
            print(f"         {message[:60]}")

    if all_passed:
        return 0
    return 1


# Script entry point: pass main()'s return value to the shell as the exit status.
if __name__ == "__main__":
    sys.exit(main())