agent-governance/bin/validate-phases

#!/usr/bin/env python3
"""
Phase Validation Script
=======================
Validates all 12 phases of the agent governance system.

Checks:
- Database schema existence
- Required tables and columns
- Vault connectivity
- DragonflyDB readiness
- Test suite execution
- Real implementation vs mocks

Usage:
    ./bin/validate-phases [--phase N] [--verbose] [--json] [--skip-tests]
"""

import argparse
import json
import os
import re
import sqlite3
import subprocess
import sys
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Optional, Tuple

# =============================================================================
# Configuration
# =============================================================================

# Installation root; the file and directory checks below build their paths
# from this location.
BASE_DIR = Path("/opt/agent-governance")
# SQLite ledger database opened by the database and schema checks.
DB_PATH = BASE_DIR / "ledger" / "governance.db"
# Vault address from the environment, defaulting to a local HTTPS endpoint.
# NOTE(review): no check visible in this file reads VAULT_ADDR - confirm
# whether it is still needed.
VAULT_ADDR = os.environ.get("VAULT_ADDR", "https://127.0.0.1:8200")

# =============================================================================
# Validation Result
# =============================================================================

@dataclass
class ValidationResult:
    """Accumulated outcome of validating a single governance phase.

    Checks are recorded through :meth:`add_check`; every failing check also
    contributes a ``"<name>: <detail>"`` line to ``errors``. Warnings are
    stored separately and never influence :attr:`passed`.
    """

    phase: int  # phase number supplied by the caller
    name: str  # human-readable phase title
    status: str  # pass, fail, warn
    checks: List[Dict] = field(default_factory=list)  # one dict per recorded check
    test_results: Optional[Dict] = None  # not assigned anywhere inside this class
    errors: List[str] = field(default_factory=list)  # one line per failed check
    warnings: List[str] = field(default_factory=list)  # advisory messages

    def add_check(self, name: str, passed: bool, detail: str = ""):
        """Record one check; a failing check is mirrored into ``errors``."""
        entry = {"name": name, "passed": passed, "detail": detail}
        self.checks.append(entry)
        if passed:
            return
        self.errors.append(f"{name}: {detail}")

    def add_warning(self, message: str):
        """Store an advisory message without affecting the pass/fail outcome."""
        self.warnings.append(message)

    @property
    def passed(self) -> bool:
        """True when no recorded check failed (also true with no checks at all)."""
        for entry in self.checks:
            if not entry["passed"]:
                return False
        return True

    def to_dict(self) -> dict:
        """Return a plain-dict snapshot of this result, including ``passed``."""
        snapshot = dict(
            phase=self.phase,
            name=self.name,
            status=self.status,
            passed=self.passed,
        )
        snapshot.update(
            checks=self.checks,
            test_results=self.test_results,
            errors=self.errors,
            warnings=self.warnings,
        )
        return snapshot


# =============================================================================
# Dependency Checks
# =============================================================================

def check_vault() -> Tuple[bool, str]:
    """Check Vault connectivity and seal state via ``docker exec``.

    Runs ``vault status -format=json`` inside the ``vault`` container.

    Returns:
        ``(True, detail)`` when Vault reports itself unsealed, otherwise
        ``(False, reason)``. Never raises; every failure is folded into the
        returned reason string.
    """
    try:
        result = subprocess.run(
            ["docker", "exec", "vault", "vault", "status", "-format=json"],
            capture_output=True, text=True, timeout=10
        )
        # `vault status` exits 0 when unsealed, 2 when sealed and 1 on error,
        # so a sealed Vault must be recognised on exit code 2 as well;
        # checking only for 0 made the "sealed" branch unreachable.
        sealed_exit = result.returncode == 2 and result.stdout.strip()
        if result.returncode == 0 or sealed_exit:
            data = json.loads(result.stdout)
            # A missing "sealed" key is treated as sealed (fail closed).
            if not data.get("sealed", True):
                return True, f"Vault unsealed, version {data.get('version', 'unknown')}"
            return False, "Vault is sealed"
        return False, f"Vault status failed: {result.stderr}"
    except subprocess.TimeoutExpired:
        return False, "Vault timeout"
    except Exception as e:
        # Covers a missing docker binary, malformed JSON, etc.
        return False, f"Vault error: {e}"


def check_dragonfly() -> Tuple[bool, str]:
    """Check DragonflyDB connectivity over the Redis protocol.

    Connection settings can be overridden through the environment variables
    ``DRAGONFLY_HOST``, ``DRAGONFLY_PORT`` and ``DRAGONFLY_PASSWORD``; when
    unset, the previous built-in values are used so existing deployments keep
    working.

    Returns:
        ``(True, detail)`` when the server answers ``INFO``, otherwise
        ``(False, reason)``. Never raises.
    """
    try:
        # Imported lazily so a missing `redis` package is reported as a failed
        # check instead of breaking the whole script at import time.
        import redis

        # SECURITY: the fallback password is kept only for backward
        # compatibility; prefer supplying DRAGONFLY_PASSWORD via the environment.
        r = redis.Redis(
            host=os.environ.get("DRAGONFLY_HOST", "127.0.0.1"),
            port=int(os.environ.get("DRAGONFLY_PORT", "6379")),
            password=os.environ.get("DRAGONFLY_PASSWORD", "governance2026"),
            socket_timeout=5,
        )
        info = r.info()
        return True, f"Connected, {info.get('connected_clients', 0)} clients"
    except Exception as e:
        # Import errors, bad port values and connection failures all land here.
        return False, f"DragonflyDB error: {e}"


def check_database() -> Tuple[bool, str]:
    """Check that the SQLite ledger database exists and can be queried.

    Returns:
        ``(True, "<n> tables found")`` on success, otherwise ``(False, reason)``.
        Never raises.
    """
    # Test for the file first: sqlite3.connect() would otherwise create an
    # empty database at DB_PATH.
    if not DB_PATH.exists():
        return False, "Database file not found"
    try:
        conn = sqlite3.connect(DB_PATH)
        try:
            cursor = conn.execute("SELECT COUNT(*) FROM sqlite_master WHERE type='table'")
            count = cursor.fetchone()[0]
        finally:
            # Close even when the query fails (e.g. the file is not a database).
            conn.close()
        return True, f"{count} tables found"
    except Exception as e:
        return False, f"Database error: {e}"


# =============================================================================
# Schema Validation
# =============================================================================

def get_tables() -> List[str]:
    """Return the names of all tables in the ledger database, sorted by name.

    A missing database file yields an empty list. Connecting to a non-existent
    path would make SQLite create an empty database there, which a read-only
    validator must not do.

    Raises:
        sqlite3.Error: if the file exists but cannot be queried.
    """
    if not DB_PATH.exists():
        return []
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
        return [row[0] for row in cursor.fetchall()]
    finally:
        # Release the handle even when the query raises.
        conn.close()


def get_columns(table: str) -> List[str]:
    """Return the column names of ``table`` in the ledger database.

    Args:
        table: Table name; it is quoted as an SQL identifier before use.

    Returns:
        Column names in declaration order. Empty when the table does not
        exist, and also when the database file itself is missing (connecting
        would otherwise create an empty database as a side effect).

    Raises:
        sqlite3.Error: if the file exists but cannot be queried.
    """
    if not DB_PATH.exists():
        return []
    # PRAGMA arguments cannot be bound as parameters, so quote the identifier
    # (doubling embedded quotes) instead of interpolating it raw.
    quoted = '"' + table.replace('"', '""') + '"'
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.execute(f"PRAGMA table_info({quoted})")
        # Column 1 of each table_info row is the column name.
        return [row[1] for row in cursor.fetchall()]
    finally:
        conn.close()


def check_required_tables(required: List[str]) -> Tuple[bool, List[str]]:
    """Compare ``required`` table names against the tables in the database.

    Returns:
        ``(all_present, missing)`` where ``missing`` keeps the order of
        ``required``.
    """
    present = get_tables()
    absent = []
    for table_name in required:
        if table_name not in present:
            absent.append(table_name)
    return not absent, absent


def check_required_columns(table: str, required: List[str]) -> Tuple[bool, List[str]]:
    """Compare ``required`` column names against the columns of ``table``.

    Returns:
        ``(all_present, missing)`` where ``missing`` keeps the order of
        ``required``.
    """
    present = get_columns(table)
    absent = []
    for column_name in required:
        if column_name not in present:
            absent.append(column_name)
    return not absent, absent


# =============================================================================
# Code Analysis
# =============================================================================

def check_file_exists(path: Path) -> bool:
    """Report whether ``path`` is present on disk (file or directory)."""
    present = path.exists()
    return present


def check_no_mocks(path: Path) -> Tuple[bool, List[str]]:
    """Scan a source file for lines that look like mock/stub placeholders.

    This is a line-based textual heuristic, not a parser: it flags empty
    returns, TODO/FIXME comments, ``raise NotImplementedError``, a trailing
    ``pass`` and the words "mock"/"stub" (case-insensitive).

    Args:
        path: File to scan. A missing file is treated as clean. Files whose
            path contains "test" are never flagged.

    Returns:
        ``(clean, issues)``. Each issue reads ``"Line <n>: <first 60 chars>"``
        and a line is reported at most once however many patterns it matches.
        A read failure is reported as a single issue.
    """
    if not path.exists():
        return True, []

    mock_patterns = [
        r'\breturn\s+\[\]',  # return []
        r'\breturn\s+\{\}',  # return {}
        r'\breturn\s+None\s*$',  # return None at end
        r'#\s*TODO',  # TODO comments
        r'#\s*FIXME',  # FIXME comments
        r'raise\s+NotImplementedError',
        r'\bpass\s*$',  # bare pass (word boundary keeps "bypass" out)
        r'\bmock\b',  # mock keyword
        r'\bstub\b',  # stub keyword
    ]
    # One alternation compiled once: a line is an issue when any pattern hits.
    placeholder = re.compile(
        "|".join(f"(?:{p})" for p in mock_patterns), re.IGNORECASE
    )

    try:
        content = path.read_text()
    except Exception as e:
        return False, [f"Error reading file: {e}"]

    # Test files legitimately contain mocks; decided once per file rather than
    # once per (line, pattern) pair.
    if 'test' in str(path).lower():
        return True, []

    issues = [
        f"Line {i}: {line.strip()[:60]}"
        for i, line in enumerate(content.split('\n'), 1)
        if placeholder.search(line)
    ]
    return not issues, issues


def count_real_functions(path: Path) -> int:
    """Count function definitions in a Python file, excluding obvious stubs.

    This is a regex heuristic over the file text. Every ``def`` / ``async def``
    header counts as a function; a function is treated as a stub when the
    first statement after its header is ``pass`` or ``raise
    NotImplementedError``.

    Args:
        path: Python source file to inspect.

    Returns:
        Number of non-stub functions; ``0`` when the file is missing or
        cannot be read.
    """
    if not path.exists():
        return 0

    try:
        content = path.read_text()
    except (OSError, UnicodeError):
        # Unreadable file: report "no real functions" rather than abort.
        return 0

    # The same header pattern drives both counts, so an `async def` stub is
    # never subtracted without having been counted first.
    header = r'^[ \t]*(?:async[ \t]+)?def[ \t]+\w+'
    funcs = re.findall(header, content, re.MULTILINE)

    # Signature (may contain annotations), optional return annotation and
    # trailing comment, then a first statement that is exactly `pass` or
    # `raise NotImplementedError`. The \b stops "password = ..." from being
    # read as "pass".
    stub = (
        header
        + r'[ \t]*\([^)]*\)[^:\n]*:[ \t]*(?:#[^\n]*)?\n'
        + r'\s*(?:pass|raise\s+NotImplementedError)\b'
    )
    stubs = re.findall(stub, content, re.MULTILINE)
    return len(funcs) - len(stubs)


# =============================================================================
# Test Execution
# =============================================================================

def _last_count(output: str, label: str) -> int:
    """Return the last ``<N> <label>`` figure found in pytest output, or 0."""
    found = re.findall(rf"\b(\d+) {label}\b", output)
    return int(found[-1]) if found else 0


def run_tests(pattern: str) -> Dict:
    """Run pytest on ``pattern`` from the installation root and summarise it.

    Args:
        pattern: Path or node id handed to pytest as its positional argument.

    Returns:
        On completion: ``{"ran": True, "passed", "failed", "errors",
        "success", "output"}`` where the counts come from pytest's final
        summary line and ``output`` is the last 500 characters of combined
        stdout/stderr. If pytest could not be run or timed out:
        ``{"ran": False, "error": <reason>}``.
    """
    try:
        # Use the running interpreter so pytest comes from the same
        # environment as this script. `-v` and `-q` were dropped: they cancel
        # each other out, leaving default verbosity either way.
        result = subprocess.run(
            [sys.executable or "python3", "-m", "pytest", "--tb=short", pattern],
            capture_output=True, text=True, timeout=120,
            cwd=str(BASE_DIR)
        )

        output = result.stdout + result.stderr

        # Counting "PASSED"/"FAILED"/"ERROR" substrings is unreliable at
        # default verbosity (per-test PASSED lines are not printed, and
        # "ERROR" also matches section headers). Read the totals from the
        # summary line instead, e.g. "2 failed, 5 passed, 1 error in 0.31s";
        # the last match wins so text echoed by tests cannot shadow it.
        passed = _last_count(result.stdout, "passed")
        failed = _last_count(result.stdout, "failed")
        errors = _last_count(result.stdout, "errors?")

        return {
            "ran": True,
            "passed": passed,
            "failed": failed,
            "errors": errors,
            "success": result.returncode == 0,
            "output": output[-500:] if len(output) > 500 else output
        }
    except subprocess.TimeoutExpired:
        return {"ran": False, "error": "Test timeout"}
    except Exception as e:
        return {"ran": False, "error": str(e)}


# =============================================================================
# Phase Validators
# =============================================================================

def validate_phase_1() -> ValidationResult:
    """Phase 1 (Foundation): ledger database, core tables and ledger API.

    Records checks for database availability, presence of the four core
    tables, existence of ``ledger/api.py`` and, when that file exists, that it
    defines more than 20 non-stub functions.
    """
    report = ValidationResult(1, "Foundation", "checking")

    # Ledger database reachable?
    db_ok, db_detail = check_database()
    report.add_check("Database exists", db_ok, db_detail)

    # Core schema
    tables_ok, absent = check_required_tables(
        ["agent_actions", "agent_metrics", "violations", "promotions"]
    )
    table_detail = "All present" if not absent else f"Missing: {absent}"
    report.add_check("Core tables exist", tables_ok, table_detail)

    # Ledger API module
    ledger_api = BASE_DIR / "ledger" / "api.py"
    report.add_check("Ledger API exists", check_file_exists(ledger_api), str(ledger_api))

    # The size check is only recorded when the module is actually there.
    if ledger_api.exists():
        real_count = count_real_functions(ledger_api)
        report.add_check("Real API functions", real_count > 20, f"{real_count} functions")

    if report.passed:
        report.status = "pass"
    else:
        report.status = "fail"
    return report


def validate_phase_2() -> ValidationResult:
    """Phase 2 (Secrets Management): Vault reachability and integration code.

    Records checks for Vault connectivity, for Vault usage inside
    ``runtime/governance.py`` (a text heuristic: the file mentions "vault" in
    any case and contains "curl"), and for the circuit breaker module.
    """
    report = ValidationResult(2, "Secrets Management", "checking")

    # Live Vault status
    vault_ok, vault_detail = check_vault()
    report.add_check("Vault connectivity", vault_ok, vault_detail)

    # Governance module must reference Vault
    governance = BASE_DIR / "runtime" / "governance.py"
    if not governance.exists():
        report.add_check("Governance module exists", False, str(governance))
    else:
        source = governance.read_text()
        mentions_vault = "vault" in source.lower()
        uses_curl = "curl" in source
        report.add_check(
            "Vault integration in governance",
            mentions_vault and uses_curl,
            "Uses Vault API",
        )

    # Circuit breaker module
    breaker = BASE_DIR / "runtime" / "circuit_breaker.py"
    report.add_check("Circuit breaker exists", check_file_exists(breaker), str(breaker))

    if report.passed:
        report.status = "pass"
    else:
        report.status = "fail"
    return report


def validate_phase_3() -> ValidationResult:
    """Phase 3 (Agent Execution): tier 0/1 agent files and action columns.

    For tiers 0 and 1 it records a config-file check, then an implementation
    check (more than 10 non-stub functions in ``agent.py``), and finally
    verifies the required columns of the ``agent_actions`` table.
    """
    report = ValidationResult(3, "Agent Execution", "checking")
    tiers = (0, 1)

    # Agent configuration files
    for tier in tiers:
        cfg = BASE_DIR / f"agents/tier{tier}-agent/config/agent.json"
        report.add_check(f"Tier {tier} config exists", check_file_exists(cfg), str(cfg))

    # Agent implementations
    for tier in tiers:
        impl = BASE_DIR / f"agents/tier{tier}-agent/agent.py"
        if not impl.exists():
            report.add_check(f"Tier {tier} agent exists", False, str(impl))
            continue
        real_count = count_real_functions(impl)
        report.add_check(
            f"Tier {tier} agent implementation",
            real_count > 10,
            f"{real_count} functions",
        )

    # Columns the action ledger must expose
    cols_ok, absent = check_required_columns(
        "agent_actions", ["action", "decision", "confidence", "tier"]
    )
    col_detail = "All present" if not absent else f"Missing: {absent}"
    report.add_check("Action columns exist", cols_ok, col_detail)

    if report.passed:
        report.status = "pass"
    else:
        report.status = "fail"
    return report


def validate_phase_4() -> ValidationResult:
    """Phase 4: Promotion & Revocation.

    Records checks for the ``promotions`` table, for the promotion engine
    (``runtime/promotion.py``: more than 5 non-stub functions and a textual
    reference to ``PROMOTION_REQUIREMENTS`` or ``evaluate_promotion``), and
    for the existence of the revocation engine.
    """
    result = ValidationResult(4, "Promotion & Revocation", "checking")

    # Check promotions table. The detail now names what is missing, matching
    # the other table checks, instead of recording an empty string.
    ok, missing = check_required_tables(["promotions"])
    result.add_check("Promotions table exists", ok, f"Missing: {missing}" if missing else "All present")

    # Check promotion engine
    promo_path = BASE_DIR / "runtime" / "promotion.py"
    if promo_path.exists():
        func_count = count_real_functions(promo_path)
        result.add_check("Promotion engine implementation", func_count > 5, f"{func_count} functions")

        # Check for real promotion logic (text search, not an import)
        content = promo_path.read_text()
        has_logic = "PROMOTION_REQUIREMENTS" in content or "evaluate_promotion" in content
        result.add_check("Promotion logic exists", has_logic, "Has promotion requirements")
    else:
        result.add_check("Promotion engine exists", False, str(promo_path))

    # Check revocation engine
    revoke_path = BASE_DIR / "runtime" / "revocation.py"
    result.add_check("Revocation engine exists", check_file_exists(revoke_path), str(revoke_path))

    result.status = "pass" if result.passed else "fail"
    return result


def validate_phase_5() -> ValidationResult:
    """Phase 5 (Bootstrap & Checkpointing): model controller and checkpoints.

    Records checks for ``orchestrator/model_controller.py`` (more than 5
    non-stub functions, plus a textual mention of ``MODELS`` or "openrouter")
    and for the existence of ``checkpoint/checkpoint.py``.
    """
    report = ValidationResult(5, "Bootstrap & Checkpointing", "checking")

    # Model controller
    controller = BASE_DIR / "orchestrator" / "model_controller.py"
    if not controller.exists():
        report.add_check("Model controller exists", False, str(controller))
    else:
        real_count = count_real_functions(controller)
        report.add_check(
            "Model controller implementation",
            real_count > 5,
            f"{real_count} functions",
        )

        source = controller.read_text()
        defines_models = any(("MODELS" in source, "openrouter" in source.lower()))
        report.add_check("Model definitions exist", defines_models, "Has model configurations")

    # Checkpoint module
    checkpoint = BASE_DIR / "checkpoint" / "checkpoint.py"
    report.add_check("Checkpoint module exists", check_file_exists(checkpoint), str(checkpoint))

    if report.passed:
        report.status = "pass"
    else:
        report.status = "fail"
    return report


def validate_phase_6() -> ValidationResult:
    """Phase 6 (Multi-Agent Orchestration): DragonflyDB and orchestration code.

    Records a DragonflyDB connectivity check and two text heuristics on
    ``runtime/governance.py``: task-assignment function names and any mention
    of handoffs.
    """
    report = ValidationResult(6, "Multi-Agent Orchestration", "checking")

    # Live DragonflyDB status
    cache_ok, cache_detail = check_dragonfly()
    report.add_check("DragonflyDB connectivity", cache_ok, cache_detail)

    # Orchestration markers in the governance module
    governance = BASE_DIR / "runtime" / "governance.py"
    if not governance.exists():
        report.add_check("Governance module exists", False, str(governance))
    else:
        source = governance.read_text()

        assigns_tasks = any(
            marker in source
            for marker in ("assign_agent_to_task", "get_active_agent")
        )
        report.add_check("Task assignment logic", assigns_tasks, "Has agent assignment")

        supports_handoff = any(("HandoffObject" in source, "handoff" in source.lower()))
        report.add_check("Handoff support", supports_handoff, "Has handoff objects")

    if report.passed:
        report.status = "pass"
    else:
        report.status = "fail"
    return report


def validate_phase_7() -> ValidationResult:
    """Phase 7 (Monitoring & Learning): health manager and observability files.

    Only file existence is checked: ``runtime/health_manager.py`` and the
    three modules under ``observability/``.
    """
    report = ValidationResult(7, "Monitoring & Learning", "checking")

    # Health manager
    health = BASE_DIR / "runtime" / "health_manager.py"
    report.add_check("Health manager exists", check_file_exists(health), str(health))

    # Observability modules
    observability_dir = BASE_DIR / "observability"
    for filename in ("logging.py", "metrics.py", "tracing.py"):
        candidate = observability_dir / filename
        report.add_check(
            f"Observability {filename} exists",
            check_file_exists(candidate),
            str(candidate),
        )

    if report.passed:
        report.status = "pass"
    else:
        report.status = "fail"
    return report


def validate_phase_8() -> ValidationResult:
    """Phase 8 (Production Hardening): circuit breaker and API error handling.

    Records checks that ``runtime/circuit_breaker.py`` mentions all three
    state names and has more than 5 non-stub functions, and - only when
    ``ledger/api.py`` exists - that the API source contains both
    ``HTTPException`` and ``try:``. All content checks are text searches.
    """
    report = ValidationResult(8, "Production Hardening", "checking")

    # Circuit breaker
    breaker = BASE_DIR / "runtime" / "circuit_breaker.py"
    if not breaker.exists():
        report.add_check("Circuit breaker exists", False, str(breaker))
    else:
        source = breaker.read_text()
        has_all_states = all(
            state in source for state in ("CLOSED", "OPEN", "HALF_OPEN")
        )
        report.add_check("Circuit breaker states", has_all_states, "Has state machine")

        real_count = count_real_functions(breaker)
        report.add_check(
            "Circuit breaker implementation",
            real_count > 5,
            f"{real_count} functions",
        )

    # Error handling in the ledger API (no check is recorded if the file is absent)
    ledger_api = BASE_DIR / "ledger" / "api.py"
    if ledger_api.exists():
        api_source = ledger_api.read_text()
        handles_errors = all(
            token in api_source for token in ("HTTPException", "try:")
        )
        report.add_check("API error handling", handles_errors, "Has exception handling")

    if report.passed:
        report.status = "pass"
    else:
        report.status = "fail"
    return report


def validate_phase_9() -> ValidationResult:
    """Phase 9 (External Integrations): integrations directory and its tests.

    A missing ``integrations`` directory only produces a warning. The one hard
    check is the existence of the phase-9 integration test file. Unlike the
    other phases, a failing result is given status ``"warn"`` rather than
    ``"fail"``.
    """
    report = ValidationResult(9, "External Integrations", "checking")

    # Integrations directory (advisory only)
    integrations_dir = BASE_DIR / "integrations"
    if not integrations_dir.exists():
        # NOTE: despite the wording, no alternative location is inspected here.
        report.add_warning("Integrations directory not found - checking alternative locations")
    else:
        report.add_check("Integrations directory exists", True, str(integrations_dir))

    # Integration test suite
    suite = BASE_DIR / "tests" / "governance" / "test_phase9_integrations.py"
    report.add_check("Integration tests exist", check_file_exists(suite), str(suite))

    if report.passed:
        report.status = "pass"
    else:
        report.status = "warn"
    return report


def validate_phase_10() -> ValidationResult:
    """Phase 10 (Multi-Tenant Support): migration, tenant schema, API context.

    Records checks for the multi-tenant migration file, the tenant tables,
    the ``tenant_id``/``project_id`` columns on three core tables and - only
    when ``ledger/api.py`` exists - textual references to the tenant context.
    A missing phase-10 test file only adds a warning.
    """
    report = ValidationResult(10, "Multi-Tenant Support", "checking")

    # Migration script
    migration = BASE_DIR / "ledger" / "migrations" / "001_multi_tenant.sql"
    report.add_check("Multi-tenant migration exists", check_file_exists(migration), str(migration))

    # Tenant tables
    tables_ok, absent_tables = check_required_tables(
        ["tenants", "projects", "tenant_quotas", "tenant_usage", "api_keys"]
    )
    table_detail = "All present" if not absent_tables else f"Missing: {absent_tables}"
    report.add_check("Tenant tables exist", tables_ok, table_detail)

    # Tenant columns on the core tables
    tenant_columns = ["tenant_id", "project_id"]
    for core_table in ("agent_metrics", "agent_actions", "violations"):
        cols_ok, absent_cols = check_required_columns(core_table, tenant_columns)
        col_detail = "Present" if not absent_cols else f"Missing: {absent_cols}"
        report.add_check(f"Tenant columns in {core_table}", cols_ok, col_detail)

    # Tenant context in the API (no check is recorded if the file is absent)
    ledger_api = BASE_DIR / "ledger" / "api.py"
    if ledger_api.exists():
        api_source = ledger_api.read_text()
        tenant_aware = all(
            token in api_source for token in ("TenantContext", "get_tenant_context")
        )
        report.add_check("API tenant context", tenant_aware, "Has TenantContext")

    # Dedicated test suite (advisory only)
    suite = BASE_DIR / "tests" / "governance" / "test_phase10_multi_tenant.py"
    if not check_file_exists(suite):
        report.add_warning("No dedicated test suite for Phase 10")

    if report.passed:
        report.status = "pass"
    else:
        report.status = "fail"
    return report


def validate_phase_11() -> ValidationResult:
    """Phase 11 (Agent Marketplace): migration, marketplace schema and API.

    Records checks for the marketplace migration file, the marketplace tables
    and ``marketplace/api.py`` (more than 10 non-stub functions, plus a text
    search for ``template_search`` or "FTS" in any case). A missing phase-11
    test file only adds a warning.
    """
    report = ValidationResult(11, "Agent Marketplace", "checking")

    # Migration script
    migration = BASE_DIR / "ledger" / "migrations" / "002_marketplace.sql"
    report.add_check("Marketplace migration exists", check_file_exists(migration), str(migration))

    # Marketplace tables
    tables_ok, absent = check_required_tables(
        ["agent_templates", "template_versions", "template_ratings", "template_stats"]
    )
    table_detail = "All present" if not absent else f"Missing: {absent}"
    report.add_check("Marketplace tables exist", tables_ok, table_detail)

    # Marketplace API
    market_api = BASE_DIR / "marketplace" / "api.py"
    if not market_api.exists():
        report.add_check("Marketplace API exists", False, str(market_api))
    else:
        real_count = count_real_functions(market_api)
        report.add_check(
            "Marketplace API implementation",
            real_count > 10,
            f"{real_count} functions",
        )

        source = market_api.read_text()
        searchable = any(("template_search" in source, "FTS" in source.upper()))
        report.add_check("Full-text search support", searchable, "Has FTS integration")

    # Dedicated test suite (advisory only)
    suite = BASE_DIR / "tests" / "governance" / "test_phase11_marketplace.py"
    if not check_file_exists(suite):
        report.add_warning("No dedicated test suite for Phase 11")

    if report.passed:
        report.status = "pass"
    else:
        report.status = "fail"
    return report


def validate_phase_12() -> ValidationResult:
    """Phase 12: Observability.

    Records one check per observability module (the file must exist and
    mention every required name - a text search, not an import). The ``logs``
    and ``traces`` tables are advisory: when present they are recorded as
    passed checks, when absent they only produce a warning. A missing
    phase-12 test file also only adds a warning.
    """
    result = ValidationResult(12, "Observability", "checking")

    # Check observability modules
    modules = {
        "metrics.py": ["Counter", "Gauge", "Histogram", "to_prometheus"],
        "tracing.py": ["Span", "Trace", "Tracer"],
        "logging.py": ["LogEntry", "LogStorage", "get_logger"]
    }

    for module, required_classes in modules.items():
        mod_path = BASE_DIR / "observability" / module
        if mod_path.exists():
            content = mod_path.read_text()
            found = [c for c in required_classes if c in content]
            result.add_check(f"Observability {module}", len(found) == len(required_classes),
                           f"Found: {found}")
        else:
            result.add_check(f"Observability {module} exists", False, str(mod_path))

    # Storage tables. Previously written as `"logs" in tables or True`, which
    # is always true and recorded the table as present without ever looking.
    # They stay non-blocking, but the report no longer claims a table exists
    # when it does not.
    try:
        tables = get_tables()
    except sqlite3.Error as e:
        # An unreadable database must not abort the whole validation run.
        tables = []
        result.add_warning(f"Could not list database tables: {e}")

    storage_tables = (
        ("logs", "Logs table exists", "Log storage available"),
        ("traces", "Traces table exists", "Trace storage available"),
    )
    for table, check_name, detail in storage_tables:
        if table in tables:
            result.add_check(check_name, True, detail)
        else:
            result.add_warning(f"Table '{table}' not found in the ledger database")

    # Check for tests
    test_path = BASE_DIR / "tests" / "governance" / "test_phase12_observability.py"
    if not check_file_exists(test_path):
        result.add_warning("No dedicated test suite for Phase 12")

    result.status = "pass" if result.passed else "fail"
    return result


# =============================================================================
# Main Validation
# =============================================================================

# Phase number -> validator callable. Keys are assigned by position, starting
# at 1, so each function's place in the tuple is its phase number.
VALIDATORS = dict(enumerate(
    (
        validate_phase_1,
        validate_phase_2,
        validate_phase_3,
        validate_phase_4,
        validate_phase_5,
        validate_phase_6,
        validate_phase_7,
        validate_phase_8,
        validate_phase_9,
        validate_phase_10,
        validate_phase_11,
        validate_phase_12,
    ),
    start=1,
))


def run_validation(phases: Optional[List[int]] = None, run_tests: bool = False, verbose: bool = False) -> Dict:
    """Run the dependency checks and the selected phase validators.

    A human-readable report is printed to stdout as a side effect.

    Args:
        phases: Phase numbers to validate; ``None`` selects every registered
            phase. Numbers without a registered validator are reported and
            skipped.
        run_tests: Accepted for interface compatibility; this function does
            not currently execute any test suite. Note that inside this
            function the name shadows the module-level ``run_tests`` helper.
        verbose: When true, every individual check is printed, not only the
            per-phase status line.

    Returns:
        Summary dict with ``timestamp``, ``total_phases``, ``passed``,
        ``failed``, ``warnings`` (number of phases in WARN state),
        ``dependencies`` and ``results`` (one dict per validated phase).
    """
    if phases is None:
        phases = sorted(VALIDATORS)

    results = []
    summary = {
        "timestamp": datetime.now().isoformat(),
        "total_phases": len(phases),
        "passed": 0,
        "failed": 0,
        "warnings": 0
    }

    # Check dependencies first
    print("=" * 60)
    print("DEPENDENCY CHECKS")
    print("=" * 60)

    vault_ok, vault_detail = check_vault()
    dragonfly_ok, dragonfly_detail = check_dragonfly()
    db_ok, db_detail = check_database()

    print(f"  {'[OK]' if vault_ok else '[FAIL]'} Vault: {vault_detail}")
    print(f"  {'[OK]' if dragonfly_ok else '[FAIL]'} DragonflyDB: {dragonfly_detail}")
    print(f"  {'[OK]' if db_ok else '[FAIL]'} Database: {db_detail}")
    print()

    summary["dependencies"] = {
        "vault": {"ok": vault_ok, "detail": vault_detail},
        "dragonfly": {"ok": dragonfly_ok, "detail": dragonfly_detail},
        "database": {"ok": db_ok, "detail": db_detail}
    }

    # Run phase validations
    print("=" * 60)
    print("PHASE VALIDATION")
    print("=" * 60)

    for phase in phases:
        validator = VALIDATORS.get(phase)
        if validator is None:
            # Say so explicitly instead of silently dropping the request.
            print(f"\nPhase {phase}: no validator registered - skipped")
            continue

        result = validator()

        # Classify by the validator's own verdict. A phase with failed checks
        # counts as WARN only when its validator marked it "warn"; merely
        # having emitted an unrelated warning must not downgrade a failure
        # (that previously let failing phases exit with status 0).
        if result.passed:
            summary["passed"] += 1
            status_icon = "[PASS]"
        elif result.status == "warn":
            summary["warnings"] += 1
            status_icon = "[WARN]"
        else:
            summary["failed"] += 1
            status_icon = "[FAIL]"

        print(f"\nPhase {phase}: {result.name} {status_icon}")

        if verbose:
            for check in result.checks:
                icon = "  [+]" if check["passed"] else "  [-]"
                print(f"{icon} {check['name']}: {check['detail']}")

        for warning in result.warnings:
            print(f"  [!] WARNING: {warning}")

        results.append(result.to_dict())

    # Summary
    print()
    print("=" * 60)
    print("SUMMARY")
    print("=" * 60)
    print(f"  Total Phases: {summary['total_phases']}")
    print(f"  Passed: {summary['passed']}")
    print(f"  Failed: {summary['failed']}")
    print(f"  Warnings: {summary['warnings']}")
    print()

    if summary["failed"] == 0:
        print("  STATUS: ALL PHASES VALIDATED SUCCESSFULLY")
    else:
        print(f"  STATUS: {summary['failed']} PHASE(S) NEED ATTENTION")

    summary["results"] = results
    return summary


def main():
    """Command-line entry point: parse arguments, validate, set the exit code.

    Exits with status 0 when no phase failed and 1 otherwise. With ``--json``
    the human-readable report is sent to stderr so that stdout carries only
    the JSON document.
    """
    parser = argparse.ArgumentParser(description="Validate agent governance phases")
    # Restricting to registered phases rejects e.g. `--phase 13`, which used to
    # validate nothing and still report success.
    parser.add_argument("--phase", "-p", type=int, choices=sorted(VALIDATORS), metavar="N",
                        help="Validate specific phase (1-12)")
    parser.add_argument("--verbose", "-v", action="store_true", help="Show detailed output")
    parser.add_argument("--json", "-j", action="store_true", help="Output JSON format")
    parser.add_argument("--skip-tests", action="store_true", help="Skip test execution")

    args = parser.parse_args()

    # Compare against None: a plain truthiness test treats 0 as "not given".
    phases = [args.phase] if args.phase is not None else None

    if args.json:
        # run_validation prints its report as it goes; keep that off stdout so
        # the output can be piped straight into a JSON parser.
        import contextlib

        with contextlib.redirect_stdout(sys.stderr):
            results = run_validation(
                phases=phases,
                run_tests=not args.skip_tests,
                verbose=args.verbose
            )
        print(json.dumps(results, indent=2))
    else:
        results = run_validation(
            phases=phases,
            run_tests=not args.skip_tests,
            verbose=args.verbose
        )

    # Exit code based on results
    sys.exit(0 if results["failed"] == 0 else 1)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()