# agent-governance/preflight/dependency_check.py

#!/usr/bin/env python3
"""
Dependency Checker
==================
Verifies that all required secrets and resources are available before execution.
Part of Phase 3: Execution Pipeline - Preflight System.
"""

import json
import shutil
import subprocess
import sys
from dataclasses import dataclass
from datetime import datetime, timezone
from enum import Enum
from typing import Optional
from urllib.parse import urlsplit


class CheckStatus(str, Enum):
    """Outcome of a single preflight check.

    The ``str`` mixin makes every member an instance of ``str`` whose value
    is the literal on the right-hand side.
    """

    # The dependency was verified.
    PASS = "PASS"
    # The dependency is missing or unusable; a report containing any FAIL
    # sets "can_proceed" to False.
    FAIL = "FAIL"
    # Something looked unexpected; counted under "warnings" in the report
    # summary and does not clear "can_proceed".
    WARN = "WARN"
    # The check was not performed (e.g. no health endpoint is defined for
    # the requested service).
    SKIP = "SKIP"


@dataclass
class CheckResult:
    """Record describing the outcome of one preflight check."""

    # Identifier of the check, e.g. "secret:<path>" or "tool:<name>"
    check_name: str
    # Outcome of the check
    status: CheckStatus
    # Human-readable one-line explanation of the outcome
    message: str
    # Structured context for the outcome (contents vary per check)
    details: dict
    # Time the result was produced, as a string
    timestamp: str

    def to_dict(self) -> dict:
        """Return the result as a plain dict, with the status as its string value.

        The ``details`` mapping is passed through by reference, not copied.
        """
        serialized = dict(
            check_name=self.check_name,
            status=self.status.value,
            message=self.message,
            details=self.details,
            timestamp=self.timestamp,
        )
        return serialized


class DependencyChecker:
    """
    Validates that all required dependencies are available.

    Checks:
    - Required Vault secrets exist
    - Required services are reachable
    - Required tools are installed
    - Required credentials are valid

    Failed dependencies are reported as CheckResult values rather than
    raised. Construction itself raises if the Vault token file cannot be
    read (see ``_get_vault_token``).
    """

    # Standard dependencies by action type
    DEPENDENCIES = {
        "terraform": {
            "secrets": ["api-keys/proxmox", "ssh/sandbox"],
            "services": ["vault", "proxmox-api"],
            "tools": ["terraform"]
        },
        "ansible": {
            "secrets": ["ssh/sandbox", "inventory/proxmox"],
            "services": ["vault"],
            "tools": ["ansible-playbook"]
        },
        "docker": {
            "secrets": ["registry/credentials"],
            "services": ["vault", "docker-registry"],
            "tools": ["docker"]
        },
        "generic": {
            "secrets": [],
            "services": ["vault", "dragonfly"],
            "tools": []
        }
    }

    # Service health endpoints: name -> (URL, expected HTTP status).
    # A redis:// URL is probed with PING instead of HTTP, so its expected
    # status is None.
    SERVICE_ENDPOINTS = {
        "vault": ("https://127.0.0.1:8200/v1/sys/health", 200),
        "dragonfly": ("redis://127.0.0.1:6379", None),
        "proxmox-api": ("https://10.77.0.2:8006/api2/json/version", 200),
        "docker-registry": ("https://registry.local/v2/", 200),
    }

    # Base URL of the Vault API used for token and secret lookups
    VAULT_ADDR = "https://127.0.0.1:8200"

    # JSON file whose "root_token" entry is used as the Vault token
    VAULT_KEYS_FILE = "/opt/vault/init-keys.json"

    # Limits (in seconds) applied to every curl invocation so that an
    # unresponsive endpoint cannot stall the preflight run
    CURL_CONNECT_TIMEOUT = 5
    CURL_TOTAL_TIMEOUT = 10

    def __init__(self):
        """Load the Vault token used by all Vault-backed checks.

        Raises:
            OSError: The token file cannot be opened.
            json.JSONDecodeError: The token file is not valid JSON.
            KeyError: The token file has no "root_token" entry.
        """
        self.vault_token = self._get_vault_token()

    def _get_vault_token(self) -> str:
        """Return the "root_token" value stored in ``VAULT_KEYS_FILE``.

        Raises:
            OSError: The file cannot be opened.
            json.JSONDecodeError: The file is not valid JSON.
            KeyError: The file has no "root_token" entry.
        """
        with open(self.VAULT_KEYS_FILE, encoding="utf-8") as f:
            return json.load(f)["root_token"]

    def _now(self) -> str:
        """Return the current UTC time as an ISO 8601 string."""
        return datetime.now(timezone.utc).isoformat()

    def _vault_request(self, api_path: str) -> Optional[dict]:
        """GET ``<VAULT_ADDR>/v1/<api_path>`` with the token and parse the reply.

        Args:
            api_path: Path below ``/v1/``, without a leading slash.

        Returns:
            The decoded JSON object, or None when curl could not be run,
            timed out, or produced output that is not a JSON object.
        """
        # NOTE(review): the token is passed on curl's command line and is
        # therefore visible in the process list while curl runs.
        command = [
            "curl", "-sk",
            "--connect-timeout", str(self.CURL_CONNECT_TIMEOUT),
            "-H", f"X-Vault-Token: {self.vault_token}",
            f"{self.VAULT_ADDR}/v1/{api_path}",
        ]
        try:
            result = subprocess.run(
                command,
                capture_output=True,
                text=True,
                timeout=self.CURL_TOTAL_TIMEOUT,
            )
            payload = json.loads(result.stdout)
        except (OSError, subprocess.SubprocessError, ValueError):
            # OSError: curl is not installed; SubprocessError: timeout;
            # ValueError: empty or non-JSON output. All mean "no answer".
            return None
        return payload if isinstance(payload, dict) else None

    def _vault_read(self, path: str) -> Optional[dict]:
        """Read from Vault KV

        Args:
            path: Secret path below ``secret/data/``.

        Returns:
            The secret's key/value mapping, or None when the response does
            not carry a ``data.data`` object.
        """
        payload = self._vault_request(f"secret/data/{path}")
        envelope = payload.get("data") if payload else None
        if not isinstance(envelope, dict):
            return None
        secret = envelope.get("data")
        return secret if isinstance(secret, dict) else None

    def check_vault_secret(self, secret_path: str) -> CheckResult:
        """Check if a Vault secret exists and is readable

        A secret that exists but holds no keys still passes; its empty key
        list is shown in the result. Only key names are reported, never
        values.
        """
        data = self._vault_read(secret_path)

        if data is not None:
            # Don't expose actual secret values
            keys = list(data)
            return self._result(
                f"secret:{secret_path}",
                CheckStatus.PASS,
                f"Secret '{secret_path}' exists with keys: {keys}",
                {"path": secret_path, "keys": keys},
            )

        return self._result(
            f"secret:{secret_path}",
            CheckStatus.FAIL,
            f"Secret '{secret_path}' not found or not readable",
            {"path": secret_path},
        )

    def check_service_health(self, service_name: str) -> CheckResult:
        """Check if a service is reachable

        Returns:
            SKIP when the service has no entry in ``SERVICE_ENDPOINTS``;
            otherwise the result of a Redis PING (redis:// endpoints) or an
            HTTP status probe (all other endpoints).
        """
        if service_name not in self.SERVICE_ENDPOINTS:
            return self._result(
                f"service:{service_name}",
                CheckStatus.SKIP,
                f"No health endpoint defined for '{service_name}'",
                {"service": service_name},
            )

        endpoint, expected_code = self.SERVICE_ENDPOINTS[service_name]

        # Special handling for Redis/Dragonfly
        if endpoint.startswith("redis://"):
            return self._check_redis_service(service_name, endpoint)

        return self._check_http_service(service_name, endpoint, expected_code)

    def _check_redis_service(self, service_name: str, endpoint: str) -> CheckResult:
        """PING a redis:// endpoint and report PASS or FAIL."""
        check_name = f"service:{service_name}"
        try:
            # Imported lazily: the redis package is only needed for this probe
            import redis

            target = urlsplit(endpoint)
            host = target.hostname or "localhost"
            port = target.port or 6379

            # Get password from Vault for dragonfly
            password = None
            if service_name == "dragonfly":
                creds = self._vault_read("services/dragonfly")
                if creds:
                    password = creds.get("password")

            client = redis.Redis(host=host, port=port, password=password, socket_timeout=5)
            client.ping()
        except Exception as e:
            # Deliberately broad: any failure here (missing package, bad
            # endpoint, connection or auth error) means the service could
            # not be verified, and the reason is recorded in the result.
            return self._result(
                check_name,
                CheckStatus.FAIL,
                f"Service '{service_name}' is not reachable: {str(e)}",
                {"service": service_name, "endpoint": endpoint, "error": str(e)},
            )

        return self._result(
            check_name,
            CheckStatus.PASS,
            f"Service '{service_name}' is healthy (PING OK)",
            {"service": service_name, "endpoint": endpoint},
        )

    def _check_http_service(
        self, service_name: str, endpoint: str, expected_code: Optional[int]
    ) -> CheckResult:
        """Probe an HTTP(S) endpoint with curl and grade the status code.

        Returns:
            FAIL when curl cannot run, times out, or receives no HTTP
            response; PASS when the status matches ``expected_code`` (or no
            expectation is set); WARN for any other status.
        """
        check_name = f"service:{service_name}"
        try:
            result = subprocess.run(
                [
                    "curl", "-sk", "-o", "/dev/null", "-w", "%{http_code}",
                    "--connect-timeout", str(self.CURL_CONNECT_TIMEOUT),
                    endpoint,
                ],
                capture_output=True,
                text=True,
                timeout=self.CURL_TOTAL_TIMEOUT,
            )
            status_code = int(result.stdout.strip())
        except (OSError, subprocess.SubprocessError, ValueError) as e:
            return self._result(
                check_name,
                CheckStatus.FAIL,
                f"Service '{service_name}' health check failed: {str(e)}",
                {"service": service_name, "endpoint": endpoint, "error": str(e)},
            )

        # curl writes "000" as the http_code when it never received a
        # response (refused connection, DNS failure, timeout). That is an
        # unreachable service, not an "unexpected status".
        if status_code == 0:
            error = f"no HTTP response (curl exit code {result.returncode})"
            return self._result(
                check_name,
                CheckStatus.FAIL,
                f"Service '{service_name}' is not reachable: {error}",
                {"service": service_name, "endpoint": endpoint, "error": error},
            )

        if expected_code is None or status_code == expected_code:
            return self._result(
                check_name,
                CheckStatus.PASS,
                f"Service '{service_name}' is healthy (HTTP {status_code})",
                {"service": service_name, "endpoint": endpoint, "status_code": status_code},
            )

        return self._result(
            check_name,
            CheckStatus.WARN,
            f"Service '{service_name}' returned unexpected status {status_code}",
            {
                "service": service_name,
                "endpoint": endpoint,
                "status_code": status_code,
                "expected": expected_code,
            },
        )

    def check_tool_installed(self, tool_name: str) -> CheckResult:
        """Check if a tool is installed and available

        The tool is looked up on PATH with ``shutil.which``, so no external
        ``which`` binary is required.
        """
        path = shutil.which(tool_name)

        if path is not None:
            return self._result(
                f"tool:{tool_name}",
                CheckStatus.PASS,
                f"Tool '{tool_name}' found at {path}",
                {"tool": tool_name, "path": path},
            )

        return self._result(
            f"tool:{tool_name}",
            CheckStatus.FAIL,
            f"Tool '{tool_name}' not found in PATH",
            {"tool": tool_name},
        )

    def check_vault_token_valid(self) -> CheckResult:
        """Check if the Vault token is valid

        Returns:
            PASS when the token lookup reports a positive TTL or the "root"
            policy, WARN when it reports neither, and FAIL when the lookup
            yields no usable token data.
        """
        payload = self._vault_request("auth/token/lookup-self")
        token_data = payload.get("data") if payload else None

        if isinstance(token_data, dict):
            policies = token_data.get("policies") or []
            ttl = token_data.get("ttl") or 0

            # Only grade the token when the fields have the expected shape;
            # anything else falls through to FAIL below.
            if isinstance(ttl, (int, float)) and isinstance(policies, list):
                details = {"ttl": ttl, "policies": policies}

                if ttl > 0 or "root" in policies:
                    return self._result(
                        "vault_token",
                        CheckStatus.PASS,
                        f"Vault token valid (TTL: {ttl}s, policies: {policies})",
                        details,
                    )

                return self._result(
                    "vault_token",
                    CheckStatus.WARN,
                    "Vault token has no TTL (may be root token)",
                    details,
                )

        return self._result(
            "vault_token",
            CheckStatus.FAIL,
            "Vault token invalid or expired",
            {},
        )

    def run_action_checks(self, action_type: str) -> list[CheckResult]:
        """Run all dependency checks for an action type

        Unknown action types fall back to the "generic" dependency set.
        Results are ordered: Vault token, secrets, services, tools.
        """
        deps = self.DEPENDENCIES.get(action_type, self.DEPENDENCIES["generic"])

        # Always check Vault token
        results = [self.check_vault_token_valid()]

        # Check secrets
        results.extend(self.check_vault_secret(secret) for secret in deps.get("secrets", []))

        # Check services
        results.extend(self.check_service_health(service) for service in deps.get("services", []))

        # Check tools
        results.extend(self.check_tool_installed(tool) for tool in deps.get("tools", []))

        return results

    def preflight_report(self, action_type: str, extra_secrets: Optional[list[str]] = None) -> dict:
        """Generate a full dependency preflight report

        Args:
            action_type: Key into ``DEPENDENCIES`` (unknown keys use "generic").
            extra_secrets: Additional Vault secret paths to verify after the
                standard checks; None means no extras.

        Returns:
            A JSON-serialisable dict holding every check result, per-status
            counts under "summary", and "can_proceed", which is False as
            soon as any check has status FAIL.
        """
        # Timestamp marks the start of the run, before any check executes
        started_at = self._now()

        # Run standard checks
        results = self.run_action_checks(action_type)

        # Check any extra secrets
        results.extend(self.check_vault_secret(secret) for secret in (extra_secrets or []))

        counter_for = {
            CheckStatus.PASS: "passed",
            CheckStatus.FAIL: "failed",
            CheckStatus.WARN: "warnings",
            CheckStatus.SKIP: "skipped",
        }
        summary = {
            "total_checks": len(results),
            "passed": 0,
            "failed": 0,
            "warnings": 0,
            "skipped": 0
        }
        for r in results:
            counter = counter_for.get(r.status)
            if counter is not None:
                summary[counter] += 1

        return {
            "report_type": "dependency_preflight",
            "action_type": action_type,
            "timestamp": started_at,
            "checks": [r.to_dict() for r in results],
            "summary": summary,
            "can_proceed": summary["failed"] == 0
        }

    def _result(self, check_name: str, status: CheckStatus, message: str, details: dict) -> CheckResult:
        """Build a CheckResult stamped with the current UTC time."""
        return CheckResult(
            check_name=check_name,
            status=status,
            message=message,
            details=details,
            timestamp=self._now()
        )


# =============================================================================
# CLI
# =============================================================================

def format_report(report: dict) -> str:
    """Render a preflight report dict as the human-readable text report.

    Args:
        report: A dict with the keys "action_type", "timestamp", "checks"
            (each entry having "check_name", "status" and "message"),
            "summary" and "can_proceed".

    Returns:
        The multi-line report text, without a trailing newline.
    """
    icons = {
        "PASS": "[OK]",
        "FAIL": "[FAIL]",
        "WARN": "[WARN]",
        "SKIP": "[SKIP]"
    }
    heavy_rule = "=" * 60
    light_rule = "-" * 60

    lines = [
        "",
        heavy_rule,
        "DEPENDENCY PREFLIGHT REPORT",
        heavy_rule,
        f"Action Type: {report['action_type']}",
        f"Timestamp: {report['timestamp']}",
        "",
    ]

    for check in report["checks"]:
        # Unknown status strings are still shown, marked with "[?]"
        status_icon = icons.get(check["status"], "[?]")
        lines.append(f"  {status_icon} {check['check_name']}")
        lines.append(f"       {check['message']}")

    s = report["summary"]
    if report["can_proceed"]:
        verdict = "[OK] DEPENDENCY CHECK PASSED - All dependencies available"
    else:
        verdict = "[FAIL] DEPENDENCY CHECK FAILED - Missing dependencies"

    lines += [
        "",
        light_rule,
        "SUMMARY",
        light_rule,
        f"  Total Checks: {s['total_checks']}",
        f"  Passed: {s['passed']}",
        f"  Failed: {s['failed']}",
        f"  Warnings: {s['warnings']}",
        f"  Skipped: {s['skipped']}",
        "",
        verdict,
        heavy_rule,
    ]
    return "\n".join(lines)


def main(argv: Optional[list[str]] = None) -> int:
    """Run the preflight checks from the command line.

    Args:
        argv: Argument list to parse; None means ``sys.argv[1:]``.

    Returns:
        0 when the report allows the action to proceed, 1 otherwise
        (including when the checker cannot be constructed).
    """
    import argparse

    parser = argparse.ArgumentParser(description="Dependency Preflight Checker")
    parser.add_argument("action_type", choices=["terraform", "ansible", "docker", "generic"],
                        help="Type of action to check dependencies for")
    parser.add_argument("--secret", action="append", dest="extra_secrets",
                        help="Additional secrets to check (can be repeated)")
    parser.add_argument("--json", action="store_true", help="Output JSON")

    args = parser.parse_args(argv)

    try:
        checker = DependencyChecker()
    except (OSError, KeyError, ValueError) as exc:
        # The checker reads its Vault token at construction; report an
        # unreadable token source as a failed preflight, not a traceback.
        print(f"[FAIL] Cannot load Vault token: {exc}", file=sys.stderr)
        return 1

    report = checker.preflight_report(args.action_type, args.extra_secrets)

    if args.json:
        print(json.dumps(report, indent=2))
    else:
        print(format_report(report))

    return 0 if report["can_proceed"] else 1


if __name__ == "__main__":
    sys.exit(main())