agent-governance/preflight/dependency_check.py
profit 77655c298c Initial commit: Agent Governance System Phase 8
Phase 8 Production Hardening with complete governance infrastructure:

- Vault integration with tiered policies (T0-T4)
- DragonflyDB state management
- SQLite audit ledger
- Pipeline DSL and templates
- Promotion/revocation engine
- Checkpoint system for session persistence
- Health manager and circuit breaker for fault tolerance
- GitHub/Slack integrations
- Architectural test pipeline with bug watcher, suggestion engine, council review
- Multi-agent chaos testing framework

Test Results:
- Governance tests: 68/68 passing
- E2E workflow: 16/16 passing
- Phase 2 Vault: 14/14 passing
- Integration tests: 27/27 passing

Coverage: 57.6% average across 12 phases

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 22:07:06 -05:00

406 lines
14 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Dependency Checker
==================
Verifies that all required secrets and resources are available before execution.
Part of Phase 3: Execution Pipeline - Preflight System.
"""
import json
import shutil
import subprocess
import sys
from dataclasses import dataclass
from datetime import datetime, timezone
from enum import Enum
from typing import Optional
from urllib.parse import urlsplit
class CheckStatus(str, Enum):
    """Outcome of a single preflight check.

    The ``str`` mix-in makes every member compare equal to its plain string
    value, so members can be used directly where a string is expected.
    """

    PASS = "PASS"  # the check succeeded
    FAIL = "FAIL"  # the check failed
    WARN = "WARN"  # the check completed but found something unexpected
    SKIP = "SKIP"  # the check was not performed
@dataclass
class CheckResult:
    """Result of one preflight check, in a form that is easy to serialize."""

    check_name: str  # identifier of the check that produced this result
    status: CheckStatus  # outcome of the check
    message: str  # human-readable summary of the outcome
    details: dict  # structured context for the outcome
    timestamp: str  # time the result was created, as a string

    def to_dict(self) -> dict:
        """Return the result as a plain dict, with ``status`` as its string value."""
        return dict(
            check_name=self.check_name,
            status=self.status.value,
            message=self.message,
            details=self.details,
            timestamp=self.timestamp,
        )
class DependencyChecker:
    """
    Validates that all required dependencies are available.

    Checks:
    - Required Vault secrets exist
    - Required services are reachable
    - Required tools are installed
    - Required credentials are valid

    The ``check_*`` methods are designed to report problems as a
    ``CheckResult`` rather than raise, so one broken dependency does not
    abort the rest of a preflight run. Constructing the checker, however,
    reads the Vault token from disk and does raise if that fails.
    """

    # Standard dependencies by action type
    DEPENDENCIES = {
        "terraform": {
            "secrets": ["api-keys/proxmox", "ssh/sandbox"],
            "services": ["vault", "proxmox-api"],
            "tools": ["terraform"],
        },
        "ansible": {
            "secrets": ["ssh/sandbox", "inventory/proxmox"],
            "services": ["vault"],
            "tools": ["ansible-playbook"],
        },
        "docker": {
            "secrets": ["registry/credentials"],
            "services": ["vault", "docker-registry"],
            "tools": ["docker"],
        },
        "generic": {
            "secrets": [],
            "services": ["vault", "dragonfly"],
            "tools": [],
        },
    }

    # Service health endpoints: name -> (URL, expected HTTP status).
    # A ``redis://`` URL is checked with a PING instead of an HTTP request.
    SERVICE_ENDPOINTS = {
        "vault": ("https://127.0.0.1:8200/v1/sys/health", 200),
        "dragonfly": ("redis://127.0.0.1:6379", None),
        "proxmox-api": ("https://10.77.0.2:8006/api2/json/version", 200),
        "docker-registry": ("https://registry.local/v2/", 200),
    }

    # Base URL of the Vault HTTP API used for secret reads and token lookup
    VAULT_ADDR = "https://127.0.0.1:8200"

    # Seconds allowed for establishing a connection to a service
    CONNECT_TIMEOUT = 5
    # Hard limit, in seconds, for one whole curl subprocess
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Load the Vault token; see ``_get_vault_token`` for what can raise."""
        self.vault_token = self._get_vault_token()

    def _get_vault_token(self) -> str:
        """Read the Vault token from the init-keys file.

        Raises:
            OSError: the key file cannot be opened.
            json.JSONDecodeError: the file is not valid JSON.
            KeyError: the file has no ``root_token`` entry.
        """
        with open("/opt/vault/init-keys.json") as f:
            return json.load(f)["root_token"]

    def _now(self) -> str:
        """Return the current UTC time as an ISO-8601 string."""
        return datetime.now(timezone.utc).isoformat()

    def _vault_get(self, api_path: str) -> Optional[dict]:
        """GET ``<VAULT_ADDR>/v1/<api_path>`` and return the decoded JSON object.

        Returns None when curl cannot be run, times out, or the response body
        is not a JSON object, so callers can turn any of those into a failed
        check instead of crashing or hanging.
        """
        # NOTE(review): the token is passed on curl's command line, where other
        # local users may be able to see it in the process list, and ``-k``
        # disables TLS certificate verification. Both are kept from the
        # original behavior; consider tightening them.
        command = [
            "curl", "-sk",
            "--connect-timeout", str(self.CONNECT_TIMEOUT),
            "-H", f"X-Vault-Token: {self.vault_token}",
            f"{self.VAULT_ADDR}/v1/{api_path}",
        ]
        try:
            completed = subprocess.run(
                command,
                capture_output=True,
                text=True,
                timeout=self.REQUEST_TIMEOUT,
            )
            payload = json.loads(completed.stdout)
        except (OSError, ValueError, subprocess.SubprocessError):
            # OSError: curl missing; SubprocessError: timeout;
            # ValueError: undecodable output or invalid JSON.
            return None
        return payload if isinstance(payload, dict) else None

    def _vault_read(self, path: str) -> Optional[dict]:
        """Read from Vault KV.

        Returns the secret's key/value payload (``data.data`` in the
        response), or None when the secret is missing or unreadable.
        """
        payload = self._vault_get(f"secret/data/{path}")
        envelope = payload.get("data") if payload is not None else None
        if not isinstance(envelope, dict):
            return None
        secret = envelope.get("data")
        return secret if isinstance(secret, dict) else None

    def check_vault_secret(self, secret_path: str) -> CheckResult:
        """Check if a Vault secret exists and is readable.

        PASS when the secret can be read (even if it holds no keys), FAIL
        otherwise. Only key names are reported, never values.
        """
        data = self._vault_read(secret_path)
        # ``is not None`` rather than truthiness: an empty secret still exists.
        if data is not None:
            # Don't expose actual secret values
            keys = list(data.keys())
            return CheckResult(
                check_name=f"secret:{secret_path}",
                status=CheckStatus.PASS,
                message=f"Secret '{secret_path}' exists with keys: {keys}",
                details={"path": secret_path, "keys": keys},
                timestamp=self._now(),
            )
        return CheckResult(
            check_name=f"secret:{secret_path}",
            status=CheckStatus.FAIL,
            message=f"Secret '{secret_path}' not found or not readable",
            details={"path": secret_path},
            timestamp=self._now(),
        )

    def check_service_health(self, service_name: str) -> CheckResult:
        """Check if a service is reachable.

        SKIP when the service has no entry in ``SERVICE_ENDPOINTS``; otherwise
        the result of a Redis PING (``redis://`` endpoints) or an HTTP probe.
        """
        if service_name not in self.SERVICE_ENDPOINTS:
            return CheckResult(
                check_name=f"service:{service_name}",
                status=CheckStatus.SKIP,
                message=f"No health endpoint defined for '{service_name}'",
                details={"service": service_name},
                timestamp=self._now(),
            )
        endpoint, expected_code = self.SERVICE_ENDPOINTS[service_name]
        # Special handling for Redis/Dragonfly
        if endpoint.startswith("redis://"):
            return self._check_redis_service(service_name, endpoint)
        return self._check_http_service(service_name, endpoint, expected_code)

    def _check_redis_service(self, service_name: str, endpoint: str) -> CheckResult:
        """PING a ``redis://`` endpoint; PASS on success, FAIL on any error."""
        try:
            import redis

            target = urlsplit(endpoint)
            host = target.hostname or "localhost"
            # ``.port`` raises ValueError on a malformed port; a missing port
            # falls back to the Redis default.
            port = target.port or 6379
            # Get password from Vault for dragonfly
            password = None
            if service_name == "dragonfly":
                creds = self._vault_read("services/dragonfly")
                if creds:
                    password = creds.get("password")
            client = redis.Redis(
                host=host,
                port=port,
                password=password,
                socket_timeout=self.CONNECT_TIMEOUT,
            )
            client.ping()
        except Exception as e:
            # Deliberately broad: covers a missing ``redis`` package, URL
            # errors and whatever the client raises. All mean "not usable".
            return CheckResult(
                check_name=f"service:{service_name}",
                status=CheckStatus.FAIL,
                message=f"Service '{service_name}' is not reachable: {str(e)}",
                details={"service": service_name, "endpoint": endpoint, "error": str(e)},
                timestamp=self._now(),
            )
        return CheckResult(
            check_name=f"service:{service_name}",
            status=CheckStatus.PASS,
            message=f"Service '{service_name}' is healthy (PING OK)",
            details={"service": service_name, "endpoint": endpoint},
            timestamp=self._now(),
        )

    def _check_http_service(
        self, service_name: str, endpoint: str, expected_code: Optional[int]
    ) -> CheckResult:
        """Probe an HTTP(S) endpoint with curl and grade the status code.

        FAIL when curl cannot run, times out, or gets no HTTP response;
        PASS when the status matches ``expected_code`` (or none is expected);
        WARN for any other status.
        """
        try:
            completed = subprocess.run(
                [
                    "curl", "-sk", "-o", "/dev/null", "-w", "%{http_code}",
                    "--connect-timeout", str(self.CONNECT_TIMEOUT),
                    endpoint,
                ],
                capture_output=True,
                text=True,
                timeout=self.REQUEST_TIMEOUT,
            )
            status_code = int(completed.stdout.strip())
        except (OSError, ValueError, subprocess.SubprocessError) as e:
            return CheckResult(
                check_name=f"service:{service_name}",
                status=CheckStatus.FAIL,
                message=f"Service '{service_name}' health check failed: {str(e)}",
                details={"service": service_name, "endpoint": endpoint, "error": str(e)},
                timestamp=self._now(),
            )

        # curl writes "000" when it never received an HTTP response
        # (connection refused, DNS failure, timeout). That is an unreachable
        # service, not merely an unexpected status, so it must block.
        if status_code == 0:
            error = f"no HTTP response (curl exit code {completed.returncode})"
            return CheckResult(
                check_name=f"service:{service_name}",
                status=CheckStatus.FAIL,
                message=f"Service '{service_name}' is not reachable: {error}",
                details={"service": service_name, "endpoint": endpoint, "error": error},
                timestamp=self._now(),
            )

        if expected_code is None or status_code == expected_code:
            return CheckResult(
                check_name=f"service:{service_name}",
                status=CheckStatus.PASS,
                message=f"Service '{service_name}' is healthy (HTTP {status_code})",
                details={"service": service_name, "endpoint": endpoint, "status_code": status_code},
                timestamp=self._now(),
            )
        return CheckResult(
            check_name=f"service:{service_name}",
            status=CheckStatus.WARN,
            message=f"Service '{service_name}' returned unexpected status {status_code}",
            details={
                "service": service_name,
                "endpoint": endpoint,
                "status_code": status_code,
                "expected": expected_code,
            },
            timestamp=self._now(),
        )

    def check_tool_installed(self, tool_name: str) -> CheckResult:
        """Check if a tool is installed and available on PATH."""
        path = shutil.which(tool_name)
        if path is not None:
            return CheckResult(
                check_name=f"tool:{tool_name}",
                status=CheckStatus.PASS,
                message=f"Tool '{tool_name}' found at {path}",
                details={"tool": tool_name, "path": path},
                timestamp=self._now(),
            )
        return CheckResult(
            check_name=f"tool:{tool_name}",
            status=CheckStatus.FAIL,
            message=f"Tool '{tool_name}' not found in PATH",
            details={"tool": tool_name},
            timestamp=self._now(),
        )

    def check_vault_token_valid(self) -> CheckResult:
        """Check if the Vault token is valid.

        PASS when the token lookup reports a positive TTL or the ``root``
        policy, WARN when it reports neither, FAIL when the lookup itself
        does not return token data.
        """
        payload = self._vault_get("auth/token/lookup-self")
        token_data = payload.get("data") if payload is not None else None
        if isinstance(token_data, dict):
            policies = token_data.get("policies") or []
            ttl = token_data.get("ttl") or 0
            has_time_left = isinstance(ttl, (int, float)) and ttl > 0
            if has_time_left or "root" in policies:
                return CheckResult(
                    check_name="vault_token",
                    status=CheckStatus.PASS,
                    message=f"Vault token valid (TTL: {ttl}s, policies: {policies})",
                    details={"ttl": ttl, "policies": policies},
                    timestamp=self._now(),
                )
            # Reached only for a token with no positive TTL and no "root" policy.
            return CheckResult(
                check_name="vault_token",
                status=CheckStatus.WARN,
                message="Vault token has no TTL (may be root token)",
                details={"ttl": ttl, "policies": policies},
                timestamp=self._now(),
            )
        return CheckResult(
            check_name="vault_token",
            status=CheckStatus.FAIL,
            message="Vault token invalid or expired",
            details={},
            timestamp=self._now(),
        )

    def run_action_checks(self, action_type: str) -> list[CheckResult]:
        """Run all dependency checks for an action type.

        Unknown action types fall back to the ``generic`` dependency set.
        Results are ordered: Vault token, secrets, services, tools.
        """
        deps = self.DEPENDENCIES.get(action_type, self.DEPENDENCIES["generic"])
        # Always check Vault token
        results = [self.check_vault_token_valid()]
        results.extend(self.check_vault_secret(s) for s in deps.get("secrets", []))
        results.extend(self.check_service_health(s) for s in deps.get("services", []))
        results.extend(self.check_tool_installed(t) for t in deps.get("tools", []))
        return results

    def preflight_report(
        self, action_type: str, extra_secrets: Optional[list[str]] = None
    ) -> dict:
        """Generate a full dependency preflight report.

        Args:
            action_type: key into ``DEPENDENCIES`` selecting the standard checks.
            extra_secrets: additional Vault secret paths to check, if any.

        Returns:
            A dict with the serialized checks, per-status counts under
            ``summary``, and ``can_proceed``, which is False as soon as any
            check has status FAIL.
        """
        report = {
            "report_type": "dependency_preflight",
            "action_type": action_type,
            "timestamp": self._now(),
            "checks": [],
            "summary": {
                "total_checks": 0,
                "passed": 0,
                "failed": 0,
                "warnings": 0,
                "skipped": 0,
            },
            "can_proceed": True,
        }
        # Run standard checks
        results = self.run_action_checks(action_type)
        # Check any extra secrets
        results.extend(self.check_vault_secret(s) for s in (extra_secrets or []))

        counter_for = {
            CheckStatus.PASS: "passed",
            CheckStatus.FAIL: "failed",
            CheckStatus.WARN: "warnings",
            CheckStatus.SKIP: "skipped",
        }
        summary = report["summary"]
        for r in results:
            report["checks"].append(r.to_dict())
            summary["total_checks"] += 1
            counter = counter_for.get(r.status)
            if counter is not None:
                summary[counter] += 1
            if r.status == CheckStatus.FAIL:
                report["can_proceed"] = False
        return report
# =============================================================================
# CLI
# =============================================================================
if __name__ == "__main__":
    import argparse

    # Command-line entry point: run the preflight for one action type, print
    # the report as JSON or text, and exit non-zero when a check failed.
    cli = argparse.ArgumentParser(description="Dependency Preflight Checker")
    cli.add_argument(
        "action_type",
        choices=["terraform", "ansible", "docker", "generic"],
        help="Type of action to check dependencies for",
    )
    cli.add_argument(
        "--secret",
        action="append",
        dest="extra_secrets",
        help="Additional secrets to check (can be repeated)",
    )
    cli.add_argument("--json", action="store_true", help="Output JSON")
    options = cli.parse_args()

    report = DependencyChecker().preflight_report(
        options.action_type, options.extra_secrets
    )

    if options.json:
        print(json.dumps(report, indent=2))
    else:
        heavy_rule = "=" * 60
        light_rule = "-" * 60
        # Text marker shown in front of each check, keyed by its status string
        icons = {
            "PASS": "[OK]",
            "FAIL": "[FAIL]",
            "WARN": "[WARN]",
            "SKIP": "[SKIP]",
        }

        print("\n" + heavy_rule)
        print("DEPENDENCY PREFLIGHT REPORT")
        print(heavy_rule)
        print(f"Action Type: {report['action_type']}")
        print(f"Timestamp: {report['timestamp']}")
        print()

        for entry in report["checks"]:
            status_icon = icons.get(entry["status"], "[?]")
            print(f" {status_icon} {entry['check_name']}")
            print(f" {entry['message']}")

        print("\n" + light_rule)
        print("SUMMARY")
        print(light_rule)
        totals = report["summary"]
        print(f" Total Checks: {totals['total_checks']}")
        print(f" Passed: {totals['passed']}")
        print(f" Failed: {totals['failed']}")
        print(f" Warnings: {totals['warnings']}")
        print(f" Skipped: {totals['skipped']}")
        print()

        if not report["can_proceed"]:
            print("[FAIL] DEPENDENCY CHECK FAILED - Missing dependencies")
        else:
            print("[OK] DEPENDENCY CHECK PASSED - All dependencies available")
        print(heavy_rule)

    # Exit status mirrors the report: 0 when execution may proceed, 1 otherwise
    sys.exit(0 if report["can_proceed"] else 1)