Phase 8 Production Hardening with complete governance infrastructure: - Vault integration with tiered policies (T0-T4) - DragonflyDB state management - SQLite audit ledger - Pipeline DSL and templates - Promotion/revocation engine - Checkpoint system for session persistence - Health manager and circuit breaker for fault tolerance - GitHub/Slack integrations - Architectural test pipeline with bug watcher, suggestion engine, council review - Multi-agent chaos testing framework Test Results: - Governance tests: 68/68 passing - E2E workflow: 16/16 passing - Phase 2 Vault: 14/14 passing - Integration tests: 27/27 passing Coverage: 57.6% average across 12 phases Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
406 lines
14 KiB
Python
Executable File
406 lines
14 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Dependency Checker
|
|
==================
|
|
Verifies that all required secrets and resources are available before execution.
|
|
Part of Phase 3: Execution Pipeline - Preflight System.
|
|
"""
|
|
|
|
import json
import shutil
import subprocess
import sys
from dataclasses import dataclass
from datetime import datetime, timezone
from enum import Enum
from typing import Optional
from urllib.parse import urlsplit
|
|
|
|
|
|
class CheckStatus(str, Enum):
    """Outcome category of a single preflight check.

    The members are also plain strings (the class mixes in ``str``), so they
    compare equal to their literal values.
    """

    # The check succeeded.
    PASS = "PASS"
    # The check failed.
    FAIL = "FAIL"
    # The check completed but found something worth flagging.
    WARN = "WARN"
    # The check was not performed.
    SKIP = "SKIP"
|
|
|
|
|
|
@dataclass
class CheckResult:
    """Record describing the outcome of one dependency check."""

    # Identifier of the check that produced this result.
    check_name: str
    # Outcome category of the check.
    status: CheckStatus
    # Human-readable summary of the outcome.
    message: str
    # Extra structured context supplied by the check.
    details: dict
    # Time the result was created, as a string supplied by the caller.
    timestamp: str

    def to_dict(self) -> dict:
        """Return the result as a plain dict.

        ``status`` is emitted as its ``.value``; ``details`` is the same dict
        object held by the instance (it is not copied).
        """
        return dict(
            check_name=self.check_name,
            status=self.status.value,
            message=self.message,
            details=self.details,
            timestamp=self.timestamp,
        )
|
|
|
|
|
|
class DependencyChecker:
    """
    Validates that all required dependencies are available.

    Checks:
    - Required Vault secrets exist
    - Required services are reachable
    - Required tools are installed
    - Required credentials are valid

    The ``check_*`` methods report problems through the status of the
    ``CheckResult`` they return rather than by raising. Constructing the
    checker, however, reads the Vault token from disk and raises if that
    fails (see ``_get_vault_token``).
    """

    # Standard dependencies by action type
    DEPENDENCIES = {
        "terraform": {
            "secrets": ["api-keys/proxmox", "ssh/sandbox"],
            "services": ["vault", "proxmox-api"],
            "tools": ["terraform"],
        },
        "ansible": {
            "secrets": ["ssh/sandbox", "inventory/proxmox"],
            "services": ["vault"],
            "tools": ["ansible-playbook"],
        },
        "docker": {
            "secrets": ["registry/credentials"],
            "services": ["vault", "docker-registry"],
            "tools": ["docker"],
        },
        "generic": {
            "secrets": [],
            "services": ["vault", "dragonfly"],
            "tools": [],
        },
    }

    # Service health endpoints: name -> (URL, expected HTTP status or None)
    SERVICE_ENDPOINTS = {
        "vault": ("https://127.0.0.1:8200/v1/sys/health", 200),
        "dragonfly": ("redis://127.0.0.1:6379", None),
        "proxmox-api": ("https://10.77.0.2:8006/api2/json/version", 200),
        "docker-registry": ("https://registry.local/v2/", 200),
    }

    # Base address of the Vault API used for secret and token lookups.
    VAULT_ADDR = "https://127.0.0.1:8200"
    # JSON file holding the Vault token under the "root_token" key.
    VAULT_KEYS_FILE = "/opt/vault/init-keys.json"
    # Seconds curl may spend establishing a connection.
    CONNECT_TIMEOUT = 5
    # Seconds a whole curl invocation may run before it is abandoned.
    REQUEST_TIMEOUT = 10
    # Port used when a redis:// endpoint does not name one.
    DEFAULT_REDIS_PORT = 6379

    def __init__(self):
        """Load the Vault token; raises whatever ``_get_vault_token`` raises."""
        self.vault_token = self._get_vault_token()

    def _get_vault_token(self) -> str:
        """Read the Vault token from ``VAULT_KEYS_FILE``.

        Raises:
            OSError: the file cannot be opened.
            json.JSONDecodeError: the file is not valid JSON.
            KeyError: the JSON has no ``root_token`` entry.
        """
        with open(self.VAULT_KEYS_FILE, encoding="utf-8") as f:
            return json.load(f)["root_token"]

    def _now(self) -> str:
        """Return the current UTC time as an ISO-8601 string."""
        return datetime.now(timezone.utc).isoformat()

    @staticmethod
    def _json_object(body: Optional[str]) -> Optional[dict]:
        """Parse ``body`` as JSON and return it only if it is a JSON object."""
        if not body:
            return None
        try:
            payload = json.loads(body)
        except json.JSONDecodeError:
            return None
        return payload if isinstance(payload, dict) else None

    @staticmethod
    def _extract_kv_data(body: Optional[str]) -> Optional[dict]:
        """Return the ``data.data`` object of a Vault KV response body.

        Returns None when the body is empty, is not JSON, or does not contain
        a nested ``data.data`` object (for example an ``errors`` response or a
        ``"data": null`` payload). An empty ``data.data`` object is returned
        as ``{}`` so callers can tell "exists but empty" from "missing".
        """
        payload = DependencyChecker._json_object(body)
        if payload is None:
            return None
        outer = payload.get("data")
        if not isinstance(outer, dict):
            return None
        inner = outer.get("data")
        return inner if isinstance(inner, dict) else None

    @staticmethod
    def _parse_redis_endpoint(endpoint: str) -> tuple[str, int]:
        """Split a ``redis://host[:port][/db]`` URL into ``(host, port)``.

        The port defaults to 6379 when the URL does not specify one.

        Raises:
            ValueError: the URL has no host or an invalid port.
        """
        parts = urlsplit(endpoint)
        if not parts.hostname:
            raise ValueError(f"No host in Redis endpoint '{endpoint}'")
        return parts.hostname, parts.port or DependencyChecker.DEFAULT_REDIS_PORT

    def _vault_get(self, api_path: str) -> Optional[str]:
        """GET ``/v1/<api_path>`` from Vault via curl and return the raw body.

        Returns None when curl cannot be started or exceeds REQUEST_TIMEOUT,
        so callers can treat an unusable Vault like an empty response.
        """
        # NOTE(review): the token is passed as a curl argument and is therefore
        # part of the curl process's command line while it runs; "-k" also
        # disables TLS certificate verification. Both are kept as in the
        # original implementation - confirm they are acceptable here.
        try:
            result = subprocess.run(
                [
                    "curl", "-sk",
                    "--connect-timeout", str(self.CONNECT_TIMEOUT),
                    "-H", f"X-Vault-Token: {self.vault_token}",
                    f"{self.VAULT_ADDR}/v1/{api_path}",
                ],
                capture_output=True,
                text=True,
                timeout=self.REQUEST_TIMEOUT,
            )
        except (OSError, subprocess.TimeoutExpired):
            return None
        return result.stdout

    def _vault_read(self, path: str) -> Optional[dict]:
        """Read from Vault KV; return the secret's data dict or None."""
        return self._extract_kv_data(self._vault_get(f"secret/data/{path}"))

    def check_vault_secret(self, secret_path: str) -> CheckResult:
        """Check if a Vault secret exists and is readable.

        PASS lists only the secret's key names; FAIL means the secret could
        not be read (missing, unreadable, or Vault unavailable).
        """
        data = self._vault_read(secret_path)

        # "is not None" rather than truthiness: a secret whose payload is an
        # empty object still exists.
        if data is not None:
            # Don't expose actual secret values
            keys = list(data.keys())
            return CheckResult(
                check_name=f"secret:{secret_path}",
                status=CheckStatus.PASS,
                message=f"Secret '{secret_path}' exists with keys: {keys}",
                details={"path": secret_path, "keys": keys},
                timestamp=self._now(),
            )

        return CheckResult(
            check_name=f"secret:{secret_path}",
            status=CheckStatus.FAIL,
            message=f"Secret '{secret_path}' not found or not readable",
            details={"path": secret_path},
            timestamp=self._now(),
        )

    def check_service_health(self, service_name: str) -> CheckResult:
        """Check if a service is reachable.

        Services without an entry in SERVICE_ENDPOINTS yield SKIP. ``redis://``
        endpoints are checked with a PING, everything else with an HTTP GET.
        """
        if service_name not in self.SERVICE_ENDPOINTS:
            return CheckResult(
                check_name=f"service:{service_name}",
                status=CheckStatus.SKIP,
                message=f"No health endpoint defined for '{service_name}'",
                details={"service": service_name},
                timestamp=self._now(),
            )

        endpoint, expected_code = self.SERVICE_ENDPOINTS[service_name]

        # Special handling for Redis/Dragonfly
        if endpoint.startswith("redis://"):
            return self._check_redis(service_name, endpoint)
        return self._check_http(service_name, endpoint, expected_code)

    def _check_redis(self, service_name: str, endpoint: str) -> CheckResult:
        """PING a Redis-protocol endpoint and report PASS or FAIL."""
        try:
            # Imported lazily so the module still loads without the package.
            import redis

            host, port = self._parse_redis_endpoint(endpoint)

            # Get password from Vault for dragonfly
            password = None
            if service_name == "dragonfly":
                creds = self._vault_read("services/dragonfly")
                if creds:
                    password = creds.get("password")

            client = redis.Redis(host=host, port=port, password=password, socket_timeout=5)
            client.ping()
        except Exception as e:
            # Deliberately broad: a missing redis package, a bad endpoint and
            # any client/connection error all mean the check failed, and the
            # error is reported in the result rather than swallowed.
            return CheckResult(
                check_name=f"service:{service_name}",
                status=CheckStatus.FAIL,
                message=f"Service '{service_name}' is not reachable: {str(e)}",
                details={"service": service_name, "endpoint": endpoint, "error": str(e)},
                timestamp=self._now(),
            )

        return CheckResult(
            check_name=f"service:{service_name}",
            status=CheckStatus.PASS,
            message=f"Service '{service_name}' is healthy (PING OK)",
            details={"service": service_name, "endpoint": endpoint},
            timestamp=self._now(),
        )

    def _check_http(self, service_name: str, endpoint: str, expected_code: Optional[int]) -> CheckResult:
        """GET an HTTP(S) endpoint with curl and classify the response.

        FAIL: curl could not run, timed out, or got no HTTP response.
        PASS: ``expected_code`` is None or matches the response status.
        WARN: a response arrived but with a different status.
        """
        try:
            result = subprocess.run(
                [
                    # -S keeps curl's error text on stderr despite -s.
                    "curl", "-sSk", "-o", "/dev/null", "-w", "%{http_code}",
                    "--connect-timeout", str(self.CONNECT_TIMEOUT),
                    endpoint,
                ],
                capture_output=True,
                text=True,
                timeout=self.REQUEST_TIMEOUT,
            )
            status_code = int(result.stdout.strip())
        except (OSError, subprocess.SubprocessError, ValueError) as e:
            return CheckResult(
                check_name=f"service:{service_name}",
                status=CheckStatus.FAIL,
                message=f"Service '{service_name}' health check failed: {str(e)}",
                details={"service": service_name, "endpoint": endpoint, "error": str(e)},
                timestamp=self._now(),
            )

        # curl writes http_code "000" and exits non-zero when no HTTP response
        # was received (refused connection, DNS failure, timeout). That is an
        # unreachable service, not merely an unexpected status.
        if result.returncode != 0 or status_code == 0:
            error = result.stderr.strip() or f"curl exited with code {result.returncode}"
            return CheckResult(
                check_name=f"service:{service_name}",
                status=CheckStatus.FAIL,
                message=f"Service '{service_name}' is not reachable: {error}",
                details={"service": service_name, "endpoint": endpoint, "error": error},
                timestamp=self._now(),
            )

        if expected_code is None or status_code == expected_code:
            return CheckResult(
                check_name=f"service:{service_name}",
                status=CheckStatus.PASS,
                message=f"Service '{service_name}' is healthy (HTTP {status_code})",
                details={"service": service_name, "endpoint": endpoint, "status_code": status_code},
                timestamp=self._now(),
            )

        return CheckResult(
            check_name=f"service:{service_name}",
            status=CheckStatus.WARN,
            message=f"Service '{service_name}' returned unexpected status {status_code}",
            details={
                "service": service_name,
                "endpoint": endpoint,
                "status_code": status_code,
                "expected": expected_code,
            },
            timestamp=self._now(),
        )

    def check_tool_installed(self, tool_name: str) -> CheckResult:
        """Check if a tool is installed and available on PATH."""
        # shutil.which avoids depending on an external `which` binary.
        path = shutil.which(tool_name)

        if path:
            return CheckResult(
                check_name=f"tool:{tool_name}",
                status=CheckStatus.PASS,
                message=f"Tool '{tool_name}' found at {path}",
                details={"tool": tool_name, "path": path},
                timestamp=self._now(),
            )

        return CheckResult(
            check_name=f"tool:{tool_name}",
            status=CheckStatus.FAIL,
            message=f"Tool '{tool_name}' not found in PATH",
            details={"tool": tool_name},
            timestamp=self._now(),
        )

    def check_vault_token_valid(self) -> CheckResult:
        """Check if the Vault token is valid.

        PASS: the token lookup succeeds and the token has a positive TTL or
        carries the "root" policy.
        WARN: the lookup succeeds but neither condition holds.
        FAIL: the lookup returns no usable ``data`` object.
        """
        payload = self._json_object(self._vault_get("auth/token/lookup-self"))
        token_data = payload.get("data") if payload is not None else None

        if not isinstance(token_data, dict):
            return CheckResult(
                check_name="vault_token",
                status=CheckStatus.FAIL,
                message="Vault token invalid or expired",
                details={},
                timestamp=self._now(),
            )

        # "or" also normalises explicit JSON nulls to the defaults.
        policies = token_data.get("policies") or []
        ttl = token_data.get("ttl") or 0

        if ttl > 0 or "root" in policies:
            return CheckResult(
                check_name="vault_token",
                status=CheckStatus.PASS,
                message=f"Vault token valid (TTL: {ttl}s, policies: {policies})",
                details={"ttl": ttl, "policies": policies},
                timestamp=self._now(),
            )

        return CheckResult(
            check_name="vault_token",
            status=CheckStatus.WARN,
            message="Vault token has no TTL (may be root token)",
            details={"ttl": ttl, "policies": policies},
            timestamp=self._now(),
        )

    def run_action_checks(self, action_type: str) -> list[CheckResult]:
        """Run all dependency checks for an action type.

        Unknown action types fall back to the "generic" dependency set.
        Results are ordered: Vault token, secrets, services, tools.
        """
        # Always check Vault token
        results = [self.check_vault_token_valid()]

        deps = self.DEPENDENCIES.get(action_type, self.DEPENDENCIES["generic"])

        results.extend(self.check_vault_secret(secret) for secret in deps.get("secrets", []))
        results.extend(self.check_service_health(service) for service in deps.get("services", []))
        results.extend(self.check_tool_installed(tool) for tool in deps.get("tools", []))

        return results

    def preflight_report(self, action_type: str, extra_secrets: Optional[list[str]] = None) -> dict:
        """Generate a full dependency preflight report.

        Args:
            action_type: key into DEPENDENCIES selecting the standard checks.
            extra_secrets: additional Vault secret paths to check, if any.

        Returns:
            A dict with the serialised checks, per-status counts under
            "summary", and "can_proceed", which is False as soon as any
            check has status FAIL.
        """
        report = {
            "report_type": "dependency_preflight",
            "action_type": action_type,
            "timestamp": self._now(),
            "checks": [],
            "summary": {
                "total_checks": 0,
                "passed": 0,
                "failed": 0,
                "warnings": 0,
                "skipped": 0,
            },
            "can_proceed": True,
        }

        # Run standard checks, then any extra secrets
        results = self.run_action_checks(action_type)
        results.extend(self.check_vault_secret(secret) for secret in (extra_secrets or []))

        counter_for = {
            CheckStatus.PASS: "passed",
            CheckStatus.FAIL: "failed",
            CheckStatus.WARN: "warnings",
            CheckStatus.SKIP: "skipped",
        }
        summary = report["summary"]

        for r in results:
            report["checks"].append(r.to_dict())
            summary["total_checks"] += 1

            counter = counter_for.get(r.status)
            if counter is not None:
                summary[counter] += 1
            if r.status == CheckStatus.FAIL:
                report["can_proceed"] = False

        return report
|
|
|
|
|
|
# =============================================================================
|
|
# CLI
|
|
# =============================================================================
|
|
|
|
def main() -> int:
    """Run the dependency preflight check from the command line.

    Prints the report as JSON (``--json``) or as a text summary.

    Returns:
        0 when the report's "can_proceed" flag is true, 1 otherwise.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Dependency Preflight Checker")
    parser.add_argument(
        "action_type",
        choices=["terraform", "ansible", "docker", "generic"],
        help="Type of action to check dependencies for",
    )
    parser.add_argument(
        "--secret",
        action="append",
        dest="extra_secrets",
        help="Additional secrets to check (can be repeated)",
    )
    parser.add_argument("--json", action="store_true", help="Output JSON")
    cli = parser.parse_args()

    report = DependencyChecker().preflight_report(cli.action_type, cli.extra_secrets)
    exit_code = 0 if report["can_proceed"] else 1

    if cli.json:
        print(json.dumps(report, indent=2))
        return exit_code

    heavy_rule = "=" * 60
    light_rule = "-" * 60
    icons = {
        "PASS": "[OK]",
        "FAIL": "[FAIL]",
        "WARN": "[WARN]",
        "SKIP": "[SKIP]",
    }

    # Header
    lines = [
        "\n" + heavy_rule,
        "DEPENDENCY PREFLIGHT REPORT",
        heavy_rule,
        f"Action Type: {report['action_type']}",
        f"Timestamp: {report['timestamp']}",
        "",
    ]

    # One icon/name line and one message line per check
    for check in report["checks"]:
        icon = icons.get(check["status"], "[?]")
        lines.append(f" {icon} {check['check_name']}")
        lines.append(f" {check['message']}")

    # Per-status counts
    totals = report["summary"]
    lines += [
        "\n" + light_rule,
        "SUMMARY",
        light_rule,
        f" Total Checks: {totals['total_checks']}",
        f" Passed: {totals['passed']}",
        f" Failed: {totals['failed']}",
        f" Warnings: {totals['warnings']}",
        f" Skipped: {totals['skipped']}",
        "",
    ]

    # Verdict
    if report["can_proceed"]:
        lines.append("[OK] DEPENDENCY CHECK PASSED - All dependencies available")
    else:
        lines.append("[FAIL] DEPENDENCY CHECK FAILED - Missing dependencies")
    lines.append(heavy_rule)

    print("\n".join(lines))
    return exit_code


if __name__ == "__main__":
    sys.exit(main())
|