agent-governance/wrappers/ansible-governed.sh

#!/bin/bash
#
# Governed Ansible Wrapper
# ========================
# Enforces check-mode-first and full audit trail.
# Part of Phase 3: Execution Pipeline.
#
# Usage:
#   ansible-governed.sh check <playbook> [ansible options]
#   ansible-governed.sh run <playbook> --check-id=<id> [ansible options]
#   ansible-governed.sh adhoc <pattern> -m <module> [options]
#
# Rules:
#   1. All playbook runs MUST have a corresponding check-mode artifact
#   2. Check outputs are stored with hashes for verification
#   3. All operations are logged to the governance ledger
#   4. Ad-hoc commands require Tier 2+
#

# Strict mode: abort on an unhandled command failure (-e), on expansion of an
# unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Configuration
# Every path used by this wrapper is derived from GOVERNANCE_DIR:
#   EVIDENCE_DIR  - holds the ansible-ledger.jsonl audit ledger
#   PREFLIGHT_DIR - where preflight.py is looked up
#   ARTIFACT_DIR  - one sub-directory per check artifact ID
GOVERNANCE_DIR="/opt/agent-governance"
EVIDENCE_DIR="${GOVERNANCE_DIR}/evidence"
PREFLIGHT_DIR="${GOVERNANCE_DIR}/preflight"
ARTIFACT_DIR="${EVIDENCE_DIR}/ansible"

# Colors
# ANSI escape sequences kept as literal backslash text (single-quoted); they
# are expanded only when one of the log_* helpers prints them.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Ensure directories exist
# Runs on every invocation, before the sub-command is dispatched (so also for
# --help); with `set -e` active a failure here aborts the script.
mkdir -p "${ARTIFACT_DIR}"

# Print an informational message to stdout, prefixed with a blue [INFO] tag.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text, printed literally
log_info() {
    # Only the color constants go through %b (escape expansion); the message
    # uses %s so backslashes in paths, host patterns or IDs are not interpreted.
    printf '%b[INFO]%b %s\n' "${BLUE}" "${NC}" "$1"
}

# Print a success message to stdout, prefixed with a green [OK] tag.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text, printed literally
log_ok() {
    # Colors are expanded via %b; the message is passed through %s so any
    # backslash sequences it contains are shown as-is rather than interpreted.
    printf '%b[OK]%b %s\n' "${GREEN}" "${NC}" "$1"
}

# Print a warning message to stdout, prefixed with a yellow [WARN] tag.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text, printed literally
log_warn() {
    # Colors are expanded via %b; the message is passed through %s so any
    # backslash sequences it contains are shown as-is rather than interpreted.
    printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$1"
}

# Print an error message to stdout, prefixed with a red [ERROR] tag.
# Globals:   RED, NC (read)
# Arguments: $1 - message text, printed literally
log_error() {
    # Colors are expanded via %b; the message is passed through %s so
    # user-supplied values (paths, artifact IDs) cannot inject escape sequences.
    printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$1"
}

# Print a policy-block message to stdout, prefixed with a red [BLOCKED] tag.
# Globals:   RED, NC (read)
# Arguments: $1 - message text, printed literally
log_block() {
    # Colors are expanded via %b; the message is passed through %s so any
    # backslash sequences it contains are shown as-is rather than interpreted.
    printf '%b[BLOCKED]%b %s\n' "${RED}" "${NC}" "$1"
}

# Generate unique artifact ID
# Outputs: "ans-<YYYYMMDD>-<HHMMSS>-<8 hex chars>" on stdout (local time).
generate_artifact_id() {
    local stamp suffix hex_re='^[0-9a-f]{8}$'

    stamp=$(date +%Y%m%d-%H%M%S)

    # Preferred source of randomness. If openssl is missing or misbehaves the
    # suffix would be empty and two checks started within the same second would
    # share an artifact directory, so fall back to the kernel RNG.
    suffix=$(openssl rand -hex 4 2>/dev/null) || suffix=""
    if [[ ! "${suffix}" =~ ${hex_re} ]]; then
        suffix=$(od -An -N4 -tx1 /dev/urandom | tr -d ' \n')
    fi

    printf 'ans-%s-%s\n' "${stamp}" "${suffix}"
}

# Get agent info from environment or default
# Globals:  AGENT_ID   (read/written) - defaults to "cli-user"
#           AGENT_TIER (read/written) - defaults to 1
# Returns:  0 on success; 1 if AGENT_TIER is not a plain non-negative integer.
#           The handlers in this file call it as a bare command, so with
#           `set -e` active a non-zero return aborts the script.
get_agent_info() {
    local tier_re='^(0|[1-9][0-9]*)$'

    AGENT_ID="${AGENT_ID:-cli-user}"
    AGENT_TIER="${AGENT_TIER:-1}"

    # AGENT_TIER is later written unquoted into JSON and compared numerically,
    # so anything other than a canonical integer must be rejected up front.
    if [[ ! "${AGENT_TIER}" =~ ${tier_re} ]]; then
        printf 'AGENT_TIER must be a non-negative integer (got: %s)\n' "${AGENT_TIER}" >&2
        return 1
    fi
}

# Run preflight checks
# Invokes ${PREFLIGHT_DIR}/preflight.py for the given targets.
# Globals:   PREFLIGHT_DIR, AGENT_TIER, AGENT_ID (read)
# Arguments: $1 - whitespace-separated list of target names
# Returns:   0 if preflight.py is absent (checks skipped with a warning) or
#            succeeds; otherwise the non-zero status of the preflight run.
run_preflight() {
    local targets="$1"
    local -a target_list=()

    log_info "Running preflight checks..."

    if [[ ! -f "${PREFLIGHT_DIR}/preflight.py" ]]; then
        log_warn "Preflight system not available, skipping checks"
        return 0
    fi

    # Split the target list on whitespace without glob expansion. `read -d ''`
    # reports EOF as a failure even though the array is filled, hence `|| true`.
    read -r -d '' -a target_list <<< "${targets}" || true

    # Run in a subshell so the `cd` does not leak into the caller: the command
    # handlers keep using the (possibly relative) playbook path afterwards.
    (
        cd "${PREFLIGHT_DIR}" || exit 1
        python3 preflight.py ${target_list[@]+"${target_list[@]}"} \
            --action ansible --tier "${AGENT_TIER}" --agent-id "${AGENT_ID}" --quiet
    )
}

# Extract inventory hosts from playbook
# Prints the value of the first `hosts:` key found in the playbook, with quote
# characters and trailing whitespace removed. Prints "all" when the file
# cannot be read or contains no `hosts:` line.
# Arguments: $1 - path to the playbook
# Outputs:   one line on stdout
# Returns:   always 0
extract_hosts() {
    local playbook="$1"
    # Simple line-based extraction - in production would parse YAML properly.
    # Accepts both "  hosts: x" and the list-item form "- hosts: x".
    local hosts_re='^[[:space:]]*(-[[:space:]]*)?hosts:[[:space:]]*(.*)$'
    local dq='"' sq="'"
    local line value="" found=0

    if [[ -f "${playbook}" && -r "${playbook}" ]]; then
        # `|| [[ -n ... ]]` also processes a final line lacking a newline.
        while IFS= read -r line || [[ -n "${line}" ]]; do
            if [[ "${line}" =~ ${hosts_re} ]]; then
                value="${BASH_REMATCH[2]}"
                found=1
                break
            fi
        done < "${playbook}"
    fi

    if (( ! found )); then
        printf '%s\n' "all"
        return 0
    fi

    # Drop quote characters, then trailing whitespace (including a CR from
    # CRLF line endings).
    value=${value//${dq}/}
    value=${value//${sq}/}
    value="${value%"${value##*[![:space:]]}"}"

    printf '%s\n' "${value}"
}

# Escape a string for embedding inside a double-quoted JSON value
# (backslash, double quote, newline, carriage return, tab).
_artifact_json_escape() {
    local text="$1"
    text=${text//\\/\\\\}
    text=${text//\"/\\\"}
    text=${text//$'\n'/\\n}
    text=${text//$'\r'/\\r}
    text=${text//$'\t'/\\t}
    printf '%s' "${text}"
}

# Store check artifact
# Creates ${ARTIFACT_DIR}/<artifact_id>/ containing:
#   check-output.txt - the captured check-mode output
#   playbook.yml     - copy of the playbook (best effort)
#   playbook.sha256  - sha256sum output for the playbook (best effort)
#   metadata.json    - artifact id, timestamp, agent info, playbook path/hash
# Globals:   ARTIFACT_DIR, AGENT_ID, AGENT_TIER (read)
# Arguments: $1 - check output text
#            $2 - artifact ID
#            $3 - playbook path
# Outputs:   the artifact directory path on stdout (nothing else)
# Returns:   non-zero if the directory or the check output cannot be written
store_check_artifact() {
    local check_output="$1"
    local artifact_id="$2"
    local playbook="$3"

    local artifact_path="${ARTIFACT_DIR}/${artifact_id}"
    local checksum="" created_at
    local id_json agent_json playbook_json

    mkdir -p "${artifact_path}" || return 1

    # Store check output (printf keeps text such as "-n" or "-e" verbatim)
    printf '%s\n' "${check_output}" > "${artifact_path}/check-output.txt" || return 1

    # Copy playbook for reference; deliberately best effort
    cp -- "${playbook}" "${artifact_path}/playbook.yml" 2>/dev/null || true

    # Hash the playbook once and reuse that value for the metadata, so the
    # .sha256 file and metadata.json can never disagree. Best effort: on
    # failure the file is left empty and the metadata records "unknown".
    sha256sum -- "${playbook}" > "${artifact_path}/playbook.sha256" 2>/dev/null || true
    read -r checksum _ < "${artifact_path}/playbook.sha256" || true

    created_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)
    id_json=$(_artifact_json_escape "${artifact_id}")
    agent_json=$(_artifact_json_escape "${AGENT_ID}")
    playbook_json=$(_artifact_json_escape "${playbook}")

    # Store metadata
    cat > "${artifact_path}/metadata.json" << EOF
{
    "artifact_id": "${id_json}",
    "type": "ansible_check",
    "created_at": "${created_at}",
    "agent_id": "${agent_json}",
    "agent_tier": ${AGENT_TIER},
    "playbook": "${playbook_json}",
    "playbook_checksum": "${checksum:-unknown}"
}
EOF

    printf '%s\n' "${artifact_path}"
}

# Verify check artifact before run
# Confirms that the artifact exists under ARTIFACT_DIR and that the playbook's
# current SHA-256 equals the checksum recorded when the check was stored.
# Verification fails closed: a malformed ID, a missing or empty checksum file,
# or an unreadable playbook are all treated as failures.
# Globals:   ARTIFACT_DIR (read)
# Arguments: $1 - check artifact ID
#            $2 - playbook path
# Returns:   0 when verified, 1 otherwise
verify_check_artifact() {
    local artifact_id="$1"
    local playbook="$2"

    local artifact_path
    local stored_checksum="" current_checksum=""

    # The ID becomes a path component. Reject anything that could resolve
    # outside ARTIFACT_DIR (e.g. ".." points at an existing directory).
    if [[ -z "${artifact_id}" || "${artifact_id}" == */* \
        || "${artifact_id}" == "." || "${artifact_id}" == ".." ]]; then
        log_error "Invalid check artifact ID: ${artifact_id}"
        return 1
    fi

    artifact_path="${ARTIFACT_DIR}/${artifact_id}"

    if [[ ! -d "${artifact_path}" ]]; then
        log_error "Check artifact not found: ${artifact_id}"
        return 1
    fi

    # A directory without a recorded checksum is not proof that a check ran.
    if [[ ! -s "${artifact_path}/playbook.sha256" ]]; then
        log_error "Check artifact has no recorded playbook checksum: ${artifact_id}"
        return 1
    fi

    # Verify playbook hasn't changed
    read -r stored_checksum _ < "${artifact_path}/playbook.sha256" || true
    current_checksum=$(sha256sum -- "${playbook}" 2>/dev/null | cut -d' ' -f1) || current_checksum=""

    # Two empty strings would compare equal, so emptiness is checked first.
    if [[ -z "${stored_checksum}" || -z "${current_checksum}" ]]; then
        log_error "Unable to determine playbook checksum for verification"
        return 1
    fi

    if [[ "${stored_checksum}" != "${current_checksum}" ]]; then
        log_error "Playbook has changed since check was run!"
        log_error "  Stored:  ${stored_checksum}"
        log_error "  Current: ${current_checksum}"
        return 1
    fi

    log_ok "Check artifact verified: ${artifact_id}"
    return 0
}

# Escape a string for embedding inside a double-quoted JSON value
# (backslash, double quote, newline, carriage return, tab).
_ledger_json_escape() {
    local text="$1"
    text=${text//\\/\\\\}
    text=${text//\"/\\\"}
    text=${text//$'\n'/\\n}
    text=${text//$'\r'/\\r}
    text=${text//$'\t'/\\t}
    printf '%s' "${text}"
}

# Log to governance ledger
# Appends exactly one JSON object on a single line (JSON Lines) to
# ${EVIDENCE_DIR}/ansible-ledger.jsonl.
# Globals:   EVIDENCE_DIR, AGENT_ID, AGENT_TIER (read)
# Arguments: $1 - action  (e.g. check, run, adhoc)
#            $2 - status  (e.g. SUCCESS, FAILED, BLOCKED, EXECUTED)
#            $3 - free-form details
log_to_ledger() {
    local action="$1"
    local status="$2"
    local details="$3"

    local timestamp tier_json
    local int_re='^(0|[1-9][0-9]*)$'

    timestamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)

    # Emit the tier as a JSON number when it is one; otherwise as an escaped
    # string so a malformed value cannot corrupt the ledger line.
    if [[ "${AGENT_TIER}" =~ ${int_re} ]]; then
        tier_json="${AGENT_TIER}"
    else
        tier_json="\"$(_ledger_json_escape "${AGENT_TIER}")\""
    fi

    # All free-text fields are escaped: details may carry raw command-line
    # arguments containing quotes or newlines.
    printf '{"timestamp": "%s", "agent_id": "%s", "agent_tier": %s, "tool": "ansible", "action": "%s", "status": "%s", "details": "%s"}\n' \
        "${timestamp}" \
        "$(_ledger_json_escape "${AGENT_ID}")" \
        "${tier_json}" \
        "$(_ledger_json_escape "${action}")" \
        "$(_ledger_json_escape "${status}")" \
        "$(_ledger_json_escape "${details}")" \
        >> "${EVIDENCE_DIR}/ansible-ledger.jsonl"
}

# Main command handlers
# Handler for `check <playbook> [ansible options]`.
# Runs preflight, executes `ansible-playbook --check --diff`, stores the output
# as a check artifact and records the outcome in the ledger.
# Arguments: $1   - playbook path
#            $2.. - passed through to ansible-playbook unchanged
# Exits:     1 on missing argument/playbook, preflight failure, ansible
#            failure, or when the artifact cannot be stored.
cmd_check() {
    # Without this guard `set -u` aborts on "$1" with a cryptic shell error.
    if (( $# < 1 )); then
        log_error "check requires a playbook argument"
        echo "Usage: ansible-governed.sh check <playbook> [ansible options]"
        exit 1
    fi

    local playbook="$1"
    shift

    get_agent_info

    echo ""
    echo "=========================================="
    echo "GOVERNED ANSIBLE CHECK"
    echo "=========================================="
    echo "Agent: ${AGENT_ID} (Tier ${AGENT_TIER})"
    echo "Playbook: ${playbook}"
    echo ""

    if [[ ! -f "${playbook}" ]]; then
        log_error "Playbook not found: ${playbook}"
        exit 1
    fi

    # Extract hosts (informational only; falls back to "all" on any failure)
    local hosts
    hosts=$(extract_hosts "${playbook}") || hosts="all"
    log_info "Target hosts: ${hosts}"

    # For preflight, use a sample target.
    # NOTE(review): preflight is run against this fixed name, not against the
    # hosts extracted above - confirm this is intended.
    local preflight_target="sandbox-vm-01"

    # Run preflight
    if ! run_preflight "${preflight_target}"; then
        log_block "Preflight checks failed"
        log_to_ledger "check" "BLOCKED" "Preflight failed"
        exit 1
    fi
    log_ok "Preflight checks passed"

    # Generate artifact ID (declaration kept separate so a failure is not
    # masked by `local`)
    local artifact_id
    artifact_id=$(generate_artifact_id)

    log_info "Running ansible-playbook --check --diff..."
    log_info "Check will be stored as artifact: ${artifact_id}"

    # Run check mode
    local check_output artifact_path
    if check_output=$(ansible-playbook --check --diff "${playbook}" "$@" 2>&1); then
        log_ok "Ansible check completed"

        # Store artifact. Never hand out an artifact ID that was not persisted.
        if ! artifact_path=$(store_check_artifact "${check_output}" "${artifact_id}" "${playbook}"); then
            log_error "Failed to store check artifact: ${artifact_id}"
            log_to_ledger "check" "FAILED" "artifact store failed"
            exit 1
        fi
        log_ok "Check artifact stored: ${artifact_path}"

        log_to_ledger "check" "SUCCESS" "artifact_id=${artifact_id}"

        echo ""
        echo "=========================================="
        echo "CHECK OUTPUT"
        echo "=========================================="
        printf '%s\n' "${check_output}"
        echo "=========================================="
        echo ""
        echo "CHECK COMPLETE"
        echo "=========================================="
        echo "Artifact ID: ${artifact_id}"
        echo "To run this playbook:"
        echo "  ansible-governed.sh run ${playbook} --check-id=${artifact_id}"
        echo "=========================================="
    else
        log_error "Ansible check failed"
        printf '%s\n' "${check_output}"
        log_to_ledger "check" "FAILED" "ansible error"
        exit 1
    fi
}

# Write the evidence files for a playbook run into an artifact directory.
# Arguments: $1 - artifact directory, $2 - SUCCESS|FAILED, $3 - captured output
_record_run_evidence() {
    local artifact_path="$1"
    local run_status="$2"
    local run_output="$3"

    # Minimal JSON escaping for the one free-text field written here.
    local agent_json
    agent_json=${AGENT_ID//\\/\\\\}
    agent_json=${agent_json//\"/\\\"}

    printf '%s\n' "${run_output}" > "${artifact_path}/run-output.txt"

    cat > "${artifact_path}/run-result.json" << EOF
{
    "ran_at": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
    "agent_id": "${agent_json}",
    "status": "${run_status}"
}
EOF
}

# Handler for `run <playbook> --check-id=<id> [ansible options]`.
# Requires a verified check artifact, re-runs preflight, executes the playbook
# and stores the output (success or failure) next to the check artifact.
# Arguments: $1   - playbook path
#            $2.. - `--check-id=<id>` plus options forwarded to ansible-playbook
# Exits:     1 on missing argument/playbook, missing or unverified check
#            artifact, preflight failure, or ansible failure.
# NOTE(review): the forwarded options are not compared with those used for the
# check; only the playbook content is verified.
cmd_run() {
    # Without this guard `set -u` aborts on "$1" with a cryptic shell error.
    if (( $# < 1 )); then
        log_error "run requires a playbook argument"
        echo "Usage: ansible-governed.sh run <playbook> --check-id=<artifact_id>"
        exit 1
    fi

    local playbook="$1"
    shift

    get_agent_info

    echo ""
    echo "=========================================="
    echo "GOVERNED ANSIBLE RUN"
    echo "=========================================="
    echo "Agent: ${AGENT_ID} (Tier ${AGENT_TIER})"
    echo "Playbook: ${playbook}"
    echo ""

    if [[ ! -f "${playbook}" ]]; then
        log_error "Playbook not found: ${playbook}"
        exit 1
    fi

    # Parse arguments: pull out --check-id=..., forward everything else
    local check_artifact_id=""
    local arg
    local -a remaining_args=()
    for arg in "$@"; do
        case "${arg}" in
            --check-id=*)
                check_artifact_id="${arg#*=}"
                ;;
            *)
                remaining_args+=("${arg}")
                ;;
        esac
    done

    # Require check artifact
    if [[ -z "${check_artifact_id}" ]]; then
        log_block "Run requires a check artifact"
        echo ""
        echo "Usage: ansible-governed.sh run <playbook> --check-id=<artifact_id>"
        echo ""
        echo "First run: ansible-governed.sh check <playbook>"
        echo "Then use the artifact ID provided."
        log_to_ledger "run" "BLOCKED" "no check artifact"
        exit 1
    fi

    # Verify check artifact
    if ! verify_check_artifact "${check_artifact_id}" "${playbook}"; then
        log_block "Check verification failed"
        log_to_ledger "run" "BLOCKED" "verification failed"
        exit 1
    fi

    # Run preflight again
    local preflight_target="sandbox-vm-01"
    if ! run_preflight "${preflight_target}"; then
        log_block "Preflight checks failed"
        log_to_ledger "run" "BLOCKED" "Preflight failed"
        exit 1
    fi
    log_ok "Preflight checks passed"

    log_info "Running playbook (check artifact: ${check_artifact_id})..."

    # Run evidence is stored alongside the check artifact
    local artifact_path="${ARTIFACT_DIR}/${check_artifact_id}"

    # Run playbook. The ${arr[@]+...} form keeps an empty argument list from
    # tripping `set -u` on bash releases older than 4.4.
    local run_output
    if run_output=$(ansible-playbook "${playbook}" ${remaining_args[@]+"${remaining_args[@]}"} 2>&1); then
        log_ok "Ansible run completed"

        _record_run_evidence "${artifact_path}" "SUCCESS" "${run_output}"
        log_to_ledger "run" "SUCCESS" "artifact_id=${check_artifact_id}"

        echo ""
        echo "=========================================="
        echo "RUN OUTPUT"
        echo "=========================================="
        printf '%s\n' "${run_output}"
        echo "=========================================="
        echo ""
        echo "RUN COMPLETE"
        echo "=========================================="
        echo "Check artifact: ${check_artifact_id}"
        echo "Evidence stored at: ${artifact_path}"
        echo "=========================================="
    else
        log_error "Ansible run failed"
        printf '%s\n' "${run_output}"

        # Still store output as evidence
        _record_run_evidence "${artifact_path}" "FAILED" "${run_output}"
        log_to_ledger "run" "FAILED" "ansible error"
        exit 1
    fi
}

# Handler for `adhoc <pattern> -m <module> [options]`.
# Runs `ansible` directly (no check-first step) for agents at Tier 2 or above,
# logging the attempt to the ledger before execution.
# Arguments: $@ - passed to `ansible` unchanged
# Exits:     1 if the tier is below 2 or not a number, or no arguments are
#            given; otherwise the script ends with ansible's exit status.
cmd_adhoc() {
    get_agent_info

    echo ""
    echo "=========================================="
    echo "GOVERNED ANSIBLE AD-HOC"
    echo "=========================================="
    echo "Agent: ${AGENT_ID} (Tier ${AGENT_TIER})"
    echo ""

    # Ad-hoc requires Tier 2+.
    # The tier is matched as plain digits first: `[[ x -lt y ]]` evaluates its
    # operands as arithmetic expressions, and a malformed value makes the test
    # error out as "false", which would let the request through.
    local tier_re='^[0-9]+$'
    if [[ ! "${AGENT_TIER}" =~ ${tier_re} ]] || (( 10#${AGENT_TIER} < 2 )); then
        log_block "Ad-hoc commands require Tier 2+ (current: Tier ${AGENT_TIER})"
        log_to_ledger "adhoc" "BLOCKED" "insufficient tier"
        exit 1
    fi

    if (( $# < 1 )); then
        log_error "adhoc requires a host pattern"
        echo "Usage: ansible-governed.sh adhoc <pattern> -m <module> [options]"
        exit 1
    fi

    log_warn "Ad-hoc commands bypass check-first requirement"
    log_info "All ad-hoc commands are logged for audit"

    # Log the command before it runs so the attempt is recorded even if
    # ansible is interrupted.
    log_to_ledger "adhoc" "EXECUTED" "args=$*"

    # Run ansible
    ansible "$@"
}

# Show usage
# Prints the command synopsis, the supported environment variables and a few
# example invocations to stdout, one line per printf argument.
usage() {
    printf '%s\n' \
        "Governed Ansible Wrapper" \
        "" \
        "Usage:" \
        "  ansible-governed.sh check <playbook> [ansible options]" \
        "  ansible-governed.sh run <playbook> --check-id=<artifact_id> [options]" \
        "  ansible-governed.sh adhoc <pattern> -m <module> [options]" \
        "" \
        "Environment Variables:" \
        "  AGENT_ID   - Agent identifier (default: cli-user)" \
        "  AGENT_TIER - Agent trust tier 0-4 (default: 1)" \
        "" \
        "Examples:" \
        "  ansible-governed.sh check site.yml -i inventory.ini" \
        "  ansible-governed.sh run site.yml --check-id=ans-20260123-120000-abc" \
        "  AGENT_TIER=2 ansible-governed.sh adhoc all -m ping"
}

# Main
# Entry point: the first argument selects the sub-command and the remaining
# arguments are handed to its handler. Help flags print usage and succeed;
# anything else (including no argument) prints usage and exits with status 1.
main() {
    local subcommand="${1:-}"

    case "${subcommand}" in
        check|run|adhoc)
            # Drop the sub-command name, then dispatch to cmd_check / cmd_run /
            # cmd_adhoc with whatever is left.
            shift
            "cmd_${subcommand}" "$@"
            ;;
        help|--help|-h)
            usage
            ;;
        *)
            usage
            exit 1
            ;;
    esac
}

main "$@"