Phase 8 Production Hardening with complete governance infrastructure:

- Vault integration with tiered policies (T0-T4)
- DragonflyDB state management
- SQLite audit ledger
- Pipeline DSL and templates
- Promotion/revocation engine
- Checkpoint system for session persistence
- Health manager and circuit breaker for fault tolerance
- GitHub/Slack integrations
- Architectural test pipeline with bug watcher, suggestion engine, council review
- Multi-agent chaos testing framework

Test Results:

- Governance tests: 68/68 passing
- E2E workflow: 16/16 passing
- Phase 2 Vault: 14/14 passing
- Integration tests: 27/27 passing

Coverage: 57.6% average across 12 phases

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
260 lines
8.3 KiB
Bash
Executable File
260 lines
8.3 KiB
Bash
Executable File
#!/bin/bash
|
|
# Agent Bootstrap Library
|
|
# Source this in agent scripts to handle authentication and validation
|
|
|
|
# Vault endpoint: a VAULT_ADDR already present (and non-empty) in the
# environment wins; otherwise the built-in default address is used.
: "${VAULT_ADDR:=https://10.77.0.1:8200}"

# NOTE(review): presumably consumed by the Vault CLI to skip TLS certificate
# verification -- confirm this is intended outside of test environments.
VAULT_SKIP_VERIFY=true
export VAULT_SKIP_VERIFY

# ANSI colour sequences for log output. They are stored with literal
# backslashes and only become real escape characters when printed by a
# command that interprets backslash escapes.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
|
|
|
|
# Print an informational message on stdout, prefixed with a green [INFO] tag.
# Arguments: $1 - message text (backslash escapes in it are interpreted)
log_info() {
  printf '%b\n' "${GREEN}[INFO]${NC} $1"
}
|
|
# Print a warning message on stdout, prefixed with a yellow [WARN] tag.
# Arguments: $1 - message text (backslash escapes in it are interpreted)
log_warn() {
  printf '%b\n' "${YELLOW}[WARN]${NC} $1"
}
|
|
# Print an error message on stdout, prefixed with a red [ERROR] tag.
# Arguments: $1 - message text (backslash escapes in it are interpreted)
log_error() {
  printf '%b\n' "${RED}[ERROR]${NC} $1"
}
|
|
|
|
# Authenticate this agent against Vault using the AppRole auth method.
#
# Usage:     agent_authenticate <role_id> <secret_id>
# Globals:   VAULT_ADDR (read)
#            VAULT_TOKEN, AGENT_TOKEN_ACCESSOR, AGENT_TOKEN_TTL,
#            AGENT_POLICIES (written and exported on success)
# Arguments: $1 - AppRole role_id
#            $2 - AppRole secret_id
# Outputs:   status messages through log_info / log_error
# Returns:   0 on success; 1 when an argument is missing, the request could
#            not be encoded or sent, or the response carries no client token
agent_authenticate() {
  local role_id="${1:-}"
  local secret_id="${2:-}"

  if [[ -z "$role_id" || -z "$secret_id" ]]; then
    log_error "role_id and secret_id required"
    return 1
  fi

  # Let jq build the login body so quotes and backslashes in the credentials
  # are JSON-escaped instead of corrupting (or injecting into) the payload.
  # The values are handed over through the environment, not argv, so they do
  # not appear in process listings.
  local payload
  if ! payload=$(
    AGENT_LOGIN_ROLE_ID="$role_id" AGENT_LOGIN_SECRET_ID="$secret_id" \
      jq -cn '{role_id: env.AGENT_LOGIN_ROLE_ID, secret_id: env.AGENT_LOGIN_SECRET_ID}'
  ); then
    log_error "Authentication failed: could not encode login request"
    return 1
  fi

  # The body reaches curl on stdin (--data @-) for the same reason.
  # -k turns off TLS certificate verification, exactly as before.
  local response
  if ! response=$(
    printf '%s' "$payload" \
      | curl -sk --request POST --data @- "$VAULT_ADDR/v1/auth/approle/login"
  ); then
    log_error "Authentication failed: request to $VAULT_ADDR failed"
    return 1
  fi

  local token
  token=$(printf '%s\n' "$response" | jq -r '.auth.client_token // empty' 2>/dev/null)

  if [[ -z "$token" ]]; then
    # A non-JSON or empty response yields no reason text; fall back to a
    # generic one rather than printing an empty message.
    local reason
    reason=$(printf '%s\n' "$response" | jq -r '.errors[]? // "unknown error"' 2>/dev/null)
    log_error "Authentication failed: ${reason:-unknown error}"
    return 1
  fi

  # Assign first, export afterwards: `export VAR=$(cmd)` would hide the exit
  # status of cmd.
  VAULT_TOKEN="$token"
  AGENT_TOKEN_ACCESSOR=$(printf '%s\n' "$response" | jq -r '.auth.accessor')
  AGENT_TOKEN_TTL=$(printf '%s\n' "$response" | jq -r '.auth.lease_duration')
  # A missing policy list becomes an empty string instead of a jq error.
  AGENT_POLICIES=$(printf '%s\n' "$response" | jq -r '(.auth.policies // []) | join(",")')
  export VAULT_TOKEN AGENT_TOKEN_ACCESSOR AGENT_TOKEN_TTL AGENT_POLICIES

  log_info "Authenticated successfully (TTL: ${AGENT_TOKEN_TTL}s, policies: $AGENT_POLICIES)"
  return 0
}
|
|
|
|
# Load an agent's metadata record from Vault and export it as AGENT_* vars.
#
# Usage:     agent_load_metadata <agent_id>
# Globals:   VAULT_TOKEN, VAULT_ADDR (read)
#            AGENT_ID, AGENT_ROLE, AGENT_TIER, AGENT_VERSION, AGENT_OWNER,
#            AGENT_CONFIDENCE_THRESHOLD, AGENT_ALLOWED, AGENT_FORBIDDEN
#            (written and exported on success)
# Arguments: $1 - agent identifier, used as the last part of the secret path
# Outputs:   status messages through log_info / log_error
# Returns:   0 on success; 1 when not authenticated, the agent_id is missing
#            or unsafe, the request fails, or no metadata is returned
agent_load_metadata() {
  local agent_id="${1:-}"

  if [[ -z "${VAULT_TOKEN:-}" ]]; then
    log_error "Not authenticated. Call agent_authenticate first."
    return 1
  fi

  if [[ -z "$agent_id" ]]; then
    log_error "agent_id required"
    return 1
  fi

  # The id is spliced into the request URL. Reject anything that would change
  # which endpoint is hit: parent-directory segments, query/fragment markers
  # and whitespace.
  case "$agent_id" in
    *..* | *[?#[:space:]]*)
      log_error "Invalid agent_id: $agent_id"
      return 1
      ;;
  esac

  # The token header is read by curl from stdin (-H @-, curl >= 7.55.0) so the
  # token does not appear in process listings. -k turns off TLS certificate
  # verification, exactly as before.
  local response
  if ! response=$(
    printf 'X-Vault-Token: %s\n' "$VAULT_TOKEN" \
      | curl -sk -H @- "$VAULT_ADDR/v1/secret/data/agents/$agent_id"
  ); then
    log_error "Failed to load agent metadata: request to $VAULT_ADDR failed"
    return 1
  fi

  local data
  data=$(printf '%s\n' "$response" | jq -r '.data.data // empty' 2>/dev/null)

  if [[ -z "$data" ]]; then
    local reason
    reason=$(printf '%s\n' "$response" | jq -r '.errors[]? // "not found"' 2>/dev/null)
    log_error "Failed to load agent metadata: ${reason:-not found}"
    return 1
  fi

  # Assign first, export afterwards: `export VAR=$(cmd)` would hide the exit
  # status of cmd. A field absent from the record is stored as the string
  # "null" (jq -r output), as before.
  AGENT_ID=$(printf '%s\n' "$data" | jq -r '.agent_id')
  AGENT_ROLE=$(printf '%s\n' "$data" | jq -r '.agent_role')
  AGENT_TIER=$(printf '%s\n' "$data" | jq -r '.tier')
  AGENT_VERSION=$(printf '%s\n' "$data" | jq -r '.version')
  AGENT_OWNER=$(printf '%s\n' "$data" | jq -r '.owner')
  AGENT_CONFIDENCE_THRESHOLD=$(printf '%s\n' "$data" | jq -r '.confidence_threshold')
  # These two stay JSON text; agent_validate_action parses them with jq.
  AGENT_ALLOWED=$(printf '%s\n' "$data" | jq -r '.allowed_side_effects')
  AGENT_FORBIDDEN=$(printf '%s\n' "$data" | jq -r '.forbidden_actions')
  export AGENT_ID AGENT_ROLE AGENT_TIER AGENT_VERSION AGENT_OWNER
  export AGENT_CONFIDENCE_THRESHOLD AGENT_ALLOWED AGENT_FORBIDDEN

  log_info "Loaded metadata for agent: $AGENT_ID (role: $AGENT_ROLE, tier: $AGENT_TIER)"
  return 0
}
|
|
|
|
# Validate an action against the agent's allowed/forbidden lists.
#
# Usage:     agent_validate_action <action>
# Globals:   AGENT_FORBIDDEN, AGENT_ALLOWED (read) - JSON text, queried with
#            jq's index()
# Arguments: $1 - action name to check
# Outputs:   the verdict through log_info / log_warn / log_error
# Returns:   0 if the action is in the allowed list and not in the forbidden
#            list; 1 if it is forbidden or not explicitly allowed
agent_validate_action() {
  local action="${1:-}"

  # The action is passed to jq as data (--arg), never spliced into the jq
  # program: an action containing quotes or jq syntax can therefore neither
  # break the filter nor rewrite the check.

  # Check forbidden first
  if printf '%s\n' "${AGENT_FORBIDDEN:-}" \
    | jq -e --arg action "$action" 'index($action)' > /dev/null 2>&1; then
    log_error "Action '$action' is FORBIDDEN for this agent"
    return 1
  fi

  # Check allowed
  if printf '%s\n' "${AGENT_ALLOWED:-}" \
    | jq -e --arg action "$action" 'index($action)' > /dev/null 2>&1; then
    log_info "Action '$action' is ALLOWED"
    return 0
  fi

  # Not explicitly allowed or forbidden - deny by default. This is also the
  # outcome when either list is not valid JSON or jq is unavailable.
  log_warn "Action '$action' is not explicitly allowed - denying"
  return 1
}
|
|
|
|
# Emit a structured decision record as a JSON object on stdout.
#
# Usage:     agent_output <decision> <confidence> <action> [notes]
# Globals:   AGENT_ID, AGENT_VERSION (read)
# Arguments: $1 - decision
#            $2 - confidence; handed to jq with --argjson, so it has to be
#                 valid JSON (e.g. a number)
#            $3 - action
#            $4 - optional notes for humans (defaults to empty)
# Returns:   exit status of jq
agent_output() {
  local verdict="$1" score="$2" act="$3" remarks="${4:-}"

  # UTC timestamp, ISO-8601 with a trailing Z.
  local now
  now=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

  # assumptions / dependencies / side_effects are always emitted empty.
  local record_filter='{
    agent_id: $id,
    version: $ver,
    timestamp: $ts,
    action: $act,
    decision: $verdict,
    confidence: $score,
    assumptions: [],
    dependencies: [],
    side_effects: [],
    notes_for_humans: $remarks
  }'

  local -a jq_args=(
    --arg id "$AGENT_ID"
    --arg ver "$AGENT_VERSION"
    --arg ts "$now"
    --arg act "$act"
    --arg verdict "$verdict"
    --argjson score "$score"
    --arg remarks "$remarks"
  )

  jq -n "${jq_args[@]}" "$record_filter"
}
|
|
|
|
# Emit a structured error record as a JSON object on stdout
# (per foundation doc section 3.2).
#
# Usage:     agent_error <error_type> <message> <input> <progress> <recommendation>
# Globals:   AGENT_ID, AGENT_VERSION (read)
# Arguments: $1 - error type
#            $2 - error message
#            $3 - input that triggered the error
#            $4 - partial progress made before the error
#            $5 - recommended follow-up action
# Returns:   exit status of jq
agent_error() {
  local kind="$1" msg="$2" trigger="$3" partial="$4" advice="$5"

  # UTC timestamp, ISO-8601 with a trailing Z.
  local now
  now=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

  # action/decision are fixed to "ERROR" and confidence to 0; the details go
  # into the nested "error" object.
  local record_filter='{
    agent_id: $id,
    version: $ver,
    timestamp: $ts,
    action: "ERROR",
    decision: "ERROR",
    confidence: 0,
    assumptions: [],
    dependencies: [],
    side_effects: [],
    notes_for_humans: "",
    error: {
      type: $kind,
      message: $msg,
      triggering_input: $trigger,
      partial_progress: $partial,
      recommended_action: $advice
    }
  }'

  local -a jq_args=(
    --arg id "$AGENT_ID"
    --arg ver "$AGENT_VERSION"
    --arg ts "$now"
    --arg kind "$kind"
    --arg msg "$msg"
    --arg trigger "$trigger"
    --arg partial "$partial"
    --arg advice "$advice"
  )

  jq -n "${jq_args[@]}" "$record_filter"
}
|
|
|
|
# Check whether a confidence value meets the agent's threshold.
#
# Usage:     agent_check_confidence <confidence>
# Globals:   AGENT_CONFIDENCE_THRESHOLD (read)
# Arguments: $1 - confidence as a plain decimal number (e.g. 0.85)
# Outputs:   log_warn when below threshold, log_error on invalid input
# Returns:   0 if confidence >= threshold; 1 if it is below, or if either
#            value is missing or not a plain decimal number
agent_check_confidence() {
  local confidence="${1:-}"
  local threshold="${AGENT_CONFIDENCE_THRESHOLD:-}"
  local number_re='^[+-]?([0-9]+(\.[0-9]*)?|\.[0-9]+)$'

  # Validate both operands up front. Without this, an empty confidence or a
  # threshold of "null" made the comparison silently evaluate to "not below",
  # i.e. the check passed on garbage input.
  if [[ ! "$confidence" =~ $number_re ]]; then
    log_error "Invalid confidence value: '$confidence'"
    return 1
  fi
  if [[ ! "$threshold" =~ $number_re ]]; then
    log_error "Invalid confidence threshold: '$threshold'"
    return 1
  fi

  # Succeed only when the comparison positively reports "meets threshold", so
  # a missing or failing awk denies instead of approving.
  if awk -v c="$confidence" -v t="$threshold" \
    'BEGIN { exit !((c + 0) >= (t + 0)) }'; then
    return 0
  fi

  log_warn "Confidence $confidence below threshold $AGENT_CONFIDENCE_THRESHOLD"
  return 1
}
|
|
|
|
# Top-level statement: runs once each time this file is sourced or executed.
# Note that it sits before the ledger_* function definitions further down.
log_info "Agent bootstrap library loaded"
|
|
|
|
# Record one agent action in the governance ledger and update the agent's
# run counters.
#
# Usage:     ledger_write_action <action> <decision> <confidence> <success>
#                                [error_type] [error_message]
# Globals:   AGENT_ID, AGENT_VERSION, AGENT_TIER, AGENT_TOKEN_ACCESSOR (read)
#            AGENT_LEDGER_DB (read, optional) - path of the SQLite ledger;
#            defaults to /opt/agent-governance/ledger/governance.db
# Arguments: $1 - action
#            $2 - decision
#            $3 - confidence (plain decimal number)
#            $4 - success flag: 1/true or 0/false
#            $5 - optional error type
#            $6 - optional error message
# Outputs:   log_error on invalid input or when the write fails
# Returns:   0 when both statements were committed; 1 otherwise
ledger_write_action() {
  local action="${1:-}"
  local decision="${2:-}"
  local confidence="${3:-}"
  local success="${4:-}"
  local error_type="${5:-}"
  local error_message="${6:-}"
  local agent_id="${AGENT_ID:-}"
  local agent_version="${AGENT_VERSION:-}"
  local accessor="${AGENT_TOKEN_ACCESSOR:-}"
  local tier="${AGENT_TIER:-}"
  local db="${AGENT_LEDGER_DB:-/opt/agent-governance/ledger/governance.db}"
  local number_re='^[+-]?([0-9]+(\.[0-9]*)?|\.[0-9]+)$'

  # Numeric columns are written into the SQL unquoted, so they are validated
  # strictly; anything else could inject SQL.
  if [[ ! "$confidence" =~ $number_re ]]; then
    log_error "ledger_write_action: confidence must be numeric, got '$confidence'"
    return 1
  fi
  case "$success" in
    1 | true | TRUE) success=1 ;;
    0 | false | FALSE) success=0 ;;
    *)
      log_error "ledger_write_action: success must be 0 or 1, got '$success'"
      return 1
      ;;
  esac
  # jq -r renders a missing tier as the string "null"; store SQL NULL then.
  if [[ -z "$tier" || "$tier" == "null" ]]; then
    tier="NULL"
  elif [[ ! "$tier" =~ $number_re ]]; then
    log_error "ledger_write_action: AGENT_TIER must be numeric, got '$tier'"
    return 1
  fi

  local timestamp
  timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

  # Text columns: double every single quote, the SQL way of escaping a quote
  # inside a string literal.
  local sq="'"
  local q_agent_id="${agent_id//$sq/$sq$sq}"
  local q_version="${agent_version//$sq/$sq$sq}"
  local q_action="${action//$sq/$sq$sq}"
  local q_decision="${decision//$sq/$sq$sq}"
  local q_error_type="${error_type//$sq/$sq$sq}"
  local q_error_message="${error_message//$sq/$sq$sq}"
  local q_accessor="${accessor//$sq/$sq$sq}"

  # Both writes share one transaction and sqlite3 stops at the first error
  # (-bail), so the action row and the counters cannot drift apart.
  # NOTE(review): a freshly inserted agent_metrics row does not count this
  # run in compliant_runs / consecutive_compliant - confirm that is intended.
  if ! sqlite3 -bail "$db" <<SQL
BEGIN;
INSERT INTO agent_actions
  (timestamp, agent_id, agent_version, tier, action, decision, confidence, success, error_type, error_message, vault_token_accessor)
VALUES
  ('$timestamp', '$q_agent_id', '$q_version', $tier, '$q_action', '$q_decision', $confidence, $success, '$q_error_type', '$q_error_message', '$q_accessor');
INSERT INTO agent_metrics (agent_id, current_tier, total_runs, last_active_at)
VALUES ('$q_agent_id', $tier, 1, '$timestamp')
ON CONFLICT(agent_id) DO UPDATE SET
  total_runs = total_runs + 1,
  compliant_runs = CASE WHEN $success = 1 THEN compliant_runs + 1 ELSE compliant_runs END,
  consecutive_compliant = CASE WHEN $success = 1 THEN consecutive_compliant + 1 ELSE 0 END,
  last_active_at = '$timestamp',
  updated_at = '$timestamp';
COMMIT;
SQL
  then
    log_error "ledger_write_action: failed to write to ledger $db"
    return 1
  fi
  return 0
}
|
|
|
|
# Record a violation in the governance ledger and reset the agent's
# compliance streak.
#
# Usage:     ledger_write_violation <type> <severity> <description> [action] [evidence]
# Globals:   AGENT_ID (read)
#            AGENT_LEDGER_DB (read, optional) - path of the SQLite ledger;
#            defaults to /opt/agent-governance/ledger/governance.db
# Arguments: $1 - violation type
#            $2 - severity
#            $3 - description
#            $4 - optional triggering action
#            $5 - optional evidence
# Outputs:   log_warn once the violation is stored, log_error on failure
# Returns:   0 when both statements were committed; 1 when sqlite3 fails
ledger_write_violation() {
  local violation_type="${1:-}"
  local severity="${2:-}"
  local description="${3:-}"
  local action="${4:-}"
  local evidence="${5:-}"
  local agent_id="${AGENT_ID:-}"
  local db="${AGENT_LEDGER_DB:-/opt/agent-governance/ledger/governance.db}"

  local timestamp
  timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

  # Every value is free text: double each single quote (the SQL escape for a
  # quote inside a string literal) so descriptions or evidence containing
  # apostrophes can neither break the statement nor inject SQL.
  local sq="'"
  local q_agent_id="${agent_id//$sq/$sq$sq}"
  local q_type="${violation_type//$sq/$sq$sq}"
  local q_severity="${severity//$sq/$sq$sq}"
  local q_description="${description//$sq/$sq$sq}"
  local q_action="${action//$sq/$sq$sq}"
  local q_evidence="${evidence//$sq/$sq$sq}"

  # Insert the violation and update the metrics in one transaction; -bail
  # makes sqlite3 stop (and therefore not commit) at the first error.
  if ! sqlite3 -bail "$db" <<SQL
BEGIN;
INSERT INTO violations
  (timestamp, agent_id, violation_type, severity, description, triggering_action, evidence)
VALUES
  ('$timestamp', '$q_agent_id', '$q_type', '$q_severity', '$q_description', '$q_action', '$q_evidence');
UPDATE agent_metrics SET
  last_violation_at = '$timestamp',
  consecutive_compliant = 0,
  promotion_eligible = 0
WHERE agent_id = '$q_agent_id';
COMMIT;
SQL
  then
    log_error "ledger_write_violation: failed to write to ledger $db"
    return 1
  fi

  # Only claim success once the write actually went through.
  log_warn "Violation recorded: $violation_type ($severity)"
  return 0
}
|