profit 77655c298c Initial commit: Agent Governance System Phase 8
Phase 8 Production Hardening with complete governance infrastructure:

- Vault integration with tiered policies (T0-T4)
- DragonflyDB state management
- SQLite audit ledger
- Pipeline DSL and templates
- Promotion/revocation engine
- Checkpoint system for session persistence
- Health manager and circuit breaker for fault tolerance
- GitHub/Slack integrations
- Architectural test pipeline with bug watcher, suggestion engine, council review
- Multi-agent chaos testing framework

Test Results:
- Governance tests: 68/68 passing
- E2E workflow: 16/16 passing
- Phase 2 Vault: 14/14 passing
- Integration tests: 27/27 passing

Coverage: 57.6% average across 12 phases

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 22:07:06 -05:00

604 lines
20 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Tier 0 Observer Agent
=====================
A governed agent that can read documentation, view inventory,
and generate plans, but CANNOT execute any commands.
This agent enforces strict Tier 0 constraints:
- Read-only file access (within allowed paths)
- Plan generation only (no execution)
- No secret access
- No SSH/API access
- All actions logged to governance ledger
"""
import json
import os
import sys
import hashlib
import sqlite3
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional, Any
import redis
# =============================================================================
# Configuration
# =============================================================================
# Directory layout, all relative to the directory containing this script.
AGENT_DIR = Path(__file__).parent
CONFIG_FILE = AGENT_DIR / "config" / "agent.json"
WORKSPACE_DIR = AGENT_DIR / "workspace"
PLANS_DIR = AGENT_DIR / "plans"
LOGS_DIR = AGENT_DIR / "logs"

# SQLite governance ledger (absolute path, not under AGENT_DIR).
LEDGER_DB = Path("/opt/agent-governance/ledger/governance.db")

# The agent configuration is read once, at import time; a missing or
# malformed file aborts the import.
with CONFIG_FILE.open() as config_fh:
    CONFIG = json.load(config_fh)

AGENT_ID = CONFIG["agent_id"]
AGENT_TIER = CONFIG["tier"]

# Everything below comes from the "constraints" section of the config.
_CONSTRAINTS = CONFIG["constraints"]
ALLOWED_PATHS = list(map(Path, _CONSTRAINTS["allowed_paths"]))
FORBIDDEN_PATHS = _CONSTRAINTS["forbidden_paths"]
ALLOWED_ACTIONS = _CONSTRAINTS["allowed_actions"]
FORBIDDEN_ACTIONS = _CONSTRAINTS["forbidden_actions"]
# =============================================================================
# Data Classes
# =============================================================================
@dataclass
class ActionResult:
    """Outcome record returned by every agent action.

    Only ``action`` and ``success`` are required; the remaining fields
    default to "nothing to report".
    """

    # Name of the action that produced this result, e.g. "read_file".
    action: str
    # True when the action completed.
    success: bool
    # Action-specific payload; None when there is nothing to return.
    data: Any = None
    # Error text when the action was attempted and failed.
    error: Optional[str] = None
    # True when the action was refused rather than attempted.
    blocked: bool = False
    # Explanation accompanying a refusal.
    block_reason: Optional[str] = None
@dataclass
class Plan:
    """In-memory description of a generated plan.

    All fields except ``status`` are required, in the order declared.
    """

    # Unique identifier of the plan.
    plan_id: str
    # Short human-readable title.
    title: str
    # Longer explanation of what the plan is for.
    description: str
    # What the plan is aimed at.
    target: str
    # Ordered steps that make up the plan.
    steps: list
    # Steps that undo the plan; may be an empty list.
    rollback_steps: list
    # Creation timestamp, stored as a string.
    created_at: str
    # Identifier of the agent that produced the plan.
    agent_id: str
    # Lifecycle state; a new plan starts out as "draft".
    status: str = "draft"
# =============================================================================
# Governance Integration
# =============================================================================
class GovernanceClient:
    """Client for the governance back ends used by the agent.

    Two stores are involved:

    * the SQLite ledger at ``LEDGER_DB`` (``agent_actions`` rows and the
      ``agent_metrics`` counters), and
    * a Redis-protocol server on 127.0.0.1:6379 (heartbeat key and
      revocation signal).

    Every public method is best-effort: a failing store is reported on
    stdout or ignored, never raised to the caller.
    """

    def __init__(self):
        # ``None`` when no client could be built; the Redis-backed methods
        # then degrade to no-ops.
        self.redis = self._get_redis()
        self.session_id = os.environ.get("SESSION_ID", "unknown")

    def _get_redis(self) -> "Optional[redis.Redis]":
        """Build the Redis client, or return ``None`` if that fails.

        The password is taken from ``REDIS_PASSWORD`` when set; otherwise
        it is fetched from Vault with ``curl``.
        """
        try:
            # Get password from environment or file
            password = os.environ.get("REDIS_PASSWORD")
            if not password:
                # Try to get from Vault (using root token for bootstrap).
                # NOTE(review): this path reads the Vault root token from
                # disk, passes it on the curl command line and disables TLS
                # verification (-k). Left unchanged here; worth replacing
                # with a scoped token.
                import subprocess
                with open("/opt/vault/init-keys.json") as f:
                    token = json.load(f)["root_token"]
                result = subprocess.run([
                    "curl", "-sk",
                    "-H", f"X-Vault-Token: {token}",
                    "https://127.0.0.1:8200/v1/secret/data/services/dragonfly"
                ], capture_output=True, text=True)
                creds = json.loads(result.stdout)["data"]["data"]
                password = creds["password"]
            return redis.Redis(host="127.0.0.1", port=6379, password=password, decode_responses=True)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
            # ``sys.exit`` are not swallowed while bootstrapping.
            return None

    def log_action(self, action: str, decision: str, target: str,
                   success: bool, confidence: float = 1.0,
                   error: Optional[str] = None):
        """Append one row to the ``agent_actions`` ledger table.

        Args:
            action: Name of the action being recorded.
            decision: Decision label stored with the row.
            target: What the action was applied to.
            success: Stored as 1 or 0.
            confidence: Confidence value stored with the row.
            error: Optional error message.

        A ledger failure only prints a warning; it never raises.
        """
        try:
            conn = sqlite3.connect(LEDGER_DB)
            try:
                conn.execute("""
                    INSERT INTO agent_actions
                    (timestamp, agent_id, agent_version, tier, action, decision,
                    confidence, target, success, error_message, session_id)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """, (
                    datetime.now(timezone.utc).isoformat(),
                    AGENT_ID,
                    CONFIG.get("agent_version", "1.0.0"),
                    AGENT_TIER,
                    action,
                    decision,
                    confidence,
                    target,
                    1 if success else 0,
                    error,
                    self.session_id
                ))
                conn.commit()
            finally:
                # Close even when the INSERT or commit fails, so a broken
                # ledger does not leak one connection per logged action.
                conn.close()
        except Exception as e:
            print(f"Warning: Could not log action: {e}")

    def update_heartbeat(self):
        """Write the current Unix time to the heartbeat key (60 s expiry)."""
        if not self.redis:
            return
        try:
            self.redis.set(f"agent:{AGENT_ID}:heartbeat", str(int(datetime.now().timestamp())), ex=60)
        except Exception:
            # Deliberately silent: a missed heartbeat must not fail the
            # action that triggered it.
            pass

    def check_revocation(self) -> bool:
        """Return True only when the revoke signal key holds ``"1"``.

        NOTE: this fails open. With no Redis client, or when the lookup
        raises, the agent is reported as not revoked.
        """
        if self.redis:
            try:
                signal = self.redis.get(f"agent:{AGENT_ID}:revoke_signal")
                return signal == "1"
            except Exception:
                pass
        return False

    def increment_compliant(self):
        """Bump this agent's run counters in ``agent_metrics``.

        A ledger failure only prints a warning; it never raises.
        """
        try:
            conn = sqlite3.connect(LEDGER_DB)
            try:
                conn.execute("""
                    UPDATE agent_metrics
                    SET compliant_runs = compliant_runs + 1,
                    consecutive_compliant = consecutive_compliant + 1,
                    total_runs = total_runs + 1,
                    last_active_at = datetime('now'),
                    updated_at = datetime('now')
                    WHERE agent_id = ?
                """, (AGENT_ID,))
                conn.commit()
            finally:
                conn.close()
        except Exception as e:
            print(f"Warning: Could not update metrics: {e}")
# =============================================================================
# Tier 0 Agent
# =============================================================================
class Tier0Agent:
    """
    A strictly constrained Tier 0 agent.

    It can read files and list directories inside the configured allowed
    paths, write plan and review-request JSON files, and nothing else.
    Every permitted action checks revocation first, refreshes the
    heartbeat, and is logged to the governance ledger; every forbidden
    action is logged as BLOCKED.
    """

    def __init__(self):
        self.governance = GovernanceClient()
        self._check_not_revoked()

    def _now(self) -> str:
        """Return the current UTC time as an ISO-8601 string."""
        return datetime.now(timezone.utc).isoformat()

    def _check_not_revoked(self):
        """Exit the process with status 1 if the agent has been revoked."""
        if self.governance.check_revocation():
            print("[REVOKED] Agent has been revoked. Exiting.")
            sys.exit(1)

    @staticmethod
    def _is_within(target: Path, root: Path) -> bool:
        """Return True when *target* is *root* itself or lies beneath it.

        The comparison is component-wise, so ``/a/plans-x`` is not treated
        as being inside ``/a/plans``.
        """
        try:
            target.relative_to(root)
        except ValueError:
            return False
        return True

    @staticmethod
    def _save_json(destination: Path, document: dict) -> None:
        """Write *document* to *destination* as indented JSON.

        Serialisation happens before the file is opened, so an
        unserialisable document leaves no partial file behind. The parent
        directory is created when missing.
        """
        text = json.dumps(document, indent=2)
        destination.parent.mkdir(parents=True, exist_ok=True)
        with open(destination, "w") as f:
            f.write(text)

    def _is_path_allowed(self, path: str) -> bool:
        """Check if path is within allowed paths and not forbidden."""
        target = Path(path).resolve()
        # Forbidden patterns take precedence over the allow-list.
        for pattern in FORBIDDEN_PATHS:
            if pattern.startswith("**/"):
                # "**/x" is treated as "x appears anywhere in the path".
                if pattern[3:] in str(target):
                    return False
            elif target.match(pattern):
                return False
        return any(
            self._is_within(target, Path(allowed).resolve())
            for allowed in ALLOWED_PATHS
        )

    def _block_action(self, action: str, reason: str) -> ActionResult:
        """Log a BLOCKED ledger entry and return the matching result."""
        self.governance.log_action(
            action=action,
            decision="BLOCKED",
            target="N/A",
            success=False,
            error=reason
        )
        return ActionResult(
            action=action,
            success=False,
            blocked=True,
            block_reason=reason
        )

    # -------------------------------------------------------------------------
    # Allowed Actions
    # -------------------------------------------------------------------------
    def read_file(self, path: str) -> ActionResult:
        """Read a text file if *path* passes the path policy.

        Returns a result whose ``data`` holds ``path``, ``content`` and
        ``size``; a blocked result when the path is not allowed; or a
        failed result carrying the error text.
        """
        self._check_not_revoked()
        self.governance.update_heartbeat()
        if not self._is_path_allowed(path):
            return self._block_action("read_file", f"Path not allowed: {path}")
        try:
            with open(path) as f:
                content = f.read()
        except Exception as e:
            self.governance.log_action(
                action="read_file",
                decision="EXECUTE",
                target=path,
                success=False,
                error=str(e)
            )
            return ActionResult(action="read_file", success=False, error=str(e))
        self.governance.log_action(
            action="read_file",
            decision="EXECUTE",
            target=path,
            success=True
        )
        return ActionResult(
            action="read_file",
            success=True,
            data={"path": path, "content": content, "size": len(content)}
        )

    def list_directory(self, path: str) -> ActionResult:
        """List a directory if *path* passes the path policy.

        Each entry carries ``name``, ``is_dir`` and ``size`` (0 for
        anything that is not a regular file).
        """
        self._check_not_revoked()
        self.governance.update_heartbeat()
        if not self._is_path_allowed(path):
            return self._block_action("list_directory", f"Path not allowed: {path}")
        try:
            entries = [
                {
                    "name": entry.name,
                    "is_dir": entry.is_dir(),
                    "size": entry.stat().st_size if entry.is_file() else 0
                }
                for entry in Path(path).iterdir()
            ]
        except Exception as e:
            # Failures are logged too, matching read_file.
            self.governance.log_action(
                action="list_directory",
                decision="EXECUTE",
                target=path,
                success=False,
                error=str(e)
            )
            return ActionResult(action="list_directory", success=False, error=str(e))
        self.governance.log_action(
            action="list_directory",
            decision="EXECUTE",
            target=path,
            success=True
        )
        return ActionResult(
            action="list_directory",
            success=True,
            data={"path": path, "entries": entries}
        )

    def generate_plan(self, title: str, description: str, target: str,
                      steps: list, rollback_steps: Optional[list] = None) -> ActionResult:
        """Generate a plan file (does NOT execute it).

        The plan is written to ``PLANS_DIR/<plan_id>.json`` with status
        "draft" and ``requires_approval`` set. On success the compliant-run
        counter is incremented. If the plan cannot be serialised or
        written, a failed result is returned instead of raising.
        """
        self._check_not_revoked()
        self.governance.update_heartbeat()
        # Plan ID: local timestamp plus the first 8 hex digits of the
        # title's SHA-256.
        plan_id = f"plan-{datetime.now().strftime('%Y%m%d-%H%M%S')}-{hashlib.sha256(title.encode()).hexdigest()[:8]}"
        plan = Plan(
            plan_id=plan_id,
            title=title,
            description=description,
            target=target,
            steps=steps,
            rollback_steps=rollback_steps or [],
            created_at=self._now(),
            agent_id=AGENT_ID,
            status="draft"
        )
        plan_file = PLANS_DIR / f"{plan_id}.json"
        plan_dict = {
            "plan_id": plan.plan_id,
            "title": plan.title,
            "description": plan.description,
            "target": plan.target,
            "steps": plan.steps,
            "rollback_steps": plan.rollback_steps,
            "created_at": plan.created_at,
            "agent_id": plan.agent_id,
            "agent_tier": AGENT_TIER,
            "status": plan.status,
            "requires_approval": True,
            "approved_by": None,
            "executed": False
        }
        try:
            self._save_json(plan_file, plan_dict)
        except (OSError, TypeError, ValueError) as e:
            self.governance.log_action(
                action="generate_plan",
                decision="PLAN",
                target=target,
                success=False,
                confidence=0.9,
                error=str(e)
            )
            return ActionResult(action="generate_plan", success=False, error=str(e))
        self.governance.log_action(
            action="generate_plan",
            decision="PLAN",
            target=target,
            success=True,
            confidence=0.9
        )
        # Only a plan that was actually saved counts as a compliant run.
        self.governance.increment_compliant()
        return ActionResult(
            action="generate_plan",
            success=True,
            data={
                "plan_id": plan_id,
                "plan_file": str(plan_file),
                "message": "Plan generated. Requires approval before execution."
            }
        )

    def request_review(self, subject: str, details: str) -> ActionResult:
        """Request human review/assistance.

        Writes a pending review request to
        ``WORKSPACE_DIR/<review_id>.json``. If the request cannot be
        written, a failed result is returned instead of raising.
        """
        self._check_not_revoked()
        self.governance.update_heartbeat()
        review_id = f"review-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
        review_request = {
            "review_id": review_id,
            "agent_id": AGENT_ID,
            "agent_tier": AGENT_TIER,
            "subject": subject,
            "details": details,
            "created_at": self._now(),
            "status": "pending"
        }
        review_file = WORKSPACE_DIR / f"{review_id}.json"
        try:
            self._save_json(review_file, review_request)
        except (OSError, TypeError, ValueError) as e:
            self.governance.log_action(
                action="request_review",
                decision="PLAN",
                target=subject,
                success=False,
                error=str(e)
            )
            return ActionResult(action="request_review", success=False, error=str(e))
        self.governance.log_action(
            action="request_review",
            decision="PLAN",
            target=subject,
            success=True
        )
        return ActionResult(
            action="request_review",
            success=True,
            data={"review_id": review_id, "message": "Review request submitted."}
        )

    # -------------------------------------------------------------------------
    # Forbidden Actions (Always Blocked)
    # -------------------------------------------------------------------------
    def execute_command(self, command: str) -> ActionResult:
        """FORBIDDEN: Execute a command"""
        return self._block_action(
            "execute_command",
            "Tier 0 agents cannot execute commands. Generate a plan instead."
        )

    def write_file(self, path: str, content: str) -> ActionResult:
        """FORBIDDEN except inside the plans directory.

        A path is accepted only when, after resolving symlinks and "..",
        it is the plans directory or lies beneath it. Anything else is
        blocked and logged.
        """
        resolved = Path(path).resolve()
        # Component-wise containment: a plain string-prefix test would also
        # accept siblings such as "<agent>/plans-evil/x".
        if not self._is_within(resolved, PLANS_DIR.resolve()):
            return self._block_action(
                "write_file",
                "Tier 0 agents cannot write files outside plans directory."
            )
        # The permitted branch is a real action, so it follows the same
        # revocation/heartbeat protocol as the other allowed actions.
        self._check_not_revoked()
        self.governance.update_heartbeat()
        try:
            # Open the path that was checked, not the caller's spelling.
            with open(resolved, "w") as f:
                f.write(content)
        except Exception as e:
            self.governance.log_action(
                action="write_plan_file",
                decision="EXECUTE",
                target=path,
                success=False,
                error=str(e)
            )
            return ActionResult(action="write_file", success=False, error=str(e))
        self.governance.log_action(
            action="write_plan_file",
            decision="EXECUTE",
            target=path,
            success=True
        )
        return ActionResult(action="write_file", success=True, data={"path": path})

    def ssh_connect(self, host: str) -> ActionResult:
        """FORBIDDEN: SSH to a host"""
        return self._block_action(
            "ssh_connect",
            "Tier 0 agents cannot SSH to hosts. Generate a plan instead."
        )

    def terraform_apply(self, directory: str) -> ActionResult:
        """FORBIDDEN: Apply Terraform"""
        return self._block_action(
            "terraform_apply",
            "Tier 0 agents cannot apply Terraform. Use terraform_plan to generate a plan."
        )

    def ansible_run(self, playbook: str) -> ActionResult:
        """FORBIDDEN: Run Ansible playbook"""
        return self._block_action(
            "ansible_run",
            "Tier 0 agents cannot run Ansible. Generate a plan with check-mode only."
        )
# =============================================================================
# CLI Interface
# =============================================================================
def _parse_json_array(parser, flag: str, raw: str) -> list:
    """Decode *raw* as a JSON array, or abort through ``parser.error``.

    ``parser.error`` prints the usage line plus the message and exits with
    status 2, so a malformed argument no longer ends in a traceback.
    """
    try:
        value = json.loads(raw)
    except json.JSONDecodeError as exc:
        parser.error(f"{flag} must be valid JSON: {exc}")
    if not isinstance(value, list):
        parser.error(f"{flag} must be a JSON array")
    return value


def main():
    """Command-line entry point for the Tier 0 agent.

    Sub-commands: ``status``, ``read``, ``ls``, ``plan``, ``review`` and
    ``test-forbidden``. ``test-forbidden`` exits with status 1 when any
    forbidden action is not blocked.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Tier 0 Observer Agent")
    subparsers = parser.add_subparsers(dest="command", required=True)
    # Status
    subparsers.add_parser("status", help="Show agent status")
    # Read file
    read_parser = subparsers.add_parser("read", help="Read a file")
    read_parser.add_argument("path", help="File path to read")
    # List directory
    ls_parser = subparsers.add_parser("ls", help="List directory")
    ls_parser.add_argument("path", nargs="?", default=str(WORKSPACE_DIR))
    # Generate plan
    plan_parser = subparsers.add_parser("plan", help="Generate a plan")
    plan_parser.add_argument("--title", required=True)
    plan_parser.add_argument("--description", required=True)
    plan_parser.add_argument("--target", required=True)
    plan_parser.add_argument("--steps", required=True, help="JSON array of steps")
    plan_parser.add_argument("--rollback", help="JSON array of rollback steps")
    # Request review
    review_parser = subparsers.add_parser("review", help="Request human review")
    review_parser.add_argument("--subject", required=True)
    review_parser.add_argument("--details", required=True)
    # Test forbidden actions
    subparsers.add_parser("test-forbidden", help="Test that forbidden actions are blocked")
    args = parser.parse_args()

    # Constructing the agent checks revocation and exits if revoked.
    agent = Tier0Agent()

    if args.command == "status":
        print(f"\n{'='*50}")
        print("TIER 0 AGENT STATUS")
        print(f"{'='*50}")
        print(f"Agent ID: {AGENT_ID}")
        print(f"Tier: {AGENT_TIER} (Observer)")
        print(f"Session: {os.environ.get('SESSION_ID', 'N/A')}")
        print(f"\nAllowed Actions: {', '.join(ALLOWED_ACTIONS)}")
        print(f"Forbidden Actions: {', '.join(FORBIDDEN_ACTIONS)}")
        print(f"\nWorkspace: {WORKSPACE_DIR}")
        print(f"Plans: {PLANS_DIR}")
        # Check revocation
        if agent.governance.check_revocation():
            print("\n[REVOKED] Agent has been revoked!")
        else:
            print("\n[ACTIVE] Agent is active")
        print(f"{'='*50}")
    elif args.command == "read":
        result = agent.read_file(args.path)
        if result.success:
            print(result.data["content"])
        elif result.blocked:
            print(f"[BLOCKED] {result.block_reason}")
        else:
            print(f"[ERROR] {result.error}")
    elif args.command == "ls":
        result = agent.list_directory(args.path)
        if result.success:
            for entry in result.data["entries"]:
                prefix = "d" if entry["is_dir"] else "-"
                print(f"{prefix} {entry['name']}")
        elif result.blocked:
            print(f"[BLOCKED] {result.block_reason}")
        else:
            print(f"[ERROR] {result.error}")
    elif args.command == "plan":
        steps = _parse_json_array(plan_parser, "--steps", args.steps)
        # An omitted or empty --rollback means "no rollback steps".
        rollback = (
            _parse_json_array(plan_parser, "--rollback", args.rollback)
            if args.rollback else []
        )
        result = agent.generate_plan(
            title=args.title,
            description=args.description,
            target=args.target,
            steps=steps,
            rollback_steps=rollback
        )
        if result.success:
            print(f"\n[OK] Plan generated: {result.data['plan_id']}")
            print(f"File: {result.data['plan_file']}")
            print(f"Note: {result.data['message']}")
        else:
            print(f"[ERROR] {result.error}")
    elif args.command == "review":
        result = agent.request_review(args.subject, args.details)
        if result.success:
            print(f"[OK] Review request: {result.data['review_id']}")
        else:
            print(f"[ERROR] {result.error}")
    elif args.command == "test-forbidden":
        print("\n" + "="*50)
        print("TESTING FORBIDDEN ACTIONS")
        print("="*50)
        tests = [
            ("execute_command", lambda: agent.execute_command("ls -la")),
            ("write_file", lambda: agent.write_file("/etc/passwd", "test")),
            ("ssh_connect", lambda: agent.ssh_connect("10.77.10.1")),
            ("terraform_apply", lambda: agent.terraform_apply("./infra")),
            ("ansible_run", lambda: agent.ansible_run("playbook.yml")),
        ]
        all_blocked = True
        for name, test_fn in tests:
            result = test_fn()
            if result.blocked:
                print(f"[BLOCKED] {name}: {result.block_reason}")
            else:
                print(f"[FAIL] {name} was NOT blocked!")
                all_blocked = False
        print("="*50)
        if all_blocked:
            print("[OK] All forbidden actions correctly blocked")
        else:
            print("[FAIL] Some actions were not blocked!")
            sys.exit(1)


if __name__ == "__main__":
    main()