# agent-governance/pipeline/core.py

#!/usr/bin/env python3
"""
Pipeline Core Definitions
=========================

This module contains the authoritative definitions for the agent governance
pipeline system. All other code (tests, demos, orchestrators) should import
from here to ensure consistency with the architecture specification.

Architecture Reference: /opt/agent-governance/docs/ARCHITECTURE.md Section 4.2
"""

import hashlib
import json
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from typing import Any, Dict, List, Optional


# =============================================================================
# Agent Lifecycle Phases (OFFICIAL)
# =============================================================================
# These phases define the complete agent lifecycle as specified in ARCHITECTURE.md
# Order: BOOTSTRAP -> PREFLIGHT -> PLAN -> EXECUTE -> VERIFY -> PACKAGE -> REPORT -> EXIT

class AgentPhase(str, Enum):
    """
    Lifecycle phases of an agent, per ARCHITECTURE.md Section 4.2.

    Members also subclass ``str``, so each one compares equal to its own
    value (``AgentPhase.PLAN == "PLAN"``). The comment next to each member
    gives the meaning of that phase.
    """
    BOOTSTRAP = "BOOTSTRAP"  # Agent initialization and authentication
    PREFLIGHT = "PREFLIGHT"  # Pre-execution validation (sandbox, inventory, dependencies)
    PLAN = "PLAN"            # Generate and validate execution plan
    EXECUTE = "EXECUTE"      # Perform the planned actions
    VERIFY = "VERIFY"        # Validate execution results
    PACKAGE = "PACKAGE"      # Bundle artifacts and evidence
    REPORT = "REPORT"        # Generate completion report
    EXIT = "EXIT"            # Clean shutdown and resource release
    REVOKED = "REVOKED"      # Agent was revoked (terminal state)


# Lifecycle phases in execution order. REVOKED is left out on purpose: it is
# a terminal state, not a step in the normal progression.
AGENT_PHASES_ORDERED: List[AgentPhase] = [
    AgentPhase.BOOTSTRAP,
    AgentPhase.PREFLIGHT,
    AgentPhase.PLAN,
    AgentPhase.EXECUTE,
    AgentPhase.VERIFY,
    AgentPhase.PACKAGE,
    AgentPhase.REPORT,
    AgentPhase.EXIT,
]

# The same sequence as plain string values, for callers that work with names.
AGENT_PHASE_NAMES: List[str] = [phase.value for phase in AGENT_PHASES_ORDERED]


# =============================================================================
# Agent Status (OFFICIAL)
# =============================================================================

class AgentStatus(str, Enum):
    """
    Runtime status values for an agent.

    Defined per ARCHITECTURE.md and runtime/governance.py. Members also
    subclass ``str`` and each value is identical to the member name.
    """
    PENDING = "PENDING"        # Awaiting start
    STARTING = "STARTING"      # Initialization in progress
    RUNNING = "RUNNING"        # Actively executing
    PAUSED = "PAUSED"          # Temporarily suspended (for plan clarification)
    COMPLETED = "COMPLETED"    # Successfully finished
    FAILED = "FAILED"          # Execution failed
    REVOKED = "REVOKED"        # Forcibly terminated
    RECOVERING = "RECOVERING"  # Recovering from chaos/error condition


# =============================================================================
# Pipeline Stage Types (OFFICIAL)
# =============================================================================

class StageType(str, Enum):
    """
    Kinds of pipeline stage, as defined in pipeline.schema.json.

    Values are the lowercase strings used for each stage type.
    """
    AGENT = "agent"          # Executes an agent task
    GATE = "gate"            # Approval/consensus checkpoint (human or automated)
    PARALLEL = "parallel"    # Concurrent execution of multiple branches
    CONDITION = "condition"  # Conditional branching (if/then/else)


class StageStatus(str, Enum):
    """
    Official execution status values for a pipeline stage.

    Each value is the lowercase form of its member name.
    """
    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
    SKIPPED = "skipped"


# =============================================================================
# Output Types (Alpha/Beta/Gamma)
# =============================================================================

class OutputType(str, Enum):
    """
    Classification of agent outputs, used for checkpoint tracking.

    The three tiers run from draft (ALPHA) through refined (BETA) to
    final (GAMMA).
    """
    ALPHA = "alpha"  # Initial/draft outputs (plans, analysis)
    BETA = "beta"    # Refined outputs (validated plans, partial results)
    GAMMA = "gamma"  # Final outputs (completed work, verified results)


# Typical output type produced in each lifecycle phase.
# AgentPhase.REVOKED has no entry in this table.
PHASE_OUTPUT_TYPES: Dict[AgentPhase, OutputType] = {
    # Draft-level outputs
    AgentPhase.BOOTSTRAP: OutputType.ALPHA,
    AgentPhase.PREFLIGHT: OutputType.ALPHA,
    # Refined outputs
    AgentPhase.PLAN: OutputType.BETA,
    AgentPhase.EXECUTE: OutputType.BETA,
    # Final outputs
    AgentPhase.VERIFY: OutputType.GAMMA,
    AgentPhase.PACKAGE: OutputType.GAMMA,
    AgentPhase.REPORT: OutputType.GAMMA,
    AgentPhase.EXIT: OutputType.GAMMA,
}


# =============================================================================
# Chaos Conditions
# =============================================================================

class ChaosCondition(str, Enum):
    """
    Fault conditions that can be injected to test resilience.

    Each value is the lowercase form of its member name.
    """
    NONE = "none"
    TOKEN_REVOKED = "token_revoked"
    LOCK_LOST = "lock_lost"
    STATE_CORRUPTED = "state_corrupted"
    HEARTBEAT_TIMEOUT = "heartbeat_timeout"
    ERROR_SPIKE = "error_spike"
    NETWORK_DELAY = "network_delay"


class ViolationSeverity(str, Enum):
    """
    How severe a violation is, which determines the response to it.

    Kept in sync with runtime/revocation.py. Members are declared from
    most to least severe.
    """
    CRITICAL = "critical"  # Revoke immediately and raise an alert
    HIGH = "high"          # Revoke immediately
    MEDIUM = "medium"      # Warn first; a second offense leads to revocation
    LOW = "low"            # Warn only


class ViolationType(str, Enum):
    """
    Complete violation taxonomy, grouped below by severity class.

    Kept in sync with runtime/revocation.py. Every value is identical to
    its member name. The response for each severity class is:

    - Critical: immediate revocation + alert
    - High: immediate revocation
    - Medium: warning, then revocation
    - Low: warning only
    """
    # --- Critical ---
    UNAUTHORIZED_POOL = "UNAUTHORIZED_POOL"
    APPLY_WITHOUT_PLAN = "APPLY_WITHOUT_PLAN"
    RUN_WITHOUT_CHECK = "RUN_WITHOUT_CHECK"
    UNAUTHORIZED_PROD = "UNAUTHORIZED_PROD"
    UNRECORDED_ROOT = "UNRECORDED_ROOT"
    BASELINE_MUTATION = "BASELINE_MUTATION"

    # --- High ---
    ERROR_BUDGET_EXCEEDED = "ERROR_BUDGET_EXCEEDED"
    PROCEDURE_VIOLATION = "PROCEDURE_VIOLATION"
    HEARTBEAT_TIMEOUT = "HEARTBEAT_TIMEOUT"
    LOCK_EXPIRED = "LOCK_EXPIRED"

    # --- Medium ---
    SCOPE_VIOLATION = "SCOPE_VIOLATION"
    FORBIDDEN_ACTION = "FORBIDDEN_ACTION"

    # --- Low ---
    CONFIDENCE_BELOW_THRESHOLD = "CONFIDENCE_BELOW_THRESHOLD"
    MISSING_ARTIFACT = "MISSING_ARTIFACT"


# Alias kept for backwards compatibility: RevocationType is ViolationType.
RevocationType = ViolationType


# Severity assigned to every violation type, grouped by severity level.
VIOLATION_SEVERITY_MAP: Dict[ViolationType, ViolationSeverity] = {
    # Critical
    ViolationType.UNAUTHORIZED_POOL: ViolationSeverity.CRITICAL,
    ViolationType.APPLY_WITHOUT_PLAN: ViolationSeverity.CRITICAL,
    ViolationType.RUN_WITHOUT_CHECK: ViolationSeverity.CRITICAL,
    ViolationType.UNAUTHORIZED_PROD: ViolationSeverity.CRITICAL,
    ViolationType.UNRECORDED_ROOT: ViolationSeverity.CRITICAL,
    ViolationType.BASELINE_MUTATION: ViolationSeverity.CRITICAL,
    # High
    ViolationType.ERROR_BUDGET_EXCEEDED: ViolationSeverity.HIGH,
    ViolationType.PROCEDURE_VIOLATION: ViolationSeverity.HIGH,
    ViolationType.HEARTBEAT_TIMEOUT: ViolationSeverity.HIGH,
    ViolationType.LOCK_EXPIRED: ViolationSeverity.HIGH,
    # Medium
    ViolationType.SCOPE_VIOLATION: ViolationSeverity.MEDIUM,
    ViolationType.FORBIDDEN_ACTION: ViolationSeverity.MEDIUM,
    # Low
    ViolationType.CONFIDENCE_BELOW_THRESHOLD: ViolationSeverity.LOW,
    ViolationType.MISSING_ARTIFACT: ViolationSeverity.LOW,
}


# =============================================================================
# Integration Event Types
# =============================================================================

class IntegrationEventType(str, Enum):
    """
    Events emitted to external integrations (GitHub, Slack, etc.).

    The events correspond to AgentPhase lifecycle steps and to governance
    events. Each value is the lowercase form of its member name.
    """
    # --- Lifecycle ---
    PLAN_CREATED = "plan_created"              # AgentPhase.PLAN completed
    EXECUTION_STARTED = "execution_started"    # AgentPhase.EXECUTE started
    EXECUTION_COMPLETE = "execution_complete"  # AgentPhase.EXECUTE completed

    # --- Governance ---
    VIOLATION_DETECTED = "violation_detected"    # ViolationType triggered
    PROMOTION_REQUESTED = "promotion_requested"  # Tier upgrade request
    PROMOTION_APPROVED = "promotion_approved"    # Tier upgrade granted
    AGENT_REVOKED = "agent_revoked"              # Agent token revoked

    # --- Workflow ---
    APPROVAL_REQUIRED = "approval_required"  # StageType.GATE reached
    HEARTBEAT = "heartbeat"                  # Agent health signal


# Lifecycle phase associated with an integration event. Only the events
# listed here have an associated phase; the others have no entry.
INTEGRATION_EVENT_PHASE_MAP: Dict[IntegrationEventType, AgentPhase] = {
    IntegrationEventType.PLAN_CREATED: AgentPhase.PLAN,
    IntegrationEventType.EXECUTION_STARTED: AgentPhase.EXECUTE,
    IntegrationEventType.EXECUTION_COMPLETE: AgentPhase.EXECUTE,
    IntegrationEventType.AGENT_REVOKED: AgentPhase.REVOKED,
}


# =============================================================================
# Data Classes
# =============================================================================

def _utcnow_naive() -> datetime:
    """
    Return the current UTC time as a naive ``datetime``.

    Produces the same kind of value as ``datetime.utcnow()`` (no tzinfo),
    without calling that function, which is deprecated since Python 3.12.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


@dataclass
class AgentOutput:
    """
    Represents an Alpha/Beta/Gamma output from an agent at a checkpoint.

    Fields:
        agent_id: Identifier of the agent that produced the output.
        output_type: Alpha/Beta/Gamma classification of the output.
        phase: Lifecycle phase the output belongs to.
        content: Output payload; must be JSON-serializable when the
            checksum is derived from it.
        timestamp: Naive UTC datetime; defaults to the creation time.
        checksum: First 12 hex characters of the SHA-256 digest of the
            JSON-encoded content. Computed automatically when left empty.
    """
    agent_id: str
    output_type: OutputType
    phase: AgentPhase
    content: Dict[str, Any]
    timestamp: datetime = field(default_factory=_utcnow_naive)
    checksum: str = ""

    def __post_init__(self):
        """Derive the checksum from ``content`` unless one was supplied."""
        if not self.checksum:
            # sort_keys makes the digest independent of dict insertion order.
            # Errors from json.dumps (non-serializable content) propagate.
            self.checksum = hashlib.sha256(
                json.dumps(self.content, sort_keys=True).encode()
            ).hexdigest()[:12]


@dataclass
class StageResult:
    """
    Outcome of executing one pipeline stage.

    Only ``name`` and ``status`` are required. The remaining fields default
    to ``None``, except ``artifacts``, which defaults to a fresh empty dict
    for each instance.
    """
    # Required
    name: str
    status: StageStatus
    # Optional
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    artifacts: Dict[str, Any] = field(default_factory=dict)
    error: Optional[str] = None
    agent_id: Optional[str] = None


@dataclass
class PipelineContext:
    """
    Runtime context handed from stage to stage during a pipeline run.

    ``pipeline_name``, ``run_id`` and ``inputs`` are required. The three
    remaining dict fields each default to a fresh empty dict per instance.
    """
    # Required
    pipeline_name: str
    run_id: str
    inputs: Dict[str, Any]
    # Per-run state, empty by default
    variables: Dict[str, Any] = field(default_factory=dict)
    artifacts: Dict[str, Any] = field(default_factory=dict)
    stage_results: Dict[str, StageResult] = field(default_factory=dict)


@dataclass
class ErrorBudget:
    """
    Error budget limits configured for an agent.

    All three limits are integers with the defaults shown below.
    """
    max_total_errors: int = 12         # limit on total errors
    max_same_error_repeats: int = 3    # limit on repeats of the same error
    max_procedure_violations: int = 1  # limit on procedure violations


@dataclass
class ClarifiedPlan:
    """
    Clarified plan that overwatch broadcasts after an error threshold.

    Every field except ``acknowledged_by`` is required;
    ``acknowledged_by`` defaults to a fresh empty list per instance.
    """
    # Identity and trigger
    plan_id: str
    trigger_reason: str
    # Inputs reviewed while clarifying
    history_reviewed: List[str]
    outputs_analyzed: List[str]
    # Objective before and after clarification, plus the adjustments made
    original_objective: str
    clarified_objective: str
    adjustments: List[str]
    # Broadcast bookkeeping
    broadcast_at: datetime
    acknowledged_by: List[str] = field(default_factory=list)


# =============================================================================
# DragonflyDB Key Patterns (OFFICIAL)
# =============================================================================

class RedisKeys:
    """
    Official DragonflyDB keyspace patterns from ARCHITECTURE.md.

    The upper-case attributes are ``str.format`` templates. Each classmethod
    fills in the placeholders of one template and returns the finished key.
    ``REVOCATIONS`` has no placeholders and is used as-is.
    """
    # Agent keys
    AGENT_PACKET = "agent:{agent_id}:packet"        # Instruction packet
    AGENT_STATE = "agent:{agent_id}:state"          # Runtime state
    AGENT_ERRORS = "agent:{agent_id}:errors"        # Error counters
    AGENT_HEARTBEAT = "agent:{agent_id}:heartbeat"  # Last seen
    AGENT_LOCK = "agent:{agent_id}:lock"            # Execution lock
    AGENT_OUTPUT = "agent:{agent_id}:output:{type}" # Alpha/Beta/Gamma outputs

    # Task keys
    TASK_ACTIVE_AGENT = "task:{task_id}:active_agent"
    TASK_ARTIFACTS = "task:{task_id}:artifacts"

    # Project keys
    PROJECT_AGENTS = "project:{project_id}:agents"
    PROJECT_OBJECTIVE = "project:{project_id}:objective"
    PROJECT_PLAN = "project:{project_id}:plan:{plan_id}"
    PROJECT_BROADCAST = "project:{project_id}:broadcast"

    # Coordination keys
    BLACKBOARD = "blackboard:{task}:{section}"
    MESSAGE = "msg:{task}:{channel}"
    REVOCATIONS = "revocations:ledger"
    HANDOFF = "handoff:{task}:latest"

    # History keys
    HISTORY_RUNS = "history:{agent_id}:runs"

    # ----- Agent key builders -----

    @classmethod
    def agent_packet(cls, agent_id: str) -> str:
        """Return the instruction-packet key for ``agent_id``."""
        return cls.AGENT_PACKET.format(agent_id=agent_id)

    @classmethod
    def agent_state(cls, agent_id: str) -> str:
        """Return the runtime-state key for ``agent_id``."""
        return cls.AGENT_STATE.format(agent_id=agent_id)

    @classmethod
    def agent_errors(cls, agent_id: str) -> str:
        """Return the error-counter key for ``agent_id``."""
        return cls.AGENT_ERRORS.format(agent_id=agent_id)

    @classmethod
    def agent_heartbeat(cls, agent_id: str) -> str:
        """Return the heartbeat key for ``agent_id``."""
        return cls.AGENT_HEARTBEAT.format(agent_id=agent_id)

    @classmethod
    def agent_lock(cls, agent_id: str) -> str:
        """Return the execution-lock key for ``agent_id``."""
        return cls.AGENT_LOCK.format(agent_id=agent_id)

    @classmethod
    def agent_output(cls, agent_id: str, output_type: str) -> str:
        """
        Return the output key for ``agent_id`` and ``output_type``.

        ``output_type`` may be a plain string or an Enum member. A member is
        reduced to its ``.value`` first: from Python 3.11 on, formatting a
        ``(str, Enum)`` member yields ``ClassName.MEMBER`` instead of the
        value, which would otherwise produce a wrong key.
        """
        if isinstance(output_type, Enum):
            output_type = output_type.value
        return cls.AGENT_OUTPUT.format(agent_id=agent_id, type=output_type)

    # ----- Task key builders -----

    @classmethod
    def task_active_agent(cls, task_id: str) -> str:
        """Return the active-agent key for ``task_id``."""
        return cls.TASK_ACTIVE_AGENT.format(task_id=task_id)

    @classmethod
    def task_artifacts(cls, task_id: str) -> str:
        """Return the artifacts key for ``task_id``."""
        return cls.TASK_ARTIFACTS.format(task_id=task_id)

    # ----- Project key builders -----

    @classmethod
    def project_agents(cls, project_id: str) -> str:
        """Return the agents key for ``project_id``."""
        return cls.PROJECT_AGENTS.format(project_id=project_id)

    @classmethod
    def project_objective(cls, project_id: str) -> str:
        """Return the objective key for ``project_id``."""
        return cls.PROJECT_OBJECTIVE.format(project_id=project_id)

    @classmethod
    def project_plan(cls, project_id: str, plan_id: str) -> str:
        """Return the key of plan ``plan_id`` within ``project_id``."""
        return cls.PROJECT_PLAN.format(project_id=project_id, plan_id=plan_id)

    @classmethod
    def project_broadcast(cls, project_id: str) -> str:
        """Return the broadcast key for ``project_id``."""
        return cls.PROJECT_BROADCAST.format(project_id=project_id)

    # ----- Coordination key builders -----

    @classmethod
    def blackboard(cls, task: str, section: str) -> str:
        """Return the blackboard key for ``section`` of ``task``."""
        return cls.BLACKBOARD.format(task=task, section=section)

    @classmethod
    def message(cls, task: str, channel: str) -> str:
        """Return the message key for ``channel`` of ``task``."""
        return cls.MESSAGE.format(task=task, channel=channel)

    @classmethod
    def handoff(cls, task: str) -> str:
        """Return the latest-handoff key for ``task``."""
        return cls.HANDOFF.format(task=task)

    # ----- History key builders -----

    @classmethod
    def history_runs(cls, agent_id: str) -> str:
        """Return the run-history key for ``agent_id``."""
        return cls.HISTORY_RUNS.format(agent_id=agent_id)


# =============================================================================
# Configuration Constants
# =============================================================================

# Connection defaults for the Redis-protocol store.
# NOTE(review): DEFAULT_REDIS_PASSWORD is a credential hard-coded in source;
# consider supplying it from the environment or a secret store instead.
DEFAULT_REDIS_HOST: str = "127.0.0.1"
DEFAULT_REDIS_PORT: int = 6379
DEFAULT_REDIS_PASSWORD: str = "governance2026"

# Filesystem locations used when no explicit path is given.
DEFAULT_LEDGER_PATH: str = "/opt/agent-governance/ledger/governance.db"
DEFAULT_SCHEMA_PATH: str = "/opt/agent-governance/pipeline/schemas/pipeline.schema.json"
DEFAULT_TEMPLATES_PATH: str = "/opt/agent-governance/pipeline/templates"

# Time-to-live defaults, all expressed in seconds.
DEFAULT_HEARTBEAT_TTL: int = 60
DEFAULT_LOCK_TTL: int = 300
DEFAULT_OUTPUT_TTL: int = 300


# =============================================================================
# Utility Functions
# =============================================================================

def get_output_type_for_phase(phase: AgentPhase) -> OutputType:
    """Return the output type mapped to *phase*, or ALPHA if it has no mapping."""
    try:
        return PHASE_OUTPUT_TYPES[phase]
    except KeyError:
        # Phases without an entry fall back to the draft tier.
        return OutputType.ALPHA


def is_terminal_phase(phase: AgentPhase) -> bool:
    """Return True when *phase* is one of the terminal phases, EXIT or REVOKED."""
    terminal_phases = (AgentPhase.EXIT, AgentPhase.REVOKED)
    return phase in terminal_phases


def next_phase(current: AgentPhase) -> Optional[AgentPhase]:
    """
    Return the phase that follows *current* in the ordered lifecycle.

    Returns None when *current* is REVOKED, is the last ordered phase, or
    is not part of the ordered lifecycle at all.
    """
    if current == AgentPhase.REVOKED:
        return None
    # Walk adjacent (phase, successor) pairs; the last phase never appears
    # as the first item of a pair, so it falls through to None.
    for phase, successor in zip(AGENT_PHASES_ORDERED, AGENT_PHASES_ORDERED[1:]):
        if phase == current:
            return successor
    return None