# Commit context (extraction residue, preserved as a comment):
# Phase 8 Production Hardening with complete governance infrastructure:
# Vault integration with tiered policies (T0-T4), DragonflyDB state management,
# SQLite audit ledger, Pipeline DSL and templates, promotion/revocation engine,
# checkpoint system, health manager/circuit breaker, GitHub/Slack integrations,
# architectural test pipeline, multi-agent chaos testing framework.
# Test results: governance 68/68, E2E 16/16, Phase 2 Vault 14/14, integration 27/27.
# Coverage: 57.6% average across 12 phases.
"""
|
|
MockLLM - Simulates LLM responses for deterministic testing.
|
|
|
|
Provides pattern-based response matching for testing agent behavior
|
|
without requiring actual LLM API calls.
|
|
"""
import json
import re
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any, Callable, Dict, List, Optional, Tuple
@dataclass
|
|
class MockResponse:
|
|
"""A configured mock response"""
|
|
pattern: str
|
|
response: str
|
|
confidence: float = 0.9
|
|
delay: float = 0.0 # Simulated latency
|
|
tokens_used: int = 100
|
|
call_count: int = 0
|
|
|
|
|
|
class MockLLM:
    """
    Mock LLM implementation for testing.

    Features:
    - Pattern-based response matching
    - Configurable confidence levels
    - Token counting simulation
    - Call tracking for assertions
    - Error injection for failure testing
    """

    def __init__(self) -> None:
        # Patterns are checked in list order; add_response() inserts at the
        # front, so caller-added patterns take priority over the defaults.
        self._responses: List[MockResponse] = []
        self._default_response: str = '{"decision": "EXECUTE", "confidence": 0.5, "steps": []}'
        self._call_history: List[Dict[str, Any]] = []
        self._total_tokens: int = 0
        self._error_mode: Optional[str] = None
        self._error_after: int = 0
        self._call_count: int = 0

        # Set up some default patterns
        self._setup_defaults()

    def _setup_defaults(self) -> None:
        """Set up default response patterns (plan, read, execute, error)."""
        # Plan generation
        self.add_response(
            pattern="generate.*plan|create.*plan|plan.*for",
            response=json.dumps({
                "decision": "PLAN",
                "confidence": 0.85,
                "plan": {
                    "title": "Generated Plan",
                    "steps": [
                        {"action": "analyze", "description": "Analyze requirements"},
                        {"action": "implement", "description": "Implement solution"},
                        {"action": "verify", "description": "Verify results"}
                    ]
                },
                "assumptions": [],
                "risks": []
            }),
            confidence=0.85
        )

        # Read operations
        self.add_response(
            pattern="read|view|show|list|get",
            response=json.dumps({
                "decision": "EXECUTE",
                "confidence": 0.95,
                "action": "read",
                "result": "Operation completed successfully"
            }),
            confidence=0.95
        )

        # Execute operations
        self.add_response(
            pattern="execute|run|deploy|apply",
            response=json.dumps({
                "decision": "EXECUTE",
                "confidence": 0.80,
                "action": "execute",
                "steps": [
                    {"command": "example_command", "status": "pending"}
                ],
                "requires_approval": True
            }),
            confidence=0.80
        )

        # Error/unknown
        self.add_response(
            pattern="error|fail|invalid",
            response=json.dumps({
                "decision": "ERROR",
                "confidence": 0.1,
                "error": "Simulated error response",
                "recommendations": ["Check input parameters", "Verify permissions"]
            }),
            confidence=0.1
        )

    def add_response(self, pattern: str, response: str, confidence: float = 0.9,
                     delay: float = 0.0, tokens: int = 100) -> None:
        """
        Add a response pattern.

        New patterns are inserted at the front of the list so they take
        precedence over the built-in defaults.

        Args:
            pattern: Regex pattern to match against prompt
            response: Response to return (usually JSON string)
            confidence: Confidence score for this response
            delay: Simulated response latency (seconds)
            tokens: Simulated token usage
        """
        self._responses.insert(0, MockResponse(
            pattern=pattern,
            response=response,
            confidence=confidence,
            delay=delay,
            tokens_used=tokens
        ))

    def set_default_response(self, response: str) -> None:
        """Set the default response when no pattern matches"""
        self._default_response = response

    def set_error_mode(self, error_type: str, after_calls: int = 0) -> None:
        """
        Configure error injection.

        Args:
            error_type: Type of error ("timeout", "rate_limit", "api_error", None)
            after_calls: Number of successful calls before error
        """
        self._error_mode = error_type
        self._error_after = after_calls

    def complete(self, prompt: str, max_tokens: int = 1000,
                 temperature: float = 0.7) -> Tuple[str, Dict[str, Any]]:
        """
        Generate a completion for the prompt.

        Records the call (with a truncated prompt — assert_called_with()
        therefore only sees the first 500 characters), optionally raises an
        injected error, and returns the first matching configured response.

        Returns: (response_text, metadata)

        Raises:
            TimeoutError / Exception: when error injection is active and the
            configured number of successful calls has been exceeded.
        """
        self._call_count += 1

        # Record call
        call_record: Dict[str, Any] = {
            "prompt": prompt[:500],  # Truncate for storage
            "max_tokens": max_tokens,
            "temperature": temperature,
            # Timezone-aware timestamp; datetime.utcnow() is deprecated
            # since Python 3.12 and the old __import__ hack was unreadable.
            "timestamp": datetime.now(timezone.utc).isoformat()
        }

        # Check for error injection
        if self._error_mode and self._call_count > self._error_after:
            call_record["error"] = self._error_mode
            self._call_history.append(call_record)
            raise self._create_error(self._error_mode)

        # Find matching response. re.IGNORECASE already makes matching
        # case-insensitive, so lower-casing the prompt first is redundant.
        matched_response: Optional[MockResponse] = None
        for mock_resp in self._responses:
            if re.search(mock_resp.pattern, prompt, re.IGNORECASE):
                matched_response = mock_resp
                matched_response.call_count += 1
                break

        if matched_response:
            response = matched_response.response
            tokens = matched_response.tokens_used
            confidence = matched_response.confidence

            # Simulate delay
            if matched_response.delay > 0:
                time.sleep(matched_response.delay)
        else:
            response = self._default_response
            tokens = 50
            confidence = 0.5

        self._total_tokens += tokens

        metadata = {
            "tokens_used": tokens,
            "confidence": confidence,
            "pattern_matched": matched_response.pattern if matched_response else None,
            "total_tokens": self._total_tokens
        }

        call_record["response"] = response[:500]
        call_record["metadata"] = metadata
        self._call_history.append(call_record)

        return response, metadata

    def _create_error(self, error_type: str) -> Exception:
        """Create an appropriate error for testing"""
        if error_type == "timeout":
            return TimeoutError("LLM request timed out")
        elif error_type == "rate_limit":
            return Exception("Rate limit exceeded")
        elif error_type == "api_error":
            return Exception("API error: 500 Internal Server Error")
        else:
            return Exception(f"Unknown error: {error_type}")

    def chat(self, messages: List[Dict[str, str]], **kwargs) -> Tuple[str, Dict[str, Any]]:
        """
        Chat completion interface.

        Flattens the conversation into a single "role: content" prompt and
        delegates to complete().

        Args:
            messages: List of {"role": "user/assistant/system", "content": "..."}
        """
        # Combine messages into a single prompt
        prompt = "\n".join([
            f"{m['role']}: {m['content']}"
            for m in messages
        ])
        return self.complete(prompt, **kwargs)

    # === Test Helpers ===

    def reset(self) -> None:
        """Reset all state for testing (default patterns are re-installed)."""
        self._responses.clear()
        self._call_history.clear()
        self._total_tokens = 0
        self._error_mode = None
        self._error_after = 0
        self._call_count = 0
        self._setup_defaults()

    def get_call_history(self) -> List[Dict[str, Any]]:
        """Get call history for assertions (shallow copy)."""
        return self._call_history.copy()

    def get_call_count(self) -> int:
        """Get total number of calls"""
        return self._call_count

    def get_total_tokens(self) -> int:
        """Get total tokens used"""
        return self._total_tokens

    def assert_called_with(self, pattern: str) -> bool:
        """Check if any recorded call's (truncated) prompt matched a pattern."""
        for call in self._call_history:
            if re.search(pattern, call["prompt"], re.IGNORECASE):
                return True
        return False

    def get_response_stats(self) -> Dict[str, int]:
        """Get call counts per response pattern (only patterns that matched)."""
        return {
            resp.pattern: resp.call_count
            for resp in self._responses
            if resp.call_count > 0
        }
|
|
|
|
|
|
class MockLLMBuilder:
    """Fluent builder that assembles a pre-configured MockLLM instance."""

    def __init__(self):
        self._product = MockLLM()

    def _set_all_confidence(self, level: float) -> 'MockLLMBuilder':
        """Apply a single confidence score to every configured response."""
        for mock_response in self._product._responses:
            mock_response.confidence = level
        return self

    def with_response(self, pattern: str, response: str, **kwargs) -> 'MockLLMBuilder':
        """Register a raw response for prompts matching *pattern*."""
        self._product.add_response(pattern, response, **kwargs)
        return self

    def with_json_response(self, pattern: str, data: Dict[str, Any],
                           **kwargs) -> 'MockLLMBuilder':
        """Register a response whose body is *data* serialized as JSON."""
        return self.with_response(pattern, json.dumps(data), **kwargs)

    def with_error_after(self, calls: int, error_type: str = "api_error") -> 'MockLLMBuilder':
        """Inject *error_type* once *calls* calls have succeeded."""
        self._product.set_error_mode(error_type, calls)
        return self

    def with_high_confidence(self) -> 'MockLLMBuilder':
        """Force every configured response to report confidence 0.95."""
        return self._set_all_confidence(0.95)

    def with_low_confidence(self) -> 'MockLLMBuilder':
        """Force every configured response to report confidence 0.3."""
        return self._set_all_confidence(0.3)

    def build(self) -> MockLLM:
        """Return the fully configured MockLLM."""
        return self._product