commit 77655c298c Initial commit: Agent Governance System Phase 8
Phase 8 Production Hardening with complete governance infrastructure:

- Vault integration with tiered policies (T0-T4)
- DragonflyDB state management
- SQLite audit ledger
- Pipeline DSL and templates
- Promotion/revocation engine
- Checkpoint system for session persistence
- Health manager and circuit breaker for fault tolerance
- GitHub/Slack integrations
- Architectural test pipeline with bug watcher, suggestion engine, council review
- Multi-agent chaos testing framework

Test Results:
- Governance tests: 68/68 passing
- E2E workflow: 16/16 passing
- Phase 2 Vault: 14/14 passing
- Integration tests: 27/27 passing

Coverage: 57.6% average across 12 phases

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 22:07:06 -05:00

309 lines
9.8 KiB
Python

"""
MockLLM - Simulates LLM responses for deterministic testing.
Provides pattern-based response matching for testing agent behavior
without requiring actual LLM API calls.
"""
import json
import re
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any, Callable, Dict, List, Optional, Tuple
@dataclass
class MockResponse:
    """A configured mock response.

    One entry in MockLLM's pattern table: when ``pattern`` matches a prompt,
    ``response`` is returned and the bookkeeping fields below are reported
    in the call metadata.
    """
    pattern: str  # regex matched (case-insensitively) against the prompt
    response: str  # canned response text, usually a JSON string
    confidence: float = 0.9  # confidence score reported in call metadata
    delay: float = 0.0  # Simulated latency
    tokens_used: int = 100  # simulated token usage added to the running total
    call_count: int = 0  # how many times this response has been served
class MockLLM:
    """
    Mock LLM implementation for testing.

    Matches prompts against registered regex patterns and returns canned
    responses so agent behavior can be tested deterministically, without
    real LLM API calls.

    Features:
    - Pattern-based response matching (most recently added pattern wins)
    - Configurable confidence levels
    - Token counting simulation
    - Call tracking for assertions
    - Error injection for failure testing
    """

    def __init__(self):
        # Patterns are checked in list order; add_response() prepends, so
        # later registrations shadow earlier ones.
        self._responses: List[MockResponse] = []
        # Returned verbatim when no registered pattern matches the prompt.
        self._default_response: str = '{"decision": "EXECUTE", "confidence": 0.5, "steps": []}'
        self._call_history: List[Dict[str, Any]] = []
        self._total_tokens: int = 0
        self._error_mode: Optional[str] = None  # "timeout" | "rate_limit" | "api_error" | None
        self._error_after: int = 0  # successful calls allowed before injected errors start
        self._call_count: int = 0
        # Set up some default patterns
        self._setup_defaults()

    def _setup_defaults(self):
        """Register the built-in plan/read/execute/error response patterns."""
        # Plan generation
        self.add_response(
            pattern="generate.*plan|create.*plan|plan.*for",
            response=json.dumps({
                "decision": "PLAN",
                "confidence": 0.85,
                "plan": {
                    "title": "Generated Plan",
                    "steps": [
                        {"action": "analyze", "description": "Analyze requirements"},
                        {"action": "implement", "description": "Implement solution"},
                        {"action": "verify", "description": "Verify results"}
                    ]
                },
                "assumptions": [],
                "risks": []
            }),
            confidence=0.85
        )
        # Read operations
        self.add_response(
            pattern="read|view|show|list|get",
            response=json.dumps({
                "decision": "EXECUTE",
                "confidence": 0.95,
                "action": "read",
                "result": "Operation completed successfully"
            }),
            confidence=0.95
        )
        # Execute operations
        self.add_response(
            pattern="execute|run|deploy|apply",
            response=json.dumps({
                "decision": "EXECUTE",
                "confidence": 0.80,
                "action": "execute",
                "steps": [
                    {"command": "example_command", "status": "pending"}
                ],
                "requires_approval": True
            }),
            confidence=0.80
        )
        # Error/unknown
        self.add_response(
            pattern="error|fail|invalid",
            response=json.dumps({
                "decision": "ERROR",
                "confidence": 0.1,
                "error": "Simulated error response",
                "recommendations": ["Check input parameters", "Verify permissions"]
            }),
            confidence=0.1
        )

    def add_response(self, pattern: str, response: str, confidence: float = 0.9,
                     delay: float = 0.0, tokens: int = 100):
        """
        Add a response pattern.

        The new pattern is inserted at the front of the table, so it takes
        precedence over any previously registered (including default) patterns.

        Args:
            pattern: Regex pattern to match against prompt
            response: Response to return (usually JSON string)
            confidence: Confidence score for this response
            delay: Simulated response latency (seconds)
            tokens: Simulated token usage
        """
        self._responses.insert(0, MockResponse(
            pattern=pattern,
            response=response,
            confidence=confidence,
            delay=delay,
            tokens_used=tokens
        ))

    def set_default_response(self, response: str):
        """Set the default response when no pattern matches"""
        self._default_response = response

    def set_error_mode(self, error_type: str, after_calls: int = 0):
        """
        Configure error injection.

        Args:
            error_type: Type of error ("timeout", "rate_limit", "api_error", None)
            after_calls: Number of successful calls before error
        """
        self._error_mode = error_type
        self._error_after = after_calls

    def complete(self, prompt: str, max_tokens: int = 1000,
                 temperature: float = 0.7) -> Tuple[str, Dict[str, Any]]:
        """
        Generate a completion for the prompt.

        Raises the configured injected error (see set_error_mode) once the
        allowed number of successful calls has been exceeded; the failed call
        is still recorded in the history.

        Returns: (response_text, metadata)
        """
        self._call_count += 1
        # Record call
        call_record = {
            "prompt": prompt[:500],  # Truncate for storage
            "max_tokens": max_tokens,
            "temperature": temperature,
            # Timezone-aware UTC timestamp; datetime.utcnow() is deprecated
            # and returned a naive datetime.
            "timestamp": datetime.now(timezone.utc).isoformat()
        }
        # Check for error injection
        if self._error_mode and self._call_count > self._error_after:
            call_record["error"] = self._error_mode
            self._call_history.append(call_record)
            raise self._create_error(self._error_mode)
        # Find matching response; patterns are checked newest-first.
        prompt_lower = prompt.lower()
        matched_response = None
        for mock_resp in self._responses:
            # IGNORECASE kept so patterns containing uppercase characters
            # still match the lowercased prompt.
            if re.search(mock_resp.pattern, prompt_lower, re.IGNORECASE):
                matched_response = mock_resp
                matched_response.call_count += 1
                break
        if matched_response:
            response = matched_response.response
            tokens = matched_response.tokens_used
            confidence = matched_response.confidence
            # Simulate latency if configured for this response
            if matched_response.delay > 0:
                time.sleep(matched_response.delay)
        else:
            # Fall back to the configured default.
            response = self._default_response
            tokens = 50
            confidence = 0.5
        self._total_tokens += tokens
        metadata = {
            "tokens_used": tokens,
            "confidence": confidence,
            "pattern_matched": matched_response.pattern if matched_response else None,
            "total_tokens": self._total_tokens
        }
        call_record["response"] = response[:500]
        call_record["metadata"] = metadata
        self._call_history.append(call_record)
        return response, metadata

    def _create_error(self, error_type: str) -> Exception:
        """Create an appropriate error for testing"""
        if error_type == "timeout":
            return TimeoutError("LLM request timed out")
        elif error_type == "rate_limit":
            return Exception("Rate limit exceeded")
        elif error_type == "api_error":
            return Exception("API error: 500 Internal Server Error")
        else:
            return Exception(f"Unknown error: {error_type}")

    def chat(self, messages: List[Dict[str, str]], **kwargs) -> Tuple[str, Dict[str, Any]]:
        """
        Chat completion interface.

        Flattens the conversation into a single "role: content" prompt and
        delegates to complete().

        Args:
            messages: List of {"role": "user/assistant/system", "content": "..."}
        """
        # Combine messages into a single prompt
        prompt = "\n".join([
            f"{m['role']}: {m['content']}"
            for m in messages
        ])
        return self.complete(prompt, **kwargs)

    # === Test Helpers ===

    def reset(self):
        """Reset all state for testing (default patterns are re-registered)."""
        self._responses.clear()
        self._call_history.clear()
        self._total_tokens = 0
        self._error_mode = None
        self._error_after = 0
        self._call_count = 0
        self._setup_defaults()

    def get_call_history(self) -> List[Dict[str, Any]]:
        """Get call history for assertions (shallow copy)."""
        return self._call_history.copy()

    def get_call_count(self) -> int:
        """Get total number of calls"""
        return self._call_count

    def get_total_tokens(self) -> int:
        """Get total tokens used"""
        return self._total_tokens

    def assert_called_with(self, pattern: str) -> bool:
        """Check if any recorded call's prompt matched a pattern."""
        for call in self._call_history:
            if re.search(pattern, call["prompt"], re.IGNORECASE):
                return True
        return False

    def get_response_stats(self) -> Dict[str, int]:
        """Get call counts per response pattern (only patterns that fired)."""
        return {
            resp.pattern: resp.call_count
            for resp in self._responses
            if resp.call_count > 0
        }
class MockLLMBuilder:
    """Fluent builder that assembles a pre-configured MockLLM instance."""

    def __init__(self):
        self._mock = MockLLM()

    def with_response(self, pattern: str, response: str, **kwargs) -> 'MockLLMBuilder':
        """Register a canned response for prompts matching *pattern*."""
        self._mock.add_response(pattern, response, **kwargs)
        return self

    def with_json_response(self, pattern: str, data: Dict[str, Any],
                           **kwargs) -> 'MockLLMBuilder':
        """Register a response whose body is *data* serialized as JSON."""
        return self.with_response(pattern, json.dumps(data), **kwargs)

    def with_error_after(self, calls: int, error_type: str = "api_error") -> 'MockLLMBuilder':
        """Inject *error_type* failures once *calls* calls have succeeded."""
        self._mock.set_error_mode(error_type, calls)
        return self

    def with_high_confidence(self) -> 'MockLLMBuilder':
        """Force every currently registered response to confidence 0.95."""
        return self._override_confidence(0.95)

    def with_low_confidence(self) -> 'MockLLMBuilder':
        """Force every currently registered response to confidence 0.3."""
        return self._override_confidence(0.3)

    def _override_confidence(self, value: float) -> 'MockLLMBuilder':
        # Rewrites the confidence on all responses registered so far;
        # responses added afterwards keep their own values.
        for entry in self._mock._responses:
            entry.confidence = value
        return self

    def build(self) -> MockLLM:
        """Return the configured MockLLM."""
        return self._mock