""" MockLLM - Simulates LLM responses for deterministic testing. Provides pattern-based response matching for testing agent behavior without requiring actual LLM API calls. """ from typing import Dict, List, Any, Optional, Callable, Tuple from dataclasses import dataclass, field import re import json @dataclass class MockResponse: """A configured mock response""" pattern: str response: str confidence: float = 0.9 delay: float = 0.0 # Simulated latency tokens_used: int = 100 call_count: int = 0 class MockLLM: """ Mock LLM implementation for testing. Features: - Pattern-based response matching - Configurable confidence levels - Token counting simulation - Call tracking for assertions - Error injection for failure testing """ def __init__(self): self._responses: List[MockResponse] = [] self._default_response: str = '{"decision": "EXECUTE", "confidence": 0.5, "steps": []}' self._call_history: List[Dict[str, Any]] = [] self._total_tokens: int = 0 self._error_mode: Optional[str] = None self._error_after: int = 0 self._call_count: int = 0 # Set up some default patterns self._setup_defaults() def _setup_defaults(self): """Set up default response patterns""" # Plan generation self.add_response( pattern="generate.*plan|create.*plan|plan.*for", response=json.dumps({ "decision": "PLAN", "confidence": 0.85, "plan": { "title": "Generated Plan", "steps": [ {"action": "analyze", "description": "Analyze requirements"}, {"action": "implement", "description": "Implement solution"}, {"action": "verify", "description": "Verify results"} ] }, "assumptions": [], "risks": [] }), confidence=0.85 ) # Read operations self.add_response( pattern="read|view|show|list|get", response=json.dumps({ "decision": "EXECUTE", "confidence": 0.95, "action": "read", "result": "Operation completed successfully" }), confidence=0.95 ) # Execute operations self.add_response( pattern="execute|run|deploy|apply", response=json.dumps({ "decision": "EXECUTE", "confidence": 0.80, "action": "execute", "steps": [ {"command": "example_command", "status": "pending"} ], "requires_approval": True }), confidence=0.80 ) # Error/unknown self.add_response( pattern="error|fail|invalid", response=json.dumps({ "decision": "ERROR", "confidence": 0.1, "error": "Simulated error response", "recommendations": ["Check input parameters", "Verify permissions"] }), confidence=0.1 ) def add_response(self, pattern: str, response: str, confidence: float = 0.9, delay: float = 0.0, tokens: int = 100): """ Add a response pattern. Args: pattern: Regex pattern to match against prompt response: Response to return (usually JSON string) confidence: Confidence score for this response delay: Simulated response latency (seconds) tokens: Simulated token usage """ self._responses.insert(0, MockResponse( pattern=pattern, response=response, confidence=confidence, delay=delay, tokens_used=tokens )) def set_default_response(self, response: str): """Set the default response when no pattern matches""" self._default_response = response def set_error_mode(self, error_type: str, after_calls: int = 0): """ Configure error injection. Args: error_type: Type of error ("timeout", "rate_limit", "api_error", None) after_calls: Number of successful calls before error """ self._error_mode = error_type self._error_after = after_calls def complete(self, prompt: str, max_tokens: int = 1000, temperature: float = 0.7) -> Tuple[str, Dict[str, Any]]: """ Generate a completion for the prompt. Returns: (response_text, metadata) """ self._call_count += 1 # Record call call_record = { "prompt": prompt[:500], # Truncate for storage "max_tokens": max_tokens, "temperature": temperature, "timestamp": __import__("datetime").datetime.utcnow().isoformat() } # Check for error injection if self._error_mode and self._call_count > self._error_after: call_record["error"] = self._error_mode self._call_history.append(call_record) raise self._create_error(self._error_mode) # Find matching response prompt_lower = prompt.lower() matched_response = None for mock_resp in self._responses: if re.search(mock_resp.pattern, prompt_lower, re.IGNORECASE): matched_response = mock_resp matched_response.call_count += 1 break if matched_response: response = matched_response.response tokens = matched_response.tokens_used confidence = matched_response.confidence # Simulate delay if matched_response.delay > 0: import time time.sleep(matched_response.delay) else: response = self._default_response tokens = 50 confidence = 0.5 self._total_tokens += tokens metadata = { "tokens_used": tokens, "confidence": confidence, "pattern_matched": matched_response.pattern if matched_response else None, "total_tokens": self._total_tokens } call_record["response"] = response[:500] call_record["metadata"] = metadata self._call_history.append(call_record) return response, metadata def _create_error(self, error_type: str) -> Exception: """Create an appropriate error for testing""" if error_type == "timeout": return TimeoutError("LLM request timed out") elif error_type == "rate_limit": return Exception("Rate limit exceeded") elif error_type == "api_error": return Exception("API error: 500 Internal Server Error") else: return Exception(f"Unknown error: {error_type}") def chat(self, messages: List[Dict[str, str]], **kwargs) -> Tuple[str, Dict[str, Any]]: """ Chat completion interface. Args: messages: List of {"role": "user/assistant/system", "content": "..."} """ # Combine messages into a single prompt prompt = "\n".join([ f"{m['role']}: {m['content']}" for m in messages ]) return self.complete(prompt, **kwargs) # === Test Helpers === def reset(self): """Reset all state for testing""" self._responses.clear() self._call_history.clear() self._total_tokens = 0 self._error_mode = None self._error_after = 0 self._call_count = 0 self._setup_defaults() def get_call_history(self) -> List[Dict[str, Any]]: """Get call history for assertions""" return self._call_history.copy() def get_call_count(self) -> int: """Get total number of calls""" return self._call_count def get_total_tokens(self) -> int: """Get total tokens used""" return self._total_tokens def assert_called_with(self, pattern: str) -> bool: """Check if any call matched a pattern""" for call in self._call_history: if re.search(pattern, call["prompt"], re.IGNORECASE): return True return False def get_response_stats(self) -> Dict[str, int]: """Get call counts per response pattern""" return { resp.pattern: resp.call_count for resp in self._responses if resp.call_count > 0 } class MockLLMBuilder: """Builder for creating configured MockLLM instances""" def __init__(self): self._llm = MockLLM() def with_response(self, pattern: str, response: str, **kwargs) -> 'MockLLMBuilder': """Add a response pattern""" self._llm.add_response(pattern, response, **kwargs) return self def with_json_response(self, pattern: str, data: Dict[str, Any], **kwargs) -> 'MockLLMBuilder': """Add a JSON response pattern""" self._llm.add_response(pattern, json.dumps(data), **kwargs) return self def with_error_after(self, calls: int, error_type: str = "api_error") -> 'MockLLMBuilder': """Configure error injection""" self._llm.set_error_mode(error_type, calls) return self def with_high_confidence(self) -> 'MockLLMBuilder': """Configure all responses for high confidence""" for resp in self._llm._responses: resp.confidence = 0.95 return self def with_low_confidence(self) -> 'MockLLMBuilder': """Configure all responses for low confidence""" for resp in self._llm._responses: resp.confidence = 0.3 return self def build(self) -> MockLLM: """Build the configured MockLLM""" return self._llm