"""
Suggestion + Reasoning Pipeline
===============================
Analyzes outputs and bug reports using historical context to generate
suggested fixes with rationale, ranked by risk/impact.

Features:
- Uses checkpoints, memory logs, STATUS files for context
- Generates fix suggestions with reasoning
- Ranks by risk and impact
- Tracks suggestion outcomes for learning
"""

import hashlib
import json
import logging
import os
import re
import sqlite3
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from enum import Enum
from pathlib import Path
from typing import Any, Optional

import redis

from .bug_watcher import Anomaly, AnomalyType, Severity

logger = logging.getLogger(__name__)

# Redis keys used by the engine.
_ANOMALIES_KEY = "oversight:anomalies"
_SUGGESTIONS_KEY = "oversight:suggestions"
_REVIEW_KEY_PREFIX = "oversight:suggestion:"

# Only this many of the newest suggestions are retained in Redis.
_MAX_STORED_SUGGESTIONS = 500

# Multipliers used by Suggestion.priority_score; unknown values score 0.5.
_RISK_SCORES = {"critical": 0.2, "high": 0.4, "medium": 0.6, "low": 0.8, "trivial": 1.0}
_IMPACT_SCORES = {"transformative": 1.0, "high": 0.8, "medium": 0.6, "low": 0.4, "minimal": 0.2}

# Matches "### YYYY-MM-DD <rest of heading>" followed by the entry body
# (everything up to the next '#' character).
_ACTIVITY_ENTRY_RE = re.compile(r'### (\d{4}-\d{2}-\d{2}[^\n]*)\n([^#]*)')


def _enum_value(value: Any) -> Any:
    """Return ``value.value`` for enum members and ``value`` itself otherwise."""
    return value.value if hasattr(value, "value") else value


class RiskLevel(str, Enum):
    """Risk level for implementing a suggestion"""
    CRITICAL = "critical"  # Could break system
    HIGH = "high"          # Significant change
    MEDIUM = "medium"      # Moderate change
    LOW = "low"            # Safe change
    TRIVIAL = "trivial"    # No risk


class ImpactLevel(str, Enum):
    """Impact level of implementing a suggestion"""
    TRANSFORMATIVE = "transformative"  # Major system improvement
    HIGH = "high"                      # Significant improvement
    MEDIUM = "medium"                  # Moderate improvement
    LOW = "low"                        # Minor improvement
    MINIMAL = "minimal"                # Negligible improvement


class SuggestionStatus(str, Enum):
    """Status of a suggestion"""
    PENDING = "pending"
    APPROVED = "approved"
    REJECTED = "rejected"
    IMPLEMENTED = "implemented"
    FAILED = "failed"


@dataclass
class Suggestion:
    """A suggested fix or improvement.

    ``id`` and ``created_at`` may be passed as empty strings; they are then
    filled in by ``__post_init__``.
    """
    id: str
    anomaly_id: str
    title: str
    description: str
    reasoning: str
    fix_steps: list[str]
    risk: RiskLevel
    impact: ImpactLevel
    estimated_effort: str  # "trivial", "small", "medium", "large", "epic"
    files_affected: list[str] = field(default_factory=list)
    code_changes: Optional[dict] = None  # {"file": {"old": ..., "new": ...}}
    auto_fixable: bool = False
    requires_human: bool = True
    status: SuggestionStatus = SuggestionStatus.PENDING
    created_at: str = ""
    reviewed_at: Optional[str] = None
    reviewed_by: Optional[str] = None
    outcome: Optional[str] = None

    def __post_init__(self):
        """Fill in ``created_at`` (UTC, ISO-8601) and a derived ``id`` when empty."""
        if not self.created_at:
            self.created_at = datetime.now(timezone.utc).isoformat()
        if not self.id:
            # The id is derived from anomaly id, title and timestamp.
            seed = f"{self.anomaly_id}{self.title}{self.created_at}"
            digest = hashlib.sha256(seed.encode()).hexdigest()
            self.id = f"sug-{digest[:12]}"

    @property
    def priority_score(self) -> float:
        """Calculate priority score (higher = more urgent).

        The score is the product of an impact weight and a risk weight, so
        higher impact combined with lower risk yields a higher priority.
        """
        # risk/impact may be enum members or plain strings.
        risk_val = _enum_value(self.risk)
        impact_val = _enum_value(self.impact)
        return _IMPACT_SCORES.get(impact_val, 0.5) * _RISK_SCORES.get(risk_val, 0.5)


@dataclass
class HistoricalContext:
    """Context gathered from historical data"""
    recent_checkpoints: list[dict] = field(default_factory=list)
    related_anomalies: list[dict] = field(default_factory=list)
    past_suggestions: list[dict] = field(default_factory=list)
    memory_entries: list[dict] = field(default_factory=list)
    status_history: list[dict] = field(default_factory=list)


class SuggestionEngine:
    """
    Analyzes anomalies and generates fix suggestions using historical context.

    Process:
    1. Gather context from checkpoints, memory, STATUS files
    2. Analyze anomaly patterns
    3. Generate suggestions with reasoning
    4. Rank by risk/impact
    5. Track outcomes for learning
    """

    # Anomaly type -> common fixes mapping; each entry is (title, risk, impact)
    FIX_PATTERNS = {
        AnomalyType.UNHANDLED_ERROR: [
            ("Add try-except block", "medium", "medium"),
            ("Add error logging", "low", "low"),
            ("Create error handler", "medium", "high"),
        ],
        AnomalyType.REGRESSION: [
            ("Rollback to previous version", "high", "high"),
            ("Add regression test", "low", "medium"),
            ("Investigate root cause", "low", "high"),
        ],
        AnomalyType.MISSING_ARTIFACT: [
            ("Create missing file", "low", "medium"),
            ("Update documentation", "trivial", "low"),
            ("Add artifact generation step", "medium", "medium"),
        ],
        AnomalyType.STATE_INCONSISTENCY: [
            ("Resync state from source", "medium", "high"),
            ("Add state validation", "low", "medium"),
            ("Implement state machine", "high", "high"),
        ],
        AnomalyType.DEPENDENCY_UNAVAILABLE: [
            ("Restart dependency service", "low", "high"),
            ("Add fallback mechanism", "medium", "high"),
            ("Implement circuit breaker", "medium", "high"),
        ],
        AnomalyType.HEALTH_CHECK_FAILURE: [
            ("Investigate failing check", "low", "medium"),
            ("Restart affected service", "low", "high"),
            ("Add health recovery logic", "medium", "high"),
        ],
        AnomalyType.SECURITY_VIOLATION: [
            ("Revoke compromised credentials", "low", "transformative"),
            ("Audit access logs", "low", "high"),
            ("Strengthen access controls", "medium", "high"),
        ],
        AnomalyType.TIMEOUT: [
            ("Increase timeout threshold", "low", "low"),
            ("Optimize slow operation", "medium", "high"),
            ("Add async processing", "high", "high"),
        ],
    }

    def __init__(self, base_path: str = "/opt/agent-governance"):
        """Set up storage paths under ``base_path`` and try to connect to Redis."""
        self.base_path = Path(base_path)
        self.ledger_db = self.base_path / "ledger" / "governance.db"
        self.checkpoint_dir = self.base_path / "checkpoint" / "storage"
        self.memory_dir = self.base_path / "memory"
        self.suggestions: list[Suggestion] = []
        self._redis: Optional[redis.Redis] = None
        self._setup_redis()

    def _setup_redis(self):
        """Connect to DragonflyDB.

        Leaves ``self._redis`` as ``None`` when the connection or ping fails,
        in which case the engine works from in-memory state only.
        """
        # NOTE: the defaults reproduce the previously hard-coded connection
        # settings. Prefer supplying the password through the environment so
        # the credential does not have to live in source control.
        try:
            client = redis.Redis(
                host=os.environ.get("GOVERNANCE_REDIS_HOST", "127.0.0.1"),
                port=int(os.environ.get("GOVERNANCE_REDIS_PORT", "6379")),
                password=os.environ.get("GOVERNANCE_REDIS_PASSWORD", "governance2026"),
                decode_responses=True,
            )
            client.ping()
        except Exception as exc:  # deliberate best-effort: Redis is optional
            logger.debug("Redis unavailable, using in-memory state only: %s", exc)
            self._redis = None
        else:
            self._redis = client

    def _now(self) -> str:
        """Return the current UTC time as an ISO-8601 string."""
        return datetime.now(timezone.utc).isoformat()

    def gather_context(self, anomaly: Anomaly) -> HistoricalContext:
        """Gather historical context relevant to an anomaly"""
        return HistoricalContext(
            # 1. Recent checkpoints
            recent_checkpoints=self._get_recent_checkpoints(5),
            # 2. Related anomalies (same type or phase)
            related_anomalies=self._get_related_anomalies(anomaly),
            # 3. Past suggestions for similar issues
            past_suggestions=self._get_past_suggestions(anomaly),
            # 4. Memory entries related to the directory/phase
            memory_entries=self._get_memory_entries(anomaly.directory),
            # 5. STATUS history for the affected directory
            status_history=self._get_status_history(anomaly.directory),
        )

    def _get_recent_checkpoints(self, limit: int) -> list[dict]:
        """Get recent checkpoints.

        Reads up to ``limit`` ``ckpt-*.json`` files from the checkpoint
        directory, taken from the end of the name-sorted listing. Unreadable
        or malformed files are skipped.
        """
        if not self.checkpoint_dir.exists():
            return []

        checkpoints = []
        newest_first = sorted(self.checkpoint_dir.glob("ckpt-*.json"), reverse=True)
        for path in newest_first[:limit]:
            try:
                data = json.loads(path.read_text(encoding="utf-8"))
                phase = data.get("phase", {})
                checkpoints.append({
                    "id": data.get("checkpoint_id"),
                    "phase": phase.get("name"),
                    "created_at": data.get("created_at"),
                    "notes": phase.get("notes", ""),
                })
            except Exception as exc:  # best-effort: skip bad checkpoint files
                logger.debug("Skipping checkpoint %s: %s", path, exc)
        return checkpoints

    def _get_related_anomalies(self, anomaly: Anomaly) -> list[dict]:
        """Get related anomalies from history.

        Returns up to 10 stored anomalies that share the anomaly's type or
        phase, excluding the anomaly itself. Returns an empty list when
        Redis is unavailable.
        """
        related: list[dict] = []
        if not self._redis:
            return related

        type_value = _enum_value(anomaly.type)
        try:
            raw = self._redis.lrange(_ANOMALIES_KEY, 0, 100)
        except Exception as exc:  # best-effort: context is optional
            logger.debug("Could not read anomaly history: %s", exc)
            return related

        for item in raw:
            # Handle each record on its own so one corrupt entry does not
            # hide all the valid ones that follow it.
            try:
                data = json.loads(item)
                matches = data.get("type") == type_value or data.get("phase") == anomaly.phase
                is_self = data.get("id") == anomaly.id
            except Exception as exc:
                logger.debug("Skipping malformed anomaly record: %s", exc)
                continue
            if matches and not is_self:
                related.append(data)
                if len(related) >= 10:
                    break
        return related

    def _get_past_suggestions(self, anomaly: Anomaly) -> list[dict]:
        """Get past suggestions for similar issues.

        Returns up to 5 stored suggestions whose title contains the anomaly
        type value (case-insensitive), with any recorded review state
        (status/outcome) merged in. Returns an empty list when Redis is
        unavailable.
        """
        matches: list[dict] = []
        if not self._redis:
            return matches

        needle = str(_enum_value(anomaly.type)).lower()
        try:
            raw = self._redis.lrange(_SUGGESTIONS_KEY, 0, 100)
        except Exception as exc:  # best-effort: context is optional
            logger.debug("Could not read suggestion history: %s", exc)
            return matches

        for item in raw:
            try:
                data = json.loads(item)
                # Match by anomaly type pattern in title
                hit = needle in data.get("title", "").lower()
            except Exception as exc:
                logger.debug("Skipping malformed suggestion record: %s", exc)
                continue
            if hit:
                matches.append(data)
                if len(matches) >= 5:
                    break

        # Review results are stored separately from the list entries; merge
        # them so callers can see "implemented"/"success" outcomes.
        self._merge_review_state(matches)
        return matches

    def _merge_review_state(self, records: list[dict]) -> None:
        """Overlay stored review fields onto suggestion dicts, in place.

        ``update_suggestion_status`` writes status, reviewer and outcome to a
        per-suggestion hash; the list entries themselves are never rewritten.
        This copies those fields onto ``records``. Failures are logged and
        leave ``records`` untouched.
        """
        if not self._redis or not records:
            return
        try:
            # One round trip for all lookups.
            pipe = self._redis.pipeline(transaction=False)
            for record in records:
                pipe.hgetall(f"{_REVIEW_KEY_PREFIX}{record.get('id')}")
            updates = pipe.execute()
        except Exception as exc:  # best-effort: fall back to stored state
            logger.debug("Could not read review state: %s", exc)
            return

        for record, update in zip(records, updates):
            if not update:
                continue
            for key in ("status", "reviewed_at", "reviewed_by"):
                if update.get(key):
                    record[key] = update[key]
            if "outcome" in update:
                # An empty string is how "no outcome" is stored in the hash.
                record["outcome"] = update["outcome"] or None

    def _get_memory_entries(self, directory: str) -> list[dict]:
        """Get memory entries related to a directory.

        Looks at up to 10 summary files (taken from the end of the
        name-sorted listing) and keeps those whose ``metadata.source``
        contains ``directory``. Unreadable or malformed files are skipped.
        """
        entries: list[dict] = []
        summaries_dir = self.memory_dir / "summaries"
        if not summaries_dir.exists():
            return entries

        try:
            files = sorted(summaries_dir.glob("*.json"), reverse=True)[:10]
        except Exception as exc:  # best-effort: context is optional
            logger.debug("Could not list memory summaries: %s", exc)
            return entries

        for path in files:
            # Per-file handling so one bad file does not drop the rest.
            try:
                data = json.loads(path.read_text(encoding="utf-8"))
                if directory in data.get("metadata", {}).get("source", ""):
                    entries.append({
                        "id": data.get("id"),
                        "summary": data.get("summary", "")[:200],
                        "created_at": data.get("created_at"),
                    })
            except Exception as exc:
                logger.debug("Skipping memory summary %s: %s", path, exc)
        return entries

    def _get_status_history(self, directory: str) -> list[dict]:
        """Get STATUS.md history for a directory.

        Parses the text after the first "## Activity Log" heading and
        returns up to 5 "### YYYY-MM-DD ..." entries, each with its details
        truncated to 200 characters.
        """
        history: list[dict] = []
        status_file = self.base_path / directory / "STATUS.md"
        if not status_file.exists():
            return history

        try:
            content = status_file.read_text(encoding="utf-8")
            # Parse activity log section
            if "## Activity Log" in content:
                log_section = content.split("## Activity Log")[1]
                # Extract entries (format: ### YYYY-MM-DD ...)
                for date, details in _ACTIVITY_ENTRY_RE.findall(log_section)[:5]:
                    history.append({
                        "date": date.strip(),
                        "details": details.strip()[:200],
                    })
        except Exception as exc:  # best-effort: context is optional
            logger.debug("Could not read %s: %s", status_file, exc)
        return history

    def generate_suggestions(self, anomaly: Anomaly) -> list[Suggestion]:
        """Generate fix suggestions for an anomaly.

        Combines pattern-based, context-aware and phase-specific suggestions,
        sorts them by descending priority score, persists them (best-effort)
        and records them on ``self.suggestions``.
        """
        context = self.gather_context(anomaly)

        suggestions: list[Suggestion] = []
        # 1. Get pattern-based suggestions
        suggestions.extend(self._generate_pattern_suggestions(anomaly))
        # 2. Generate context-aware suggestions
        suggestions.extend(self._generate_context_suggestions(anomaly, context))
        # 3. Generate phase-specific suggestions
        suggestions.extend(self._generate_phase_suggestions(anomaly))

        # Sort by priority score
        suggestions.sort(key=lambda s: s.priority_score, reverse=True)

        # Persist suggestions
        self._persist_suggestions(suggestions)
        self.suggestions.extend(suggestions)

        return suggestions

    def _generate_pattern_suggestions(self, anomaly: Anomaly) -> list[Suggestion]:
        """Generate suggestions based on known patterns (see ``FIX_PATTERNS``)."""
        type_value = _enum_value(anomaly.type)
        suggestions = []

        for title, risk, impact in self.FIX_PATTERNS.get(anomaly.type, []):
            low_risk = risk in ("low", "trivial")
            suggestions.append(Suggestion(
                id="",
                anomaly_id=anomaly.id,
                title=title,
                description=f"Standard fix for {type_value}: {title}",
                reasoning=f"This is a common fix pattern for {type_value} anomalies. "
                          f"Historical data shows this approach resolves similar issues.",
                fix_steps=[
                    f"1. Analyze the specific {type_value} in {anomaly.directory}",
                    f"2. Apply {title.lower()}",
                    "3. Verify fix with tests",
                    "4. Update STATUS.md and create checkpoint",
                ],
                risk=RiskLevel(risk),
                impact=ImpactLevel(impact),
                # Trivial-risk fixes are grouped with low-risk ones, the same
                # way the auto_fixable rule below treats them.
                estimated_effort="small" if low_risk else "medium",
                files_affected=[anomaly.directory],
                auto_fixable=low_risk,
                requires_human=risk in ("critical", "high"),
            ))

        return suggestions

    def _generate_context_suggestions(self, anomaly: Anomaly, context: HistoricalContext) -> list[Suggestion]:
        """Generate suggestions based on historical context.

        Produces at most one "Repeat" suggestion (from the first past
        suggestion recorded as implemented with outcome "success") and at
        most one checkpoint-review suggestion (from the first checkpoint
        whose notes mention the anomaly type or directory).
        """
        suggestions = []

        # Check if similar anomalies were resolved before
        for past in context.past_suggestions:
            if past.get("status") != "implemented" or past.get("outcome") != "success":
                continue
            # Stored data may be stale or hand-edited; fall back to MEDIUM
            # rather than failing on an unknown impact value.
            try:
                impact = ImpactLevel(past.get("impact", "medium"))
            except ValueError:
                impact = ImpactLevel.MEDIUM
            suggestions.append(Suggestion(
                id="",
                anomaly_id=anomaly.id,
                title=f"Repeat: {past.get('title', 'Previous fix')}",
                description=f"This fix worked for a similar issue before",
                reasoning=f"A similar anomaly was resolved on {past.get('created_at', 'unknown date')} "
                          f"using this approach. The outcome was successful.",
                fix_steps=past.get("fix_steps") or ["Review and apply previous fix"],
                risk=RiskLevel.LOW,  # Lower risk since proven
                impact=impact,
                estimated_effort="small",
                auto_fixable=True,
                requires_human=False,
            ))
            break

        # Check checkpoint notes for relevant context
        type_needle = str(_enum_value(anomaly.type)).lower()
        for ckpt in context.recent_checkpoints:
            notes = ckpt.get("notes") or ""  # notes may be stored as null
            if type_needle in notes.lower() or anomaly.directory in notes:
                suggestions.append(Suggestion(
                    id="",
                    anomaly_id=anomaly.id,
                    title="Review recent checkpoint context",
                    description=f"Recent checkpoint {ckpt.get('id')} may have relevant context",
                    reasoning=f"Checkpoint notes mention related content: {notes[:100]}...",
                    fix_steps=[
                        f"1. Load checkpoint {ckpt.get('id')}",
                        "2. Review context and changes",
                        "3. Determine if rollback or forward-fix needed",
                    ],
                    risk=RiskLevel.LOW,
                    impact=ImpactLevel.MEDIUM,
                    estimated_effort="small",
                    requires_human=True,
                ))
                break

        return suggestions

    def _generate_phase_suggestions(self, anomaly: Anomaly) -> list[Suggestion]:
        """Generate phase-specific suggestions (phases 5 and 8 only)."""
        suggestions = []
        message = anomaly.message.lower()

        if anomaly.phase == 5:  # Agent Bootstrapping - SPECIAL ATTENTION
            if "config" in message or "missing" in message:
                suggestions.append(Suggestion(
                    id="",
                    anomaly_id=anomaly.id,
                    title="Regenerate agent configuration",
                    description="Agent configuration may be corrupted or missing",
                    reasoning="Phase 5 (Agent Bootstrapping) is critical. Missing configuration "
                              "can block agent initialization and all dependent phases.",
                    fix_steps=[
                        "1. Check agents/tier0-agent/config/ directory",
                        "2. Regenerate agent.json from template",
                        "3. Verify AppRole credentials in credentials/",
                        "4. Run agent.py status to verify",
                        "5. Create checkpoint",
                    ],
                    risk=RiskLevel.MEDIUM,
                    impact=ImpactLevel.HIGH,
                    estimated_effort="medium",
                    files_affected=[
                        "agents/tier0-agent/config/agent.json",
                        "agents/tier0-agent/credentials/approle.json",
                    ],
                    requires_human=True,
                ))

        elif anomaly.phase == 8:  # Production Hardening
            if "health" in message:
                suggestions.append(Suggestion(
                    id="",
                    anomaly_id=anomaly.id,
                    title="Implement health_manager.py (Phase 8 prerequisite)",
                    description="Health management is a critical Phase 8 component",
                    reasoning="Phase 8 Production Hardening requires health monitoring. "
                              "This is blocking other Phase 8 work (circuit breaker, alerts).",
                    fix_steps=[
                        "1. Create runtime/health_manager.py",
                        "2. Implement HealthStatus tracking",
                        "3. Add DragonflyDB persistence",
                        "4. Connect to evidence.py health checks",
                        "5. Update monitors.py with HealthWatcher",
                    ],
                    risk=RiskLevel.MEDIUM,
                    impact=ImpactLevel.TRANSFORMATIVE,
                    estimated_effort="large",
                    files_affected=[
                        "runtime/health_manager.py",
                        "runtime/monitors.py",
                        "evidence/evidence.py",
                    ],
                    requires_human=True,
                ))

        return suggestions

    def _persist_suggestions(self, suggestions: list[Suggestion]):
        """Persist suggestions to storage.

        Best-effort: a storage or serialization failure is logged and
        swallowed so the suggestions already computed by the caller are not
        lost. Does nothing when Redis is unavailable or the list is empty.
        """
        if not self._redis or not suggestions:
            return

        try:
            payloads = [json.dumps(asdict(s)) for s in suggestions]
            # A multi-value LPUSH inserts the values left to right, giving
            # the same final order as pushing them one at a time.
            self._redis.lpush(_SUGGESTIONS_KEY, *payloads)
            # Keep only last 500 suggestions
            self._redis.ltrim(_SUGGESTIONS_KEY, 0, _MAX_STORED_SUGGESTIONS - 1)
        except (redis.RedisError, TypeError, ValueError) as exc:
            logger.warning("Could not persist %d suggestion(s): %s", len(suggestions), exc)

    def update_suggestion_status(
        self,
        suggestion_id: str,
        status: SuggestionStatus,
        reviewed_by: str = "system",
        outcome: Optional[str] = None
    ) -> bool:
        """Update suggestion status after review.

        The in-memory copy (if any) is always updated. When Redis is
        available the review fields are also written to the
        per-suggestion hash, which ``get_suggestions`` merges back in.

        Returns:
            Without Redis: ``True`` if a matching in-memory suggestion was
            found, else ``False``. With Redis: always ``True``.

        Raises:
            redis.RedisError: if the write to Redis fails.
        """
        reviewed_at = self._now()

        found = False
        for sug in self.suggestions:
            if sug.id == suggestion_id:
                sug.status = status
                sug.reviewed_at = reviewed_at
                sug.reviewed_by = reviewed_by
                sug.outcome = outcome
                found = True
                break

        if not self._redis:
            return found

        self._redis.hset(f"{_REVIEW_KEY_PREFIX}{suggestion_id}", mapping={
            "status": status.value,
            "reviewed_at": reviewed_at,
            "reviewed_by": reviewed_by,
            "outcome": outcome or "",
        })
        return True

    def get_suggestions(
        self,
        status: Optional[SuggestionStatus] = None,
        limit: int = 50
    ) -> list[Suggestion]:
        """Get suggestions with optional status filter.

        Returns at most ``limit`` suggestions sorted by descending priority
        score; a non-positive ``limit`` yields an empty list.

        With Redis, an unfiltered call considers the ``limit`` most recently
        stored suggestions. A filtered call scans the whole stored list so
        that matches are not hidden behind newer non-matching entries.
        Stored records that cannot be parsed are skipped.
        """
        if limit <= 0:
            return []

        if not self._redis:
            candidates = self.suggestions
            if status:
                candidates = [s for s in candidates if s.status == status]
            return sorted(candidates, key=lambda s: s.priority_score, reverse=True)[:limit]

        last_index = limit - 1 if status is None else -1
        raw = self._redis.lrange(_SUGGESTIONS_KEY, 0, last_index)

        records = []
        for item in raw:
            try:
                data = json.loads(item)
            except Exception:
                continue
            if isinstance(data, dict):
                records.append(data)

        # Bring in status/outcome recorded by update_suggestion_status.
        self._merge_review_state(records)

        suggestions = []
        for data in records:
            try:
                # Convert enum strings back to enums
                data['risk'] = RiskLevel(data['risk'])
                data['impact'] = ImpactLevel(data['impact'])
                data['status'] = SuggestionStatus(data['status'])
                suggestion = Suggestion(**data)
            except Exception:
                continue
            if status and suggestion.status != status:
                continue
            suggestions.append(suggestion)

        suggestions.sort(key=lambda s: s.priority_score, reverse=True)
        return suggestions[:limit]

    def get_summary(self) -> dict:
        """Get summary of suggestions: totals plus counts by status, risk and impact."""
        suggestions = self.get_suggestions(limit=_MAX_STORED_SUGGESTIONS)

        by_status = {s.value: 0 for s in SuggestionStatus}
        by_risk = {r.value: 0 for r in RiskLevel}
        by_impact = {i.value: 0 for i in ImpactLevel}
        auto_fixable = 0

        for s in suggestions:
            # Handle both enum and string values
            status_val = _enum_value(s.status)
            risk_val = _enum_value(s.risk)
            impact_val = _enum_value(s.impact)

            by_status[status_val] = by_status.get(status_val, 0) + 1
            by_risk[risk_val] = by_risk.get(risk_val, 0) + 1
            by_impact[impact_val] = by_impact.get(impact_val, 0) + 1
            if s.auto_fixable:
                auto_fixable += 1

        return {
            "total": len(suggestions),
            "pending": by_status.get("pending", 0),
            "approved": by_status.get("approved", 0),
            "implemented": by_status.get("implemented", 0),
            "auto_fixable": auto_fixable,
            "by_status": by_status,
            "by_risk": by_risk,
            "by_impact": by_impact,
        }


def main() -> None:
    """Command-line entry point: ``analyze``, ``list`` or ``status``."""
    import argparse
    from .bug_watcher import BugWindowWatcher

    parser = argparse.ArgumentParser(description="Suggestion Engine")
    parser.add_argument("command", choices=["analyze", "list", "status"])
    parser.add_argument("--phase", type=int, help="Phase to analyze")
    parser.add_argument("--json", action="store_true")

    args = parser.parse_args()

    engine = SuggestionEngine()

    if args.command == "analyze":
        watcher = BugWindowWatcher()
        # Compare against None so that an explicit "--phase 0" is honoured
        # instead of silently falling back to a full scan.
        if args.phase is not None:
            anomalies = watcher.scan_phase(args.phase)
        else:
            anomalies = watcher.scan_all_phases()

        all_suggestions = []
        for anomaly in anomalies:
            all_suggestions.extend(engine.generate_suggestions(anomaly))

        if args.json:
            print(json.dumps([asdict(s) for s in all_suggestions], indent=2))
        else:
            print(f"\n{'='*60}")
            print(f"SUGGESTION ENGINE - Analysis Results")
            print(f"{'='*60}")
            print(f"Anomalies analyzed: {len(anomalies)}")
            print(f"Suggestions generated: {len(all_suggestions)}")
            print()

            for s in all_suggestions[:10]:
                print(f"[{s.risk.value.upper()}] {s.title}")
                print(f" Impact: {s.impact.value} | Auto-fix: {s.auto_fixable}")
                print(f" {s.reasoning[:100]}...")
                print()

    elif args.command == "list":
        suggestions = engine.get_suggestions()

        if args.json:
            print(json.dumps([asdict(s) for s in suggestions], indent=2))
        else:
            status_icons = {"pending": "⏳", "approved": "✅", "rejected": "❌", "implemented": "🎯"}
            for s in suggestions:
                status_icon = status_icons.get(s.status.value, "❓")
                print(f"{status_icon} [{s.id}] {s.title} ({s.risk.value}/{s.impact.value})")

    elif args.command == "status":
        summary = engine.get_summary()
        if args.json:
            print(json.dumps(summary, indent=2))
        else:
            print(f"\nSuggestion Engine Status")
            print(f"Total: {summary['total']} | Pending: {summary['pending']} | Auto-fixable: {summary['auto_fixable']}")


if __name__ == "__main__":
    main()