agent-governance/testing/oversight/suggestion_engine.py
commit 77655c298c Initial commit: Agent Governance System Phase 8
Phase 8 Production Hardening with complete governance infrastructure:

- Vault integration with tiered policies (T0-T4)
- DragonflyDB state management
- SQLite audit ledger
- Pipeline DSL and templates
- Promotion/revocation engine
- Checkpoint system for session persistence
- Health manager and circuit breaker for fault tolerance
- GitHub/Slack integrations
- Architectural test pipeline with bug watcher, suggestion engine, council review
- Multi-agent chaos testing framework

Test Results:
- Governance tests: 68/68 passing
- E2E workflow: 16/16 passing
- Phase 2 Vault: 14/14 passing
- Integration tests: 27/27 passing

Coverage: 57.6% average across 12 phases

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 22:07:06 -05:00

657 lines
24 KiB
Python

"""
Suggestion + Reasoning Pipeline
===============================
Analyzes outputs and bug reports using historical context to generate
suggested fixes with rationale, ranked by risk/impact.
Features:
- Uses checkpoints, memory logs, STATUS files for context
- Generates fix suggestions with reasoning
- Ranks by risk and impact
- Tracks suggestion outcomes for learning
"""
import json
import hashlib
from datetime import datetime, timezone
from dataclasses import dataclass, field, asdict
from enum import Enum
from pathlib import Path
from typing import Any, Optional
import sqlite3
import redis
from .bug_watcher import Anomaly, AnomalyType, Severity
class RiskLevel(str, Enum):
    """How risky it would be to implement a suggestion."""
    CRITICAL = "critical"  # could break the system
    HIGH = "high"          # significant change
    MEDIUM = "medium"      # moderate change
    LOW = "low"            # safe change
    TRIVIAL = "trivial"    # no risk
class ImpactLevel(str, Enum):
    """How much benefit implementing a suggestion would deliver."""
    TRANSFORMATIVE = "transformative"  # major system improvement
    HIGH = "high"                      # significant improvement
    MEDIUM = "medium"                  # moderate improvement
    LOW = "low"                        # minor improvement
    MINIMAL = "minimal"                # negligible improvement
class SuggestionStatus(str, Enum):
    """Lifecycle state of a suggestion from creation through review."""
    PENDING = "pending"
    APPROVED = "approved"
    REJECTED = "rejected"
    IMPLEMENTED = "implemented"
    FAILED = "failed"
@dataclass
class Suggestion:
    """A suggested fix or improvement, tied to the anomaly that prompted it."""
    id: str
    anomaly_id: str
    title: str
    description: str
    reasoning: str
    fix_steps: list[str]
    risk: RiskLevel
    impact: ImpactLevel
    estimated_effort: str  # one of: "trivial", "small", "medium", "large", "epic"
    files_affected: list[str] = field(default_factory=list)
    code_changes: Optional[dict] = None  # {"file": {"old": ..., "new": ...}}
    auto_fixable: bool = False
    requires_human: bool = True
    status: SuggestionStatus = SuggestionStatus.PENDING
    created_at: str = ""
    reviewed_at: Optional[str] = None
    reviewed_by: Optional[str] = None
    outcome: Optional[str] = None

    def __post_init__(self):
        # Fill in the timestamp first so the derived id stays stable for
        # a given (anomaly, title, creation time) triple.
        if not self.created_at:
            self.created_at = datetime.now(timezone.utc).isoformat()
        if not self.id:
            digest = hashlib.sha256(
                f"{self.anomaly_id}{self.title}{self.created_at}".encode()
            ).hexdigest()
            self.id = f"sug-{digest[:12]}"

    @property
    def priority_score(self) -> float:
        """Calculate priority score (higher = more urgent).

        High impact combined with low risk yields the largest score.
        """
        risk_weight = {"critical": 0.2, "high": 0.4, "medium": 0.6, "low": 0.8, "trivial": 1.0}
        impact_weight = {"transformative": 1.0, "high": 0.8, "medium": 0.6, "low": 0.4, "minimal": 0.2}
        # Tolerate both enum members and their raw string values.
        risk_key = getattr(self.risk, "value", self.risk)
        impact_key = getattr(self.impact, "value", self.impact)
        return impact_weight.get(impact_key, 0.5) * risk_weight.get(risk_key, 0.5)
@dataclass
class HistoricalContext:
    """Context gathered from historical data.

    Populated by SuggestionEngine.gather_context(); every list defaults to
    empty so the object is usable even when a data source is unavailable.
    """
    recent_checkpoints: list[dict] = field(default_factory=list)  # latest checkpoint metadata
    related_anomalies: list[dict] = field(default_factory=list)   # anomalies of the same type or phase
    past_suggestions: list[dict] = field(default_factory=list)    # prior suggestions for similar issues
    memory_entries: list[dict] = field(default_factory=list)      # memory summaries mentioning the directory
    status_history: list[dict] = field(default_factory=list)      # STATUS.md activity-log entries
class SuggestionEngine:
    """
    Analyzes anomalies and generates fix suggestions using historical context.
    Process:
    1. Gather context from checkpoints, memory, STATUS files
    2. Analyze anomaly patterns
    3. Generate suggestions with reasoning
    4. Rank by risk/impact
    5. Track outcomes for learning

    Suggestions are persisted to DragonflyDB (Redis protocol) when available;
    otherwise they are kept only in the in-memory `self.suggestions` list.
    """
    # Anomaly type -> common fixes mapping.
    # Each entry is a (title, risk, impact) tuple; risk/impact are the string
    # values of RiskLevel / ImpactLevel and are converted to enum members in
    # _generate_pattern_suggestions.
    FIX_PATTERNS = {
        AnomalyType.UNHANDLED_ERROR: [
            ("Add try-except block", "medium", "medium"),
            ("Add error logging", "low", "low"),
            ("Create error handler", "medium", "high"),
        ],
        AnomalyType.REGRESSION: [
            ("Rollback to previous version", "high", "high"),
            ("Add regression test", "low", "medium"),
            ("Investigate root cause", "low", "high"),
        ],
        AnomalyType.MISSING_ARTIFACT: [
            ("Create missing file", "low", "medium"),
            ("Update documentation", "trivial", "low"),
            ("Add artifact generation step", "medium", "medium"),
        ],
        AnomalyType.STATE_INCONSISTENCY: [
            ("Resync state from source", "medium", "high"),
            ("Add state validation", "low", "medium"),
            ("Implement state machine", "high", "high"),
        ],
        AnomalyType.DEPENDENCY_UNAVAILABLE: [
            ("Restart dependency service", "low", "high"),
            ("Add fallback mechanism", "medium", "high"),
            ("Implement circuit breaker", "medium", "high"),
        ],
        AnomalyType.HEALTH_CHECK_FAILURE: [
            ("Investigate failing check", "low", "medium"),
            ("Restart affected service", "low", "high"),
            ("Add health recovery logic", "medium", "high"),
        ],
        AnomalyType.SECURITY_VIOLATION: [
            ("Revoke compromised credentials", "low", "transformative"),
            ("Audit access logs", "low", "high"),
            ("Strengthen access controls", "medium", "high"),
        ],
        AnomalyType.TIMEOUT: [
            ("Increase timeout threshold", "low", "low"),
            ("Optimize slow operation", "medium", "high"),
            ("Add async processing", "high", "high"),
        ],
    }

    def __init__(self, base_path: str = "/opt/agent-governance"):
        """Set up storage paths and try to connect to DragonflyDB.

        Args:
            base_path: Root of the governance tree containing the ledger/,
                checkpoint/, and memory/ subdirectories.
        """
        self.base_path = Path(base_path)
        # Audit-ledger path; stored here but never queried by this class.
        self.ledger_db = self.base_path / "ledger" / "governance.db"
        self.checkpoint_dir = self.base_path / "checkpoint" / "storage"
        self.memory_dir = self.base_path / "memory"
        # In-memory fallback store used when Redis is unavailable.
        self.suggestions: list[Suggestion] = []
        self._redis: Optional[redis.Redis] = None
        self._setup_redis()

    def _setup_redis(self) -> None:
        """Connect to DragonflyDB; on any failure fall back to in-memory mode."""
        try:
            # NOTE(review): hardcoded host/port/password — consider sourcing
            # these from config or Vault instead of source code.
            self._redis = redis.Redis(
                host='127.0.0.1',
                port=6379,
                password='governance2026',
                decode_responses=True
            )
            # Ping immediately so a dead server is detected here, not later.
            self._redis.ping()
        except Exception:
            # All persistence methods check self._redis before using it.
            self._redis = None

    def _now(self) -> str:
        # UTC ISO-8601 timestamp used for review bookkeeping.
        return datetime.now(timezone.utc).isoformat()

    def gather_context(self, anomaly: Anomaly) -> HistoricalContext:
        """Gather historical context relevant to an anomaly.

        Each source is best-effort; missing files or an unavailable Redis
        simply leave the corresponding list empty.
        """
        context = HistoricalContext()
        # 1. Recent checkpoints
        context.recent_checkpoints = self._get_recent_checkpoints(5)
        # 2. Related anomalies (same type or phase)
        context.related_anomalies = self._get_related_anomalies(anomaly)
        # 3. Past suggestions for similar issues
        context.past_suggestions = self._get_past_suggestions(anomaly)
        # 4. Memory entries related to the directory/phase
        context.memory_entries = self._get_memory_entries(anomaly.directory)
        # 5. STATUS history for the affected directory
        context.status_history = self._get_status_history(anomaly.directory)
        return context

    def _get_recent_checkpoints(self, limit: int) -> list[dict]:
        """Get recent checkpoints (newest first, up to `limit`)."""
        checkpoints = []
        if not self.checkpoint_dir.exists():
            return checkpoints
        # Reverse lexicographic sort of filenames; assumes ckpt-* names embed
        # a sortable timestamp/sequence so newest sorts first — TODO confirm.
        files = sorted(self.checkpoint_dir.glob("ckpt-*.json"), reverse=True)[:limit]
        for f in files:
            try:
                data = json.loads(f.read_text())
                checkpoints.append({
                    "id": data.get("checkpoint_id"),
                    "phase": data.get("phase", {}).get("name"),
                    "created_at": data.get("created_at"),
                    "notes": data.get("phase", {}).get("notes", "")
                })
            except Exception:
                # Skip unreadable or corrupt checkpoint files.
                continue
        return checkpoints

    def _get_related_anomalies(self, anomaly: Anomaly) -> list[dict]:
        """Get up to 10 anomalies of the same type or phase from history."""
        related = []
        if not self._redis:
            return related
        try:
            # Scan the most recent ~100 recorded anomalies.
            raw = self._redis.lrange("oversight:anomalies", 0, 100)
            for item in raw:
                data = json.loads(item)
                if data.get("type") == anomaly.type.value or data.get("phase") == anomaly.phase:
                    # Exclude the anomaly we are currently analyzing.
                    if data.get("id") != anomaly.id:
                        related.append(data)
                        if len(related) >= 10:
                            break
        except Exception:
            # Best-effort: a Redis/JSON failure just yields fewer results.
            pass
        return related

    def _get_past_suggestions(self, anomaly: Anomaly) -> list[dict]:
        """Get up to 5 past suggestions whose title mentions this anomaly type."""
        suggestions = []
        if not self._redis:
            return suggestions
        try:
            raw = self._redis.lrange("oversight:suggestions", 0, 100)
            for item in raw:
                data = json.loads(item)
                # Match by anomaly type pattern in title (substring match).
                if anomaly.type.value.lower() in data.get("title", "").lower():
                    suggestions.append(data)
                    if len(suggestions) >= 5:
                        break
        except Exception:
            pass
        return suggestions

    def _get_memory_entries(self, directory: str) -> list[dict]:
        """Get memory summaries whose metadata.source mentions `directory`."""
        entries = []
        summaries_dir = self.memory_dir / "summaries"
        if not summaries_dir.exists():
            return entries
        try:
            # Newest 10 summary files by reverse filename sort.
            files = sorted(summaries_dir.glob("*.json"), reverse=True)[:10]
            for f in files:
                data = json.loads(f.read_text())
                if directory in data.get("metadata", {}).get("source", ""):
                    entries.append({
                        "id": data.get("id"),
                        # Truncate to keep context payloads small.
                        "summary": data.get("summary", "")[:200],
                        "created_at": data.get("created_at")
                    })
        except Exception:
            # NOTE: one bad file aborts the whole scan (try wraps the loop).
            pass
        return entries

    def _get_status_history(self, directory: str) -> list[dict]:
        """Parse up to 5 Activity Log entries from a directory's STATUS.md."""
        history = []
        status_file = self.base_path / directory / "STATUS.md"
        if not status_file.exists():
            return history
        try:
            content = status_file.read_text()
            # Parse activity log section
            if "## Activity Log" in content:
                log_section = content.split("## Activity Log")[1]
                # Extract entries (format: ### YYYY-MM-DD ...)
                import re
                entries = re.findall(r'### (\d{4}-\d{2}-\d{2}[^\n]*)\n([^#]*)', log_section)
                for date, details in entries[:5]:
                    history.append({
                        "date": date.strip(),
                        "details": details.strip()[:200]
                    })
        except Exception:
            pass
        return history

    def generate_suggestions(self, anomaly: Anomaly) -> list[Suggestion]:
        """Generate fix suggestions for an anomaly.

        Combines pattern-based, context-based, and phase-specific suggestions,
        sorts them by priority, persists them, and returns the sorted list.
        """
        suggestions = []
        context = self.gather_context(anomaly)
        # 1. Get pattern-based suggestions
        pattern_suggestions = self._generate_pattern_suggestions(anomaly)
        suggestions.extend(pattern_suggestions)
        # 2. Generate context-aware suggestions
        context_suggestions = self._generate_context_suggestions(anomaly, context)
        suggestions.extend(context_suggestions)
        # 3. Generate phase-specific suggestions
        phase_suggestions = self._generate_phase_suggestions(anomaly)
        suggestions.extend(phase_suggestions)
        # Sort by priority score
        suggestions.sort(key=lambda s: s.priority_score, reverse=True)
        # Persist suggestions
        self._persist_suggestions(suggestions)
        self.suggestions.extend(suggestions)
        return suggestions

    def _generate_pattern_suggestions(self, anomaly: Anomaly) -> list[Suggestion]:
        """Generate suggestions based on known FIX_PATTERNS for the anomaly type."""
        suggestions = []
        patterns = self.FIX_PATTERNS.get(anomaly.type, [])
        for title, risk, impact in patterns:
            suggestion = Suggestion(
                id="",  # empty id triggers deterministic generation in __post_init__
                anomaly_id=anomaly.id,
                title=title,
                description=f"Standard fix for {anomaly.type.value}: {title}",
                reasoning=f"This is a common fix pattern for {anomaly.type.value} anomalies. "
                          f"Historical data shows this approach resolves similar issues.",
                fix_steps=[
                    f"1. Analyze the specific {anomaly.type.value} in {anomaly.directory}",
                    f"2. Apply {title.lower()}",
                    "3. Verify fix with tests",
                    "4. Update STATUS.md and create checkpoint"
                ],
                risk=RiskLevel(risk),
                impact=ImpactLevel(impact),
                # NOTE(review): only "low" risk maps to "small" effort, so
                # "trivial" falls through to "medium" — confirm intended.
                estimated_effort="small" if risk == "low" else "medium",
                files_affected=[anomaly.directory],
                auto_fixable=risk in ["low", "trivial"],
                requires_human=risk in ["critical", "high"]
            )
            suggestions.append(suggestion)
        return suggestions

    def _generate_context_suggestions(self, anomaly: Anomaly, context: HistoricalContext) -> list[Suggestion]:
        """Generate suggestions based on historical context.

        Emits at most one "repeat previous fix" suggestion and at most one
        "review checkpoint" suggestion (each loop breaks after first match).
        """
        suggestions = []
        # Check if similar anomalies were resolved before
        for past in context.past_suggestions:
            if past.get("status") == "implemented" and past.get("outcome") == "success":
                suggestion = Suggestion(
                    id="",
                    anomaly_id=anomaly.id,
                    title=f"Repeat: {past.get('title', 'Previous fix')}",
                    description=f"This fix worked for a similar issue before",
                    reasoning=f"A similar anomaly was resolved on {past.get('created_at', 'unknown date')} "
                              f"using this approach. The outcome was successful.",
                    fix_steps=past.get("fix_steps", ["Review and apply previous fix"]),
                    risk=RiskLevel.LOW,  # Lower risk since proven
                    impact=ImpactLevel(past.get("impact", "medium")),
                    estimated_effort="small",
                    auto_fixable=True,
                    requires_human=False
                )
                suggestions.append(suggestion)
                break
        # Check checkpoint notes for relevant context
        for ckpt in context.recent_checkpoints:
            notes = ckpt.get("notes", "")
            if anomaly.type.value.lower() in notes.lower() or anomaly.directory in notes:
                suggestion = Suggestion(
                    id="",
                    anomaly_id=anomaly.id,
                    title="Review recent checkpoint context",
                    description=f"Recent checkpoint {ckpt.get('id')} may have relevant context",
                    reasoning=f"Checkpoint notes mention related content: {notes[:100]}...",
                    fix_steps=[
                        f"1. Load checkpoint {ckpt.get('id')}",
                        "2. Review context and changes",
                        "3. Determine if rollback or forward-fix needed"
                    ],
                    risk=RiskLevel.LOW,
                    impact=ImpactLevel.MEDIUM,
                    estimated_effort="small",
                    requires_human=True
                )
                suggestions.append(suggestion)
                break
        return suggestions

    def _generate_phase_suggestions(self, anomaly: Anomaly) -> list[Suggestion]:
        """Generate phase-specific suggestions (phases 5 and 8 only)."""
        suggestions = []
        if anomaly.phase == 5:  # Agent Bootstrapping - SPECIAL ATTENTION
            if "config" in anomaly.message.lower() or "missing" in anomaly.message.lower():
                suggestions.append(Suggestion(
                    id="",
                    anomaly_id=anomaly.id,
                    title="Regenerate agent configuration",
                    description="Agent configuration may be corrupted or missing",
                    reasoning="Phase 5 (Agent Bootstrapping) is critical. Missing configuration "
                              "can block agent initialization and all dependent phases.",
                    fix_steps=[
                        "1. Check agents/tier0-agent/config/ directory",
                        "2. Regenerate agent.json from template",
                        "3. Verify AppRole credentials in credentials/",
                        "4. Run agent.py status to verify",
                        "5. Create checkpoint"
                    ],
                    risk=RiskLevel.MEDIUM,
                    impact=ImpactLevel.HIGH,
                    estimated_effort="medium",
                    files_affected=[
                        "agents/tier0-agent/config/agent.json",
                        "agents/tier0-agent/credentials/approle.json"
                    ],
                    requires_human=True
                ))
        elif anomaly.phase == 8:  # Production Hardening
            if "health" in anomaly.message.lower():
                suggestions.append(Suggestion(
                    id="",
                    anomaly_id=anomaly.id,
                    title="Implement health_manager.py (Phase 8 prerequisite)",
                    description="Health management is a critical Phase 8 component",
                    reasoning="Phase 8 Production Hardening requires health monitoring. "
                              "This is blocking other Phase 8 work (circuit breaker, alerts).",
                    fix_steps=[
                        "1. Create runtime/health_manager.py",
                        "2. Implement HealthStatus tracking",
                        "3. Add DragonflyDB persistence",
                        "4. Connect to evidence.py health checks",
                        "5. Update monitors.py with HealthWatcher"
                    ],
                    risk=RiskLevel.MEDIUM,
                    impact=ImpactLevel.TRANSFORMATIVE,
                    estimated_effort="large",
                    files_affected=[
                        "runtime/health_manager.py",
                        "runtime/monitors.py",
                        "evidence/evidence.py"
                    ],
                    requires_human=True
                ))
        return suggestions

    def _persist_suggestions(self, suggestions: list[Suggestion]):
        """Persist suggestions to the Redis list (no-op without a connection)."""
        if not self._redis:
            return
        for suggestion in suggestions:
            # LPUSH so newest suggestions sit at the head of the list.
            self._redis.lpush(
                "oversight:suggestions",
                json.dumps(asdict(suggestion))
            )
        # Keep only last 500 suggestions
        self._redis.ltrim("oversight:suggestions", 0, 499)

    def update_suggestion_status(
        self,
        suggestion_id: str,
        status: SuggestionStatus,
        reviewed_by: str = "system",
        outcome: Optional[str] = None
    ) -> bool:
        """Update suggestion status after review.

        Returns:
            True if an update was recorded. Without Redis, False when no
            in-memory suggestion matches `suggestion_id`.
        """
        if not self._redis:
            # In-memory path: mutate the matching Suggestion in place.
            for sug in self.suggestions:
                if sug.id == suggestion_id:
                    sug.status = status
                    sug.reviewed_at = self._now()
                    sug.reviewed_by = reviewed_by
                    sug.outcome = outcome
                    return True
            return False
        # NOTE(review): this writes a per-suggestion hash key, but
        # get_suggestions() reads only the "oversight:suggestions" list, so
        # the new status is not reflected in listings — confirm intended.
        # Also returns True without verifying the suggestion exists.
        self._redis.hset(f"oversight:suggestion:{suggestion_id}", mapping={
            "status": status.value,
            "reviewed_at": self._now(),
            "reviewed_by": reviewed_by,
            "outcome": outcome or ""
        })
        return True

    def get_suggestions(
        self,
        status: Optional[SuggestionStatus] = None,
        limit: int = 50
    ) -> list[Suggestion]:
        """Get suggestions with optional status filter, sorted by priority."""
        if not self._redis:
            filtered = self.suggestions
            if status:
                filtered = [s for s in filtered if s.status == status]
            return sorted(filtered, key=lambda s: s.priority_score, reverse=True)[:limit]
        # NOTE(review): `limit` items are fetched BEFORE status filtering, so
        # fewer than `limit` matches may be returned even when more exist.
        raw = self._redis.lrange("oversight:suggestions", 0, limit - 1)
        suggestions = []
        for item in raw:
            try:
                data = json.loads(item)
                # Convert enum strings back to enums
                data['risk'] = RiskLevel(data['risk'])
                data['impact'] = ImpactLevel(data['impact'])
                data['status'] = SuggestionStatus(data['status'])
                suggestion = Suggestion(**data)
                if status and suggestion.status != status:
                    continue
                suggestions.append(suggestion)
            except Exception:
                # Skip malformed records rather than failing the listing.
                continue
        return sorted(suggestions, key=lambda s: s.priority_score, reverse=True)

    def get_summary(self) -> dict:
        """Get counts of suggestions by status, risk, and impact."""
        suggestions = self.get_suggestions(limit=500)
        # Pre-seed every bucket with 0 so all enum values appear in output.
        by_status = {s.value: 0 for s in SuggestionStatus}
        by_risk = {r.value: 0 for r in RiskLevel}
        by_impact = {i.value: 0 for i in ImpactLevel}
        auto_fixable = 0
        for s in suggestions:
            # Handle both enum and string values
            status_val = s.status.value if hasattr(s.status, 'value') else s.status
            risk_val = s.risk.value if hasattr(s.risk, 'value') else s.risk
            impact_val = s.impact.value if hasattr(s.impact, 'value') else s.impact
            by_status[status_val] = by_status.get(status_val, 0) + 1
            by_risk[risk_val] = by_risk.get(risk_val, 0) + 1
            by_impact[impact_val] = by_impact.get(impact_val, 0) + 1
            if s.auto_fixable:
                auto_fixable += 1
        return {
            "total": len(suggestions),
            "pending": by_status.get("pending", 0),
            "approved": by_status.get("approved", 0),
            "implemented": by_status.get("implemented", 0),
            "auto_fixable": auto_fixable,
            "by_status": by_status,
            "by_risk": by_risk,
            "by_impact": by_impact
        }
if __name__ == "__main__":
    import argparse
    from .bug_watcher import BugWindowWatcher

    # CLI: analyze anomalies, list stored suggestions, or show a summary.
    cli = argparse.ArgumentParser(description="Suggestion Engine")
    cli.add_argument("command", choices=["analyze", "list", "status"])
    cli.add_argument("--phase", type=int, help="Phase to analyze")
    cli.add_argument("--json", action="store_true")
    opts = cli.parse_args()

    engine = SuggestionEngine()

    if opts.command == "analyze":
        watcher = BugWindowWatcher()
        if opts.phase:
            anomalies = watcher.scan_phase(opts.phase)
        else:
            anomalies = watcher.scan_all_phases()
        collected = []
        for anomaly in anomalies:
            collected.extend(engine.generate_suggestions(anomaly))
        if opts.json:
            print(json.dumps([asdict(s) for s in collected], indent=2))
        else:
            banner = "=" * 60
            print("\n" + banner)
            print("SUGGESTION ENGINE - Analysis Results")
            print(banner)
            print(f"Anomalies analyzed: {len(anomalies)}")
            print(f"Suggestions generated: {len(collected)}")
            print()
            # Show only the ten highest-priority suggestions.
            for sug in collected[:10]:
                print(f"[{sug.risk.value.upper()}] {sug.title}")
                print(f" Impact: {sug.impact.value} | Auto-fix: {sug.auto_fixable}")
                print(f" {sug.reasoning[:100]}...")
                print()
    elif opts.command == "list":
        results = engine.get_suggestions()
        if opts.json:
            print(json.dumps([asdict(s) for s in results], indent=2))
        else:
            icons = {"pending": "", "approved": "", "rejected": "", "implemented": "🎯"}
            for sug in results:
                marker = icons.get(sug.status.value, "")
                print(f"{marker} [{sug.id}] {sug.title} ({sug.risk.value}/{sug.impact.value})")
    elif opts.command == "status":
        summary = engine.get_summary()
        if opts.json:
            print(json.dumps(summary, indent=2))
        else:
            print("\nSuggestion Engine Status")
            print(f"Total: {summary['total']} | Pending: {summary['pending']} | Auto-fixable: {summary['auto_fixable']}")