#!/usr/bin/env python3
"""
Auto-Recovery Test for Consensus Failures

Tests the system's ability to automatically recover from consensus failures
by spawning new pipelines with the collected failure context.

Exit codes:
- 0: All tests passed
- 1: Some tests failed
- 2: Consensus failure (triggers auto-recovery)
- 3: Aborted (timeout/stuck - triggers auto-recovery)
"""

import asyncio
import json
import subprocess
import sys
import time
from datetime import datetime, timezone
from pathlib import Path

import redis

REDIS_HOST = "127.0.0.1"
REDIS_PORT = 6379
REDIS_PASSWORD = "governance2026"
UI_BASE_URL = "http://127.0.0.1:3000"


def _utcnow() -> datetime:
    """Return the current UTC time as an aware datetime.

    Replaces the deprecated ``datetime.utcnow()`` (removed-path since
    Python 3.12); ``isoformat()`` output gains an explicit ``+00:00``
    offset, which downstream consumers parsing ISO-8601 accept.
    """
    return datetime.now(timezone.utc)


class AutoRecoveryTester:
    """Tests auto-recovery for consensus failures.

    Each test sets up mock pipeline state directly in DragonflyDB
    (redis-protocol), then asserts on the keys/fields the recovery
    machinery is expected to read or write.
    """

    def __init__(self):
        # decode_responses=True: all hget/hgetall results are str, not bytes,
        # so string comparisons below ("true", "ABORTED", ...) work directly.
        self.redis = redis.Redis(
            host=REDIS_HOST,
            port=REDIS_PORT,
            password=REDIS_PASSWORD,
            decode_responses=True,
        )
        # Accumulated per-test result dicts (see run_all_tests).
        self.results: list = []

    def log(self, msg: str, level: str = "INFO"):
        """Log a message with timestamp."""
        ts = _utcnow().strftime("%H:%M:%S")
        print(f"[{ts}] [{level}] {msg}")

    def setup_mock_pipeline(self, pipeline_id: str, task_id: str,
                            objective: str) -> bool:
        """Set up a mock pipeline in DragonflyDB for testing.

        Returns True on success, False (after logging) on any redis error.
        """
        try:
            self.redis.hset(f"pipeline:{pipeline_id}", mapping={
                "task_id": task_id,
                "objective": objective,
                "status": "RUNNING",
                "created_at": _utcnow().isoformat(),
                "agents": json.dumps(["ALPHA", "BETA"]),
                "run_number": "1",
            })
            self.log(f"Created mock pipeline: {pipeline_id}")
            return True
        except Exception as e:
            self.log(f"Failed to create pipeline: {e}", "ERROR")
            return False

    def simulate_consensus_failure(self, pipeline_id: str) -> bool:
        """Simulate a consensus failure by setting appropriate state.

        Writes both the CONSENSUS_FAILED status on the pipeline hash and a
        standalone ``...:failure_context`` JSON string key, mirroring what
        the real consensus monitor is expected to produce.
        """
        try:
            # Set consensus failure state
            self.redis.hset(f"pipeline:{pipeline_id}", mapping={
                "status": "CONSENSUS_FAILED",
                "consensus": json.dumps({
                    "achieved": False,
                    "proposals": [
                        {"agent": "ALPHA", "proposal": "Solution A", "score": 0.6},
                        {"agent": "BETA", "proposal": "Solution B", "score": 0.5},
                    ],
                    "conflicts": [
                        {"type": "approach", "agents": ["ALPHA", "BETA"]}
                    ],
                }),
            })

            # Store failure context
            failure_context = {
                "pipeline_id": pipeline_id,
                "failure_reason": "consensus_failed",
                "proposals": [
                    {"agent": "ALPHA", "proposal": "Solution A", "score": 0.6},
                    {"agent": "BETA", "proposal": "Solution B", "score": 0.5},
                ],
                "conflicts": [{"type": "approach"}],
                "iteration_count": 5,
                "run_number": 1,
                "timestamp": _utcnow().isoformat(),
            }
            self.redis.set(
                f"pipeline:{pipeline_id}:failure_context",
                json.dumps(failure_context),
            )
            self.log(f"Simulated consensus failure for pipeline: {pipeline_id}")
            return True
        except Exception as e:
            self.log(f"Failed to simulate failure: {e}", "ERROR")
            return False

    def check_recovery_pipeline_created(self, original_pipeline_id: str,
                                        timeout: float = 5.0) -> dict:
        """Check if a recovery pipeline was created.

        Polls every 200 ms for up to ``timeout`` seconds. Transient redis
        errors during polling are swallowed deliberately (best-effort poll).
        """
        start = time.time()
        while time.time() - start < timeout:
            try:
                # Check if original pipeline has recovery_pipeline reference
                recovery_id = self.redis.hget(
                    f"pipeline:{original_pipeline_id}", "recovery_pipeline"
                )
                if recovery_id:
                    # Verify recovery pipeline exists
                    recovery_data = self.redis.hgetall(f"pipeline:{recovery_id}")
                    if recovery_data:
                        return {
                            "found": True,
                            "recovery_pipeline_id": recovery_id,
                            "recovery_data": recovery_data,
                            "elapsed_ms": (time.time() - start) * 1000,
                        }
            except Exception:
                pass
            time.sleep(0.2)
        return {"found": False, "elapsed_ms": timeout * 1000}

    def verify_failure_context_passed(self, recovery_pipeline_id: str) -> dict:
        """Verify the recovery pipeline received the failure context."""
        try:
            data = self.redis.hgetall(f"pipeline:{recovery_pipeline_id}")
            prior_context = data.get("prior_context")
            return {
                "has_prior_context": prior_context is not None,
                "prior_pipeline": data.get("prior_pipeline"),
                "run_number": data.get("run_number"),
                "force_gamma": data.get("force_gamma") == "true",
            }
        except Exception as e:
            return {"error": str(e)}

    def cleanup_test_pipelines(self, pipeline_ids: list):
        """Clean up test pipelines.

        Uses SCAN (non-blocking, cursor-based) rather than KEYS, which
        blocks the server while scanning the whole keyspace.
        """
        for pid in pipeline_ids:
            try:
                keys = list(self.redis.scan_iter(f"pipeline:{pid}*"))
                if keys:
                    self.redis.delete(*keys)
                self.log(f"Cleaned up pipeline: {pid}")
            except Exception:
                # Best-effort cleanup; leftover test keys are harmless.
                pass

    def test_consensus_failure_detection(self) -> dict:
        """Test 1: Verify consensus failure is properly detected."""
        test_name = "Consensus Failure Detection"
        pipeline_id = f"test-recovery-{int(time.time())}"
        task_id = "test-task-001"
        objective = "Test consensus failure detection"
        try:
            # Setup
            self.setup_mock_pipeline(pipeline_id, task_id, objective)
            # Simulate failure
            self.simulate_consensus_failure(pipeline_id)

            # Check state
            status = self.redis.hget(f"pipeline:{pipeline_id}", "status")
            failure_ctx = self.redis.get(f"pipeline:{pipeline_id}:failure_context")

            passed = status == "CONSENSUS_FAILED" and failure_ctx is not None
            result = {
                "test": test_name,
                "passed": passed,
                "status_detected": status,
                "failure_context_stored": failure_ctx is not None,
            }
            # Cleanup
            self.cleanup_test_pipelines([pipeline_id])
            return result
        except Exception as e:
            return {"test": test_name, "passed": False, "error": str(e)}

    def test_max_recovery_limit(self) -> dict:
        """Test 2: Verify max recovery attempts are respected."""
        test_name = "Max Recovery Limit"
        pipeline_id = f"test-max-recovery-{int(time.time())}"
        try:
            # Create pipeline at max recovery attempts
            self.redis.hset(f"pipeline:{pipeline_id}", mapping={
                "task_id": "test-task",
                "objective": "Test max recovery",
                "status": "CONSENSUS_FAILED",
                "run_number": "3",  # Already at max (3)
            })

            # Store failure context indicating max reached
            failure_context = {
                "run_number": 3,
                "failure_reason": "consensus_failed",
                "proposals": [],
                "conflicts": [],
            }
            self.redis.set(
                f"pipeline:{pipeline_id}:failure_context",
                json.dumps(failure_context),
            )

            # Verify no further recovery is attempted
            time.sleep(0.5)
            recovery_id = self.redis.hget(
                f"pipeline:{pipeline_id}", "recovery_pipeline"
            )

            passed = recovery_id is None  # Should NOT have recovery
            result = {
                "test": test_name,
                "passed": passed,
                "run_number": 3,
                "recovery_attempted": recovery_id is not None,
            }
            self.cleanup_test_pipelines([pipeline_id])
            return result
        except Exception as e:
            return {"test": test_name, "passed": False, "error": str(e)}

    def test_failure_context_structure(self) -> dict:
        """Test 3: Verify failure context has required structure."""
        test_name = "Failure Context Structure"
        required_fields = [
            "pipeline_id", "failure_reason", "proposals",
            "conflicts", "run_number", "timestamp",
        ]
        try:
            sample_context = {
                "pipeline_id": "test-123",
                "failure_reason": "consensus_failed",
                "proposals": [
                    {"agent": "ALPHA", "proposal": "Test", "score": 0.5}
                ],
                "conflicts": [{"type": "approach"}],
                "iteration_count": 5,
                "run_number": 1,
                "timestamp": _utcnow().isoformat(),
            }
            missing = [f for f in required_fields if f not in sample_context]
            passed = len(missing) == 0
            return {
                "test": test_name,
                "passed": passed,
                "required_fields": required_fields,
                "missing_fields": missing,
                "sample_valid": passed,
            }
        except Exception as e:
            return {"test": test_name, "passed": False, "error": str(e)}

    def test_gamma_force_on_recovery(self) -> dict:
        """Test 4: Verify GAMMA is forced on recovery attempts."""
        test_name = "GAMMA Force on Recovery"
        pipeline_id = f"test-gamma-{int(time.time())}"
        recovery_id = f"test-gamma-recovery-{int(time.time())}"
        try:
            # Create original pipeline
            self.setup_mock_pipeline(pipeline_id, "task-gamma",
                                     "Test GAMMA forcing")

            # Create mock recovery pipeline with force_gamma
            self.redis.hset(f"pipeline:{recovery_id}", mapping={
                "task_id": "task-gamma",
                "objective": "[RECOVERY] Test GAMMA forcing",
                "status": "SPAWNED",
                "prior_pipeline": pipeline_id,
                "run_number": "2",
                "force_gamma": "true",
            })
            # Link them
            self.redis.hset(f"pipeline:{pipeline_id}",
                            "recovery_pipeline", recovery_id)

            # Verify
            force_gamma = self.redis.hget(f"pipeline:{recovery_id}",
                                          "force_gamma")
            passed = force_gamma == "true"
            result = {
                "test": test_name,
                "passed": passed,
                "force_gamma_set": force_gamma == "true",
                "recovery_linked": True,
            }
            self.cleanup_test_pipelines([pipeline_id, recovery_id])
            return result
        except Exception as e:
            return {"test": test_name, "passed": False, "error": str(e)}

    def test_iteration_timeout_abort(self) -> dict:
        """Test 5: Verify iteration timeout triggers abort."""
        test_name = "Iteration Timeout Abort"
        pipeline_id = f"test-timeout-{int(time.time())}"
        try:
            # Create pipeline that has exceeded iterations
            self.redis.hset(f"pipeline:{pipeline_id}", mapping={
                "task_id": "task-timeout",
                "objective": "Test timeout",
                "status": "ABORTED",
                "iteration_count": "12",
                "max_iterations": "10",
                "abort_reason": "iteration_limit",
            })

            # Verify abort state
            status = self.redis.hget(f"pipeline:{pipeline_id}", "status")
            reason = self.redis.hget(f"pipeline:{pipeline_id}", "abort_reason")

            passed = status == "ABORTED" and reason == "iteration_limit"
            result = {
                "test": test_name,
                "passed": passed,
                "status": status,
                "abort_reason": reason,
            }
            self.cleanup_test_pipelines([pipeline_id])
            return result
        except Exception as e:
            return {"test": test_name, "passed": False, "error": str(e)}

    def test_stuck_detection(self) -> dict:
        """Test 6: Verify stuck agent detection."""
        test_name = "Stuck Agent Detection"
        pipeline_id = f"test-stuck-{int(time.time())}"
        try:
            # Create pipeline with old last_progress timestamp
            old_time = _utcnow().timestamp() - 120  # 2 minutes ago
            self.redis.hset(f"pipeline:{pipeline_id}", mapping={
                "task_id": "task-stuck",
                "objective": "Test stuck detection",
                "status": "RUNNING",
                "last_progress": str(old_time),
            })

            # Check if stuck detection would trigger (60 second timeout)
            last_progress = float(
                self.redis.hget(f"pipeline:{pipeline_id}", "last_progress") or 0
            )
            current = _utcnow().timestamp()
            stuck = (current - last_progress) > 60

            passed = stuck  # Should be detected as stuck
            result = {
                "test": test_name,
                "passed": passed,
                "time_since_progress_seconds": current - last_progress,
                "stuck_detected": stuck,
            }
            self.cleanup_test_pipelines([pipeline_id])
            return result
        except Exception as e:
            return {"test": test_name, "passed": False, "error": str(e)}

    def run_all_tests(self) -> dict:
        """Run all auto-recovery tests.

        Returns a summary dict with totals, success rate, and the full
        per-test result list (also retained on ``self.results``).
        """
        print("\n" + "=" * 60)
        print("AUTO-RECOVERY TEST SUITE")
        print("=" * 60 + "\n")

        tests = [
            self.test_consensus_failure_detection,
            self.test_max_recovery_limit,
            self.test_failure_context_structure,
            self.test_gamma_force_on_recovery,
            self.test_iteration_timeout_abort,
            self.test_stuck_detection,
        ]

        passed = 0
        failed = 0
        for test_func in tests:
            result = test_func()
            self.results.append(result)
            # .get() guards against a malformed result dict missing "passed".
            ok = result.get("passed", False)
            status = "PASS" if ok else "FAIL"
            symbol = "✓" if ok else "✗"
            print(f"  {symbol} {status}: {result['test']}")
            if ok:
                passed += 1
            else:
                failed += 1
                if "error" in result:
                    print(f"      Error: {result['error']}")

        print(f"\n{'='*60}")
        print(f"RESULTS: {passed}/{passed+failed} passed")
        print(f"{'='*60}\n")

        return {
            "total_tests": len(tests),
            "passed": passed,
            "failed": failed,
            "success_rate": passed / len(tests) if tests else 0,
            "results": self.results,
        }


def main():
    """Run auto-recovery tests. Returns process exit code (0 = all passed)."""
    tester = AutoRecoveryTester()
    results = tester.run_all_tests()

    print("\nDetailed Results:")
    print("-" * 40)
    for r in results["results"]:
        status = "PASS" if r.get("passed", False) else "FAIL"
        print(f"  [{status}] {r['test']}")
        for k, v in r.items():
            if k not in ["test", "passed"]:
                print(f"      {k}: {v}")

    return 0 if results["failed"] == 0 else 1


if __name__ == "__main__":
    sys.exit(main())