#!/usr/bin/env python3
"""
End-to-End Auto-Recovery Test

Tests the complete auto-recovery flow:
1. Start a pipeline
2. Simulate iteration_limit abort
3. Verify handoff JSON is dumped
4. Verify recovery pipeline is spawned
5. Verify inherited context is loaded
6. Track retry_count and abort_reason in Redis

This test requires the UI server to be running on localhost:3000
(NOTE(review): the current flow talks to Redis directly and never hits
UI_BASE_URL — confirm whether the UI-server requirement still holds.)
"""

import asyncio
import json
import time
import subprocess
import sys
import os
from datetime import datetime, timezone
from pathlib import Path

import redis
import requests

REDIS_HOST = "127.0.0.1"
REDIS_PORT = 6379
REDIS_PASSWORD = "governance2026"
UI_BASE_URL = "http://127.0.0.1:3000"

# TTL (24h) applied to handoff payloads so abandoned test data expires on its own.
HANDOFF_TTL_SECONDS = 86400


def _utc_now() -> datetime:
    """Return the current UTC time as an aware datetime.

    Replaces the deprecated ``datetime.utcnow()`` (deprecated since
    Python 3.12) with the recommended timezone-aware equivalent.
    """
    return datetime.now(timezone.utc)


class E2EAutoRecoveryTest:
    """End-to-end auto-recovery test runner.

    Drives the whole recovery flow against a live Redis instance:
    seeds a fake pipeline, simulates an orchestrator abort (handoff
    dump), performs the recovery bookkeeping the server would do on an
    iteration_limit abort, then verifies every artifact the real
    system depends on (handoff dump, recovery pipeline, inherited
    context, retry tracking, original-pipeline status).
    """

    def __init__(self):
        # decode_responses=True so hget/hgetall/get return str, not bytes.
        self.redis = redis.Redis(
            host=REDIS_HOST,
            port=REDIS_PORT,
            password=REDIS_PASSWORD,
            decode_responses=True
        )
        self.test_pipeline_id = None  # set by setup_test_pipeline()
        self.test_results = []

    def log(self, msg: str, level: str = "INFO"):
        """Log a message with a millisecond-precision timestamp."""
        ts = _utc_now().strftime("%H:%M:%S.%f")[:-3]  # trim microseconds -> ms
        print(f"[{ts}] [{level}] {msg}")

    def setup_test_pipeline(self) -> str:
        """Set up a test pipeline that will hit iteration_limit.

        Returns the new pipeline id and remembers it on the instance.
        """
        # Capture the clock once so pipeline_id and task_id always share
        # the same suffix even across a second boundary.
        now = int(time.time())
        pipeline_id = f"test-e2e-recovery-{now}"
        task_id = f"task-e2e-{now}"

        # Create pipeline with low iteration limit to trigger abort quickly
        self.redis.hset(f"pipeline:{pipeline_id}", mapping={
            "task_id": task_id,
            "objective": "Test auto-recovery on iteration_limit",
            "status": "RUNNING",
            "created_at": _utc_now().isoformat(),
            "agents": json.dumps(["ALPHA", "BETA"]),
            "run_number": "1",
            "model": "anthropic/claude-sonnet-4",
            "timeout": "30"
        })
        self.log(f"Created test pipeline: {pipeline_id}")
        self.test_pipeline_id = pipeline_id
        return pipeline_id

    def simulate_orchestrator_abort(self, pipeline_id: str) -> bool:
        """Simulate an orchestrator abort due to iteration_limit.

        Writes the same Redis artifacts the orchestrator produces right
        before aborting: blackboard proposals, agent state, and the
        handoff JSON dump. Returns True on success, False on error.
        """
        try:
            task_id = self.redis.hget(f"pipeline:{pipeline_id}", "task_id")

            # Simulate agent proposals being written to blackboard
            proposals = [
                {"agent": "ALPHA", "key": "proposal_1",
                 "value": {"solution": "Solution A approach"}, "version": 1},
                {"agent": "BETA", "key": "proposal_1",
                 "value": {"solution": "Solution B approach"}, "version": 1},
            ]
            for p in proposals:
                self.redis.hset(
                    f"blackboard:{task_id}:solutions",
                    f"{p['agent']}_{p['key']}",
                    json.dumps(p)
                )

            # Simulate agent state
            self.redis.hset(f"agents:{task_id}", mapping={
                "ALPHA": json.dumps({"role": "ALPHA", "status": "WORKING", "progress": 0.7}),
                "BETA": json.dumps({"role": "BETA", "status": "WORKING", "progress": 0.6}),
            })

            # Simulate handoff dump (what orchestrator does before abort)
            handoff_key = f"handoff:{pipeline_id}:agents"
            handoff = {
                "pipeline_id": pipeline_id,
                "task_id": task_id,
                "dump_time": _utc_now().isoformat(),
                "iteration_count": 12,
                "max_iterations": 10,
                "gamma_active": False,
                "proposals": proposals,
                "synthesis_attempts": [
                    {"agent": "ALPHA", "key": "synthesis_1", "value": {"merged": "Combined approach"}}
                ],
                "consensus_state": [],
                "problem_analysis": [
                    {"key": "analysis", "value": {"complexity_score": 0.85}, "author": "ALPHA"}
                ],
                "agent_states": [
                    {"role": "ALPHA", "status": "WORKING", "progress": 0.7},
                    {"role": "BETA", "status": "WORKING", "progress": 0.6},
                ],
                "message_summary": {
                    "alpha_last_messages": [],
                    "beta_last_messages": [],
                    "gamma_last_messages": []
                },
                "recovery_hints": [
                    "Iteration limit (10) exceeded after 12 iterations",
                    "GAMMA was not spawned",
                    "2 proposals generated, 1 synthesis attempts"
                ]
            }
            self.redis.set(handoff_key, json.dumps(handoff), ex=HANDOFF_TTL_SECONDS)
            self.redis.hset(f"pipeline:{pipeline_id}", "handoff_key", handoff_key)
            self.redis.hset(f"pipeline:{pipeline_id}", "handoff_time", handoff["dump_time"])

            self.log(f"Simulated handoff dump: {len(proposals)} proposals")
            return True
        except Exception as e:
            self.log(f"Failed to simulate abort: {e}", "ERROR")
            return False

    def trigger_auto_recovery(self, pipeline_id: str) -> dict:
        """Trigger auto-recovery by simulating the orchestration completion with abort.

        Mirrors what the server does on orchestrator exit code 3:
        records the failure context, creates a recovery pipeline that
        inherits the handoff, updates the original pipeline, and writes
        the recovery-tracking hash. Returns a dict with "success" plus
        recovery details, or {"success": False, "error": ...}.
        """
        try:
            task_id = self.redis.hget(f"pipeline:{pipeline_id}", "task_id")
            objective = self.redis.hget(f"pipeline:{pipeline_id}", "objective")

            # Set abort state
            self.redis.hset(f"pipeline:{pipeline_id}", mapping={
                "status": "ABORTED",
                "final_consensus": "false",
                "abort_reason": "iteration_limit"
            })

            # Call the failure context recording
            metrics = {
                "abort_reason": "iteration_limit",
                "iteration_count": 12,
                "gamma_spawned": False
            }

            # Simulate what the server does on exit code 3:
            # record failure context (proposals pulled back from the handoff dump,
            # falling back to "{}" if the handoff key is missing/expired)
            failure_context = {
                "pipeline_id": pipeline_id,
                "task_id": task_id,
                "objective": objective,
                "failure_time": _utc_now().isoformat(),
                "metrics": metrics,
                "proposals": json.loads(self.redis.get(f"handoff:{pipeline_id}:agents") or "{}").get("proposals", []),
                "agent_states": [],
                "conflict_history": [],
                "blackboard_snapshot": {},
                "run_number": 1,
                "handoff_ref": f"handoff:{pipeline_id}:agents"
            }

            # Store failure context
            failure_key = f"consensus_failure:{pipeline_id}:run_1"
            self.redis.set(failure_key, json.dumps(failure_context))
            self.redis.rpush(f"consensus_failures:{pipeline_id}", failure_key)

            # Create recovery pipeline (ms timestamp keeps the id unique)
            recovery_id = f"pipeline-recovery-{int(time.time() * 1000)}"

            context_summary = {
                "prior_run": 1,
                "prior_pipeline": pipeline_id,
                "handoff_ref": f"handoff:{pipeline_id}:agents",
                "failure_reason": "iteration_limit",
                "iteration_count": 12,
                "prior_proposals": failure_context["proposals"][:5],
                "recovery_hints": [
                    "Previous run aborted after 12 iterations",
                    "GAMMA was not spawned - will be forced this time",
                    "2 proposals were generated"
                ]
            }

            # Store inherited handoff
            inherited_key = f"handoff:{recovery_id}:inherited"
            self.redis.set(inherited_key, json.dumps({
                "from_pipeline": pipeline_id,
                "from_handoff": f"handoff:{pipeline_id}:agents",
                "inherited_at": _utc_now().isoformat(),
                "proposals": failure_context["proposals"],
                "recovery_hints": context_summary["recovery_hints"]
            }), ex=HANDOFF_TTL_SECONDS)

            # Create recovery pipeline
            self.redis.hset(f"pipeline:{recovery_id}", mapping={
                "task_id": task_id,
                "objective": f"[RECOVERY ATTEMPT 2] [FORCE GAMMA] {objective}",
                "status": "STARTING",
                "created_at": _utc_now().isoformat(),
                "agents": json.dumps([]),
                "parent_pipeline": pipeline_id,
                "is_recovery": "true",
                "recovery_attempt": "2",
                "run_number": "2",
                "prior_context": json.dumps(context_summary),
                "inherited_handoff": inherited_key,
                "force_gamma": "true",
                "model": "anthropic/claude-sonnet-4",
                "timeout": "60",
                "auto_continue": "true"
            })

            # Update original pipeline
            self.redis.hset(f"pipeline:{pipeline_id}", mapping={
                "status": "REBOOTING",
                "recovery_pipeline": recovery_id,
                "recovery_triggered_at": _utc_now().isoformat()
            })

            # Track recovery metrics
            self.redis.hset(f"recovery:{pipeline_id}", mapping={
                "retry_count": "2",
                "abort_reason": "iteration_limit",
                "latest_recovery": recovery_id,
                "handoff_ref": f"handoff:{pipeline_id}:agents",
                "proposals_passed": str(len(failure_context["proposals"])),
                "last_attempt": _utc_now().isoformat()
            })

            self.log(f"Created recovery pipeline: {recovery_id}")
            return {
                "success": True,
                "recovery_pipeline_id": recovery_id,
                "proposals_inherited": len(failure_context["proposals"])
            }
        except Exception as e:
            self.log(f"Failed to trigger auto-recovery: {e}", "ERROR")
            return {"success": False, "error": str(e)}

    def verify_handoff_dump(self, pipeline_id: str) -> dict:
        """Verify the handoff JSON was properly dumped.

        Returns a result dict: {"test", "passed", "checks", ...} or an
        "error" entry if the handoff is missing/unreadable.
        """
        test_name = "Handoff Dump Verification"
        try:
            handoff_key = f"handoff:{pipeline_id}:agents"
            handoff_data = self.redis.get(handoff_key)
            if not handoff_data:
                return {"test": test_name, "passed": False, "error": "No handoff data found"}

            handoff = json.loads(handoff_data)
            checks = {
                "has_proposals": len(handoff.get("proposals", [])) > 0,
                "has_iteration_count": "iteration_count" in handoff,
                "has_agent_states": "agent_states" in handoff,
                "has_recovery_hints": len(handoff.get("recovery_hints", [])) > 0,
                "has_dump_time": "dump_time" in handoff
            }
            passed = all(checks.values())
            return {
                "test": test_name,
                "passed": passed,
                "checks": checks,
                "proposals_count": len(handoff.get("proposals", [])),
                "iteration_count": handoff.get("iteration_count")
            }
        except Exception as e:
            return {"test": test_name, "passed": False, "error": str(e)}

    def verify_recovery_pipeline(self, original_id: str) -> dict:
        """Verify a recovery pipeline was properly created.

        Follows the original pipeline's "recovery_pipeline" pointer and
        checks the recovery hash's flags and lineage fields.
        """
        test_name = "Recovery Pipeline Creation"
        try:
            recovery_id = self.redis.hget(f"pipeline:{original_id}", "recovery_pipeline")
            if not recovery_id:
                return {"test": test_name, "passed": False, "error": "No recovery pipeline found"}

            recovery_data = self.redis.hgetall(f"pipeline:{recovery_id}")
            checks = {
                "is_recovery_flag": recovery_data.get("is_recovery") == "true",
                "has_parent_pipeline": recovery_data.get("parent_pipeline") == original_id,
                "has_force_gamma": recovery_data.get("force_gamma") == "true",
                "has_inherited_handoff": "inherited_handoff" in recovery_data,
                "has_prior_context": "prior_context" in recovery_data,
                "run_number_incremented": int(recovery_data.get("run_number", 0)) > 1
            }
            passed = all(checks.values())
            return {
                "test": test_name,
                "passed": passed,
                "recovery_pipeline_id": recovery_id,
                "checks": checks,
                "run_number": recovery_data.get("run_number")
            }
        except Exception as e:
            return {"test": test_name, "passed": False, "error": str(e)}

    def verify_inherited_context(self, recovery_id: str) -> dict:
        """Verify the recovery pipeline properly inherited context.

        Dereferences the "inherited_handoff" key and checks provenance,
        proposals, and recovery hints survived the handoff.
        """
        test_name = "Inherited Context Verification"
        try:
            inherited_key = self.redis.hget(f"pipeline:{recovery_id}", "inherited_handoff")
            if not inherited_key:
                return {"test": test_name, "passed": False, "error": "No inherited handoff key"}

            inherited_data = self.redis.get(inherited_key)
            if not inherited_data:
                return {"test": test_name, "passed": False, "error": "Inherited data not found"}

            inherited = json.loads(inherited_data)
            checks = {
                "has_from_pipeline": "from_pipeline" in inherited,
                "has_proposals": len(inherited.get("proposals", [])) > 0,
                "has_recovery_hints": len(inherited.get("recovery_hints", [])) > 0,
                "has_inherited_at": "inherited_at" in inherited
            }
            passed = all(checks.values())
            return {
                "test": test_name,
                "passed": passed,
                "checks": checks,
                "proposals_inherited": len(inherited.get("proposals", []))
            }
        except Exception as e:
            return {"test": test_name, "passed": False, "error": str(e)}

    def verify_retry_tracking(self, original_id: str) -> dict:
        """Verify retry_count, abort_reason, and handoff references are tracked."""
        test_name = "Retry Tracking Verification"
        try:
            recovery_data = self.redis.hgetall(f"recovery:{original_id}")
            if not recovery_data:
                return {"test": test_name, "passed": False, "error": "No recovery tracking data"}

            checks = {
                "has_retry_count": "retry_count" in recovery_data,
                "has_abort_reason": "abort_reason" in recovery_data,
                "has_handoff_ref": "handoff_ref" in recovery_data,
                "has_latest_recovery": "latest_recovery" in recovery_data,
                "abort_reason_correct": recovery_data.get("abort_reason") == "iteration_limit"
            }
            passed = all(checks.values())
            return {
                "test": test_name,
                "passed": passed,
                "checks": checks,
                "retry_count": recovery_data.get("retry_count"),
                "abort_reason": recovery_data.get("abort_reason")
            }
        except Exception as e:
            return {"test": test_name, "passed": False, "error": str(e)}

    def verify_original_status(self, original_id: str) -> dict:
        """Verify the original pipeline status was updated correctly."""
        test_name = "Original Pipeline Status"
        try:
            original_data = self.redis.hgetall(f"pipeline:{original_id}")
            checks = {
                "status_rebooting": original_data.get("status") == "REBOOTING",
                "has_recovery_pipeline": "recovery_pipeline" in original_data,
                "has_recovery_triggered_at": "recovery_triggered_at" in original_data
            }
            passed = all(checks.values())
            return {
                "test": test_name,
                "passed": passed,
                "checks": checks,
                "status": original_data.get("status"),
                "recovery_pipeline": original_data.get("recovery_pipeline")
            }
        except Exception as e:
            return {"test": test_name, "passed": False, "error": str(e)}

    def cleanup(self, pipeline_ids: list):
        """Clean up test pipelines.

        Uses SCAN (cursor-based, non-blocking) rather than KEYS, which
        blocks the Redis server while walking the entire keyspace.
        """
        for pid in pipeline_ids:
            try:
                keys = list(self.redis.scan_iter(f"*{pid}*"))
                if keys:
                    self.redis.delete(*keys)
                self.log(f"Cleaned up: {pid}")
            except Exception:
                # Best-effort cleanup: leftover test keys are harmless.
                pass

    def run_all_tests(self) -> dict:
        """Run the complete end-to-end test.

        Returns {"passed": int, "failed": int, "tests": [result, ...]}.
        """
        print("\n" + "=" * 70)
        print("END-TO-END AUTO-RECOVERY TEST")
        print("=" * 70 + "\n")

        # Setup
        pipeline_id = self.setup_test_pipeline()

        # Step 1: Simulate orchestrator dumping handoff before abort
        self.log("Step 1: Simulating orchestrator abort with handoff dump...")
        self.simulate_orchestrator_abort(pipeline_id)

        # Step 2: Trigger auto-recovery
        self.log("Step 2: Triggering auto-recovery...")
        recovery_result = self.trigger_auto_recovery(pipeline_id)

        if not recovery_result["success"]:
            print(f"\nFAILED: Auto-recovery trigger failed: {recovery_result.get('error')}")
            return {"passed": 0, "failed": 1, "tests": []}

        recovery_id = recovery_result["recovery_pipeline_id"]

        # Run verification tests
        self.log("\nStep 3: Running verification tests...")
        tests = [
            self.verify_handoff_dump(pipeline_id),
            self.verify_recovery_pipeline(pipeline_id),
            self.verify_inherited_context(recovery_id),
            self.verify_retry_tracking(pipeline_id),
            self.verify_original_status(pipeline_id),
        ]

        passed = 0
        failed = 0
        for result in tests:
            status = "PASS" if result["passed"] else "FAIL"
            symbol = "+" if result["passed"] else "x"
            print(f" {symbol} {status}: {result['test']}")
            if result["passed"]:
                passed += 1
            else:
                failed += 1
                if "error" in result:
                    print(f" Error: {result['error']}")
                elif "checks" in result:
                    failed_checks = [k for k, v in result["checks"].items() if not v]
                    print(f" Failed checks: {', '.join(failed_checks)}")

        print(f"\n{'=' * 70}")
        print(f"RESULTS: {passed}/{passed + failed} passed")
        print(f"{'=' * 70}")

        # Show recovery chain summary
        print("\nRECOVERY CHAIN SUMMARY:")
        print(f" Original Pipeline: {pipeline_id}")
        print(" Status: REBOOTING")
        print(f" Recovery Pipeline: {recovery_id}")
        print(f" Proposals Inherited: {recovery_result['proposals_inherited']}")

        retry_data = self.redis.hgetall(f"recovery:{pipeline_id}")
        print(f" Retry Count: {retry_data.get('retry_count', 'N/A')}")
        print(f" Abort Reason: {retry_data.get('abort_reason', 'N/A')}")
        print(f" Handoff Ref: {retry_data.get('handoff_ref', 'N/A')}")

        # Cleanup
        self.log("\nCleaning up test data...")
        self.cleanup([pipeline_id, recovery_id])

        print(f"\n{'=' * 70}\n")
        return {
            "passed": passed,
            "failed": failed,
            "tests": tests
        }


def main():
    """Run E2E auto-recovery test; return a process exit code (0 = all passed)."""
    tester = E2EAutoRecoveryTest()
    results = tester.run_all_tests()
    return 0 if results["failed"] == 0 else 1


if __name__ == "__main__":
    sys.exit(main())