Orchestrator changes:
- Add dumpAgentHandoff() to dump proposals/analysis before abort
- Add loadRecoveryContext() to load inherited context on recovery runs
- Add preseedBlackboard() to pre-seed inherited proposals
- Force-spawn GAMMA immediately on recovery runs
- Track isRecoveryRun, recoveryAttempt, inheritedContext, forceGamma
Server changes:
- Update recordConsensusFailure() to read orchestrator handoff JSON
- Add collectFromBlackboard() helper as fallback
- Update triggerAutoRecovery() with comprehensive context passing
- Store inherited_handoff reference for recovery pipelines
- Track retry_count, abort_reason, handoff_ref in recovery:* keys
- Add recovery badge and prior pipeline link in UI
Test coverage:
- test_auto_recovery.py: 6 unit tests
- test_e2e_auto_recovery.py: 5 E2E tests (handoff dump, recovery
pipeline creation, inherited context, retry tracking, status update)
Redis tracking keys:
- handoff:{pipeline_id}:agents - orchestrator dumps proposals here
- handoff:{recovery_id}:inherited - recovery pipeline inherits prior context from this key
- recovery:{pipeline_id} - retry_count, abort_reason, handoff_ref
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
519 lines
19 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
End-to-End Auto-Recovery Test
|
|
|
|
Tests the complete auto-recovery flow:
|
|
1. Start a pipeline
|
|
2. Simulate iteration_limit abort
|
|
3. Verify handoff JSON is dumped
|
|
4. Verify recovery pipeline is spawned
|
|
5. Verify inherited context is loaded
|
|
6. Track retry_count and abort_reason in Redis
|
|
|
|
This test requires the UI server to be running on localhost:3000
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import time
|
|
import subprocess
|
|
import sys
|
|
import os
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
import redis
|
|
import requests
|
|
|
|
# Connection settings for the test environment. Each value can be overridden
# via an E2E_* environment variable so CI can point at a different Redis/UI
# without editing the script; the defaults preserve the original behavior.
REDIS_HOST = os.environ.get("E2E_REDIS_HOST", "127.0.0.1")
REDIS_PORT = int(os.environ.get("E2E_REDIS_PORT", "6379"))
# NOTE(review): plaintext credential kept as the local-dev default — set
# E2E_REDIS_PASSWORD in any shared environment instead of committing secrets.
REDIS_PASSWORD = os.environ.get("E2E_REDIS_PASSWORD", "governance2026")
UI_BASE_URL = os.environ.get("E2E_UI_BASE_URL", "http://127.0.0.1:3000")
|
|
|
|
class E2EAutoRecoveryTest:
    """End-to-end auto-recovery test runner.

    Drives the abort -> handoff dump -> recovery-pipeline flow directly
    against Redis (no orchestrator process involved) and then verifies each
    stage: handoff contents, recovery pipeline fields, inherited context,
    retry tracking, and the original pipeline's status transition.
    """
|
|
|
    def __init__(self):
        """Create the Redis client and reset per-run test state."""
        # decode_responses=True so hget/hgetall return str rather than bytes,
        # which the verify_* methods rely on for direct string comparisons.
        self.redis = redis.Redis(
            host=REDIS_HOST,
            port=REDIS_PORT,
            password=REDIS_PASSWORD,
            decode_responses=True
        )
        # Id of the pipeline created by setup_test_pipeline(); None until then.
        self.test_pipeline_id = None
        # Accumulator for per-test results (not read by run_all_tests, which
        # keeps its own local list).
        self.test_results = []
|
|
|
|
def log(self, msg: str, level: str = "INFO"):
|
|
"""Log a message with timestamp."""
|
|
ts = datetime.utcnow().strftime("%H:%M:%S.%f")[:-3]
|
|
print(f"[{ts}] [{level}] {msg}")
|
|
|
|
def setup_test_pipeline(self) -> str:
|
|
"""Set up a test pipeline that will hit iteration_limit."""
|
|
pipeline_id = f"test-e2e-recovery-{int(time.time())}"
|
|
task_id = f"task-e2e-{int(time.time())}"
|
|
|
|
# Create pipeline with low iteration limit to trigger abort quickly
|
|
self.redis.hset(f"pipeline:{pipeline_id}", mapping={
|
|
"task_id": task_id,
|
|
"objective": "Test auto-recovery on iteration_limit",
|
|
"status": "RUNNING",
|
|
"created_at": datetime.utcnow().isoformat(),
|
|
"agents": json.dumps(["ALPHA", "BETA"]),
|
|
"run_number": "1",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"timeout": "30"
|
|
})
|
|
|
|
self.log(f"Created test pipeline: {pipeline_id}")
|
|
self.test_pipeline_id = pipeline_id
|
|
return pipeline_id
|
|
|
|
def simulate_orchestrator_abort(self, pipeline_id: str) -> bool:
|
|
"""Simulate an orchestrator abort due to iteration_limit."""
|
|
try:
|
|
task_id = self.redis.hget(f"pipeline:{pipeline_id}", "task_id")
|
|
|
|
# Simulate agent proposals being written to blackboard
|
|
proposals = [
|
|
{"agent": "ALPHA", "key": "proposal_1", "value": {"solution": "Solution A approach"}, "version": 1},
|
|
{"agent": "BETA", "key": "proposal_1", "value": {"solution": "Solution B approach"}, "version": 1},
|
|
]
|
|
|
|
for p in proposals:
|
|
self.redis.hset(
|
|
f"blackboard:{task_id}:solutions",
|
|
f"{p['agent']}_{p['key']}",
|
|
json.dumps(p)
|
|
)
|
|
|
|
# Simulate agent state
|
|
self.redis.hset(f"agents:{task_id}", mapping={
|
|
"ALPHA": json.dumps({"role": "ALPHA", "status": "WORKING", "progress": 0.7}),
|
|
"BETA": json.dumps({"role": "BETA", "status": "WORKING", "progress": 0.6}),
|
|
})
|
|
|
|
# Simulate handoff dump (what orchestrator does before abort)
|
|
handoff_key = f"handoff:{pipeline_id}:agents"
|
|
handoff = {
|
|
"pipeline_id": pipeline_id,
|
|
"task_id": task_id,
|
|
"dump_time": datetime.utcnow().isoformat(),
|
|
"iteration_count": 12,
|
|
"max_iterations": 10,
|
|
"gamma_active": False,
|
|
"proposals": proposals,
|
|
"synthesis_attempts": [
|
|
{"agent": "ALPHA", "key": "synthesis_1", "value": {"merged": "Combined approach"}}
|
|
],
|
|
"consensus_state": [],
|
|
"problem_analysis": [
|
|
{"key": "analysis", "value": {"complexity_score": 0.85}, "author": "ALPHA"}
|
|
],
|
|
"agent_states": [
|
|
{"role": "ALPHA", "status": "WORKING", "progress": 0.7},
|
|
{"role": "BETA", "status": "WORKING", "progress": 0.6},
|
|
],
|
|
"message_summary": {
|
|
"alpha_last_messages": [],
|
|
"beta_last_messages": [],
|
|
"gamma_last_messages": []
|
|
},
|
|
"recovery_hints": [
|
|
"Iteration limit (10) exceeded after 12 iterations",
|
|
"GAMMA was not spawned",
|
|
"2 proposals generated, 1 synthesis attempts"
|
|
]
|
|
}
|
|
|
|
self.redis.set(handoff_key, json.dumps(handoff), ex=86400)
|
|
self.redis.hset(f"pipeline:{pipeline_id}", "handoff_key", handoff_key)
|
|
self.redis.hset(f"pipeline:{pipeline_id}", "handoff_time", handoff["dump_time"])
|
|
|
|
self.log(f"Simulated handoff dump: {len(proposals)} proposals")
|
|
return True
|
|
|
|
except Exception as e:
|
|
self.log(f"Failed to simulate abort: {e}", "ERROR")
|
|
return False
|
|
|
|
def trigger_auto_recovery(self, pipeline_id: str) -> dict:
|
|
"""Trigger auto-recovery by simulating the orchestration completion with abort."""
|
|
try:
|
|
task_id = self.redis.hget(f"pipeline:{pipeline_id}", "task_id")
|
|
objective = self.redis.hget(f"pipeline:{pipeline_id}", "objective")
|
|
|
|
# Set abort state
|
|
self.redis.hset(f"pipeline:{pipeline_id}", mapping={
|
|
"status": "ABORTED",
|
|
"final_consensus": "false",
|
|
"abort_reason": "iteration_limit"
|
|
})
|
|
|
|
# Call the failure context recording
|
|
metrics = {
|
|
"abort_reason": "iteration_limit",
|
|
"iteration_count": 12,
|
|
"gamma_spawned": False
|
|
}
|
|
|
|
# Simulate what the server does on exit code 3
|
|
# Record failure context
|
|
failure_context = {
|
|
"pipeline_id": pipeline_id,
|
|
"task_id": task_id,
|
|
"objective": objective,
|
|
"failure_time": datetime.utcnow().isoformat(),
|
|
"metrics": metrics,
|
|
"proposals": json.loads(self.redis.get(f"handoff:{pipeline_id}:agents") or "{}").get("proposals", []),
|
|
"agent_states": [],
|
|
"conflict_history": [],
|
|
"blackboard_snapshot": {},
|
|
"run_number": 1,
|
|
"handoff_ref": f"handoff:{pipeline_id}:agents"
|
|
}
|
|
|
|
# Store failure context
|
|
failure_key = f"consensus_failure:{pipeline_id}:run_1"
|
|
self.redis.set(failure_key, json.dumps(failure_context))
|
|
self.redis.rpush(f"consensus_failures:{pipeline_id}", failure_key)
|
|
|
|
# Create recovery pipeline
|
|
recovery_id = f"pipeline-recovery-{int(time.time() * 1000)}"
|
|
|
|
context_summary = {
|
|
"prior_run": 1,
|
|
"prior_pipeline": pipeline_id,
|
|
"handoff_ref": f"handoff:{pipeline_id}:agents",
|
|
"failure_reason": "iteration_limit",
|
|
"iteration_count": 12,
|
|
"prior_proposals": failure_context["proposals"][:5],
|
|
"recovery_hints": [
|
|
"Previous run aborted after 12 iterations",
|
|
"GAMMA was not spawned - will be forced this time",
|
|
"2 proposals were generated"
|
|
]
|
|
}
|
|
|
|
# Store inherited handoff
|
|
inherited_key = f"handoff:{recovery_id}:inherited"
|
|
self.redis.set(inherited_key, json.dumps({
|
|
"from_pipeline": pipeline_id,
|
|
"from_handoff": f"handoff:{pipeline_id}:agents",
|
|
"inherited_at": datetime.utcnow().isoformat(),
|
|
"proposals": failure_context["proposals"],
|
|
"recovery_hints": context_summary["recovery_hints"]
|
|
}), ex=86400)
|
|
|
|
# Create recovery pipeline
|
|
self.redis.hset(f"pipeline:{recovery_id}", mapping={
|
|
"task_id": task_id,
|
|
"objective": f"[RECOVERY ATTEMPT 2] [FORCE GAMMA] {objective}",
|
|
"status": "STARTING",
|
|
"created_at": datetime.utcnow().isoformat(),
|
|
"agents": json.dumps([]),
|
|
"parent_pipeline": pipeline_id,
|
|
"is_recovery": "true",
|
|
"recovery_attempt": "2",
|
|
"run_number": "2",
|
|
"prior_context": json.dumps(context_summary),
|
|
"inherited_handoff": inherited_key,
|
|
"force_gamma": "true",
|
|
"model": "anthropic/claude-sonnet-4",
|
|
"timeout": "60",
|
|
"auto_continue": "true"
|
|
})
|
|
|
|
# Update original pipeline
|
|
self.redis.hset(f"pipeline:{pipeline_id}", mapping={
|
|
"status": "REBOOTING",
|
|
"recovery_pipeline": recovery_id,
|
|
"recovery_triggered_at": datetime.utcnow().isoformat()
|
|
})
|
|
|
|
# Track recovery metrics
|
|
self.redis.hset(f"recovery:{pipeline_id}", mapping={
|
|
"retry_count": "2",
|
|
"abort_reason": "iteration_limit",
|
|
"latest_recovery": recovery_id,
|
|
"handoff_ref": f"handoff:{pipeline_id}:agents",
|
|
"proposals_passed": str(len(failure_context["proposals"])),
|
|
"last_attempt": datetime.utcnow().isoformat()
|
|
})
|
|
|
|
self.log(f"Created recovery pipeline: {recovery_id}")
|
|
|
|
return {
|
|
"success": True,
|
|
"recovery_pipeline_id": recovery_id,
|
|
"proposals_inherited": len(failure_context["proposals"])
|
|
}
|
|
|
|
except Exception as e:
|
|
self.log(f"Failed to trigger auto-recovery: {e}", "ERROR")
|
|
return {"success": False, "error": str(e)}
|
|
|
|
def verify_handoff_dump(self, pipeline_id: str) -> dict:
|
|
"""Verify the handoff JSON was properly dumped."""
|
|
test_name = "Handoff Dump Verification"
|
|
|
|
try:
|
|
handoff_key = f"handoff:{pipeline_id}:agents"
|
|
handoff_data = self.redis.get(handoff_key)
|
|
|
|
if not handoff_data:
|
|
return {"test": test_name, "passed": False, "error": "No handoff data found"}
|
|
|
|
handoff = json.loads(handoff_data)
|
|
|
|
checks = {
|
|
"has_proposals": len(handoff.get("proposals", [])) > 0,
|
|
"has_iteration_count": "iteration_count" in handoff,
|
|
"has_agent_states": "agent_states" in handoff,
|
|
"has_recovery_hints": len(handoff.get("recovery_hints", [])) > 0,
|
|
"has_dump_time": "dump_time" in handoff
|
|
}
|
|
|
|
passed = all(checks.values())
|
|
|
|
return {
|
|
"test": test_name,
|
|
"passed": passed,
|
|
"checks": checks,
|
|
"proposals_count": len(handoff.get("proposals", [])),
|
|
"iteration_count": handoff.get("iteration_count")
|
|
}
|
|
|
|
except Exception as e:
|
|
return {"test": test_name, "passed": False, "error": str(e)}
|
|
|
|
def verify_recovery_pipeline(self, original_id: str) -> dict:
|
|
"""Verify a recovery pipeline was properly created."""
|
|
test_name = "Recovery Pipeline Creation"
|
|
|
|
try:
|
|
recovery_id = self.redis.hget(f"pipeline:{original_id}", "recovery_pipeline")
|
|
|
|
if not recovery_id:
|
|
return {"test": test_name, "passed": False, "error": "No recovery pipeline found"}
|
|
|
|
recovery_data = self.redis.hgetall(f"pipeline:{recovery_id}")
|
|
|
|
checks = {
|
|
"is_recovery_flag": recovery_data.get("is_recovery") == "true",
|
|
"has_parent_pipeline": recovery_data.get("parent_pipeline") == original_id,
|
|
"has_force_gamma": recovery_data.get("force_gamma") == "true",
|
|
"has_inherited_handoff": "inherited_handoff" in recovery_data,
|
|
"has_prior_context": "prior_context" in recovery_data,
|
|
"run_number_incremented": int(recovery_data.get("run_number", 0)) > 1
|
|
}
|
|
|
|
passed = all(checks.values())
|
|
|
|
return {
|
|
"test": test_name,
|
|
"passed": passed,
|
|
"recovery_pipeline_id": recovery_id,
|
|
"checks": checks,
|
|
"run_number": recovery_data.get("run_number")
|
|
}
|
|
|
|
except Exception as e:
|
|
return {"test": test_name, "passed": False, "error": str(e)}
|
|
|
|
def verify_inherited_context(self, recovery_id: str) -> dict:
|
|
"""Verify the recovery pipeline properly inherited context."""
|
|
test_name = "Inherited Context Verification"
|
|
|
|
try:
|
|
inherited_key = self.redis.hget(f"pipeline:{recovery_id}", "inherited_handoff")
|
|
|
|
if not inherited_key:
|
|
return {"test": test_name, "passed": False, "error": "No inherited handoff key"}
|
|
|
|
inherited_data = self.redis.get(inherited_key)
|
|
|
|
if not inherited_data:
|
|
return {"test": test_name, "passed": False, "error": "Inherited data not found"}
|
|
|
|
inherited = json.loads(inherited_data)
|
|
|
|
checks = {
|
|
"has_from_pipeline": "from_pipeline" in inherited,
|
|
"has_proposals": len(inherited.get("proposals", [])) > 0,
|
|
"has_recovery_hints": len(inherited.get("recovery_hints", [])) > 0,
|
|
"has_inherited_at": "inherited_at" in inherited
|
|
}
|
|
|
|
passed = all(checks.values())
|
|
|
|
return {
|
|
"test": test_name,
|
|
"passed": passed,
|
|
"checks": checks,
|
|
"proposals_inherited": len(inherited.get("proposals", []))
|
|
}
|
|
|
|
except Exception as e:
|
|
return {"test": test_name, "passed": False, "error": str(e)}
|
|
|
|
def verify_retry_tracking(self, original_id: str) -> dict:
|
|
"""Verify retry_count, abort_reason, and handoff references are tracked."""
|
|
test_name = "Retry Tracking Verification"
|
|
|
|
try:
|
|
recovery_data = self.redis.hgetall(f"recovery:{original_id}")
|
|
|
|
if not recovery_data:
|
|
return {"test": test_name, "passed": False, "error": "No recovery tracking data"}
|
|
|
|
checks = {
|
|
"has_retry_count": "retry_count" in recovery_data,
|
|
"has_abort_reason": "abort_reason" in recovery_data,
|
|
"has_handoff_ref": "handoff_ref" in recovery_data,
|
|
"has_latest_recovery": "latest_recovery" in recovery_data,
|
|
"abort_reason_correct": recovery_data.get("abort_reason") == "iteration_limit"
|
|
}
|
|
|
|
passed = all(checks.values())
|
|
|
|
return {
|
|
"test": test_name,
|
|
"passed": passed,
|
|
"checks": checks,
|
|
"retry_count": recovery_data.get("retry_count"),
|
|
"abort_reason": recovery_data.get("abort_reason")
|
|
}
|
|
|
|
except Exception as e:
|
|
return {"test": test_name, "passed": False, "error": str(e)}
|
|
|
|
def verify_original_status(self, original_id: str) -> dict:
|
|
"""Verify the original pipeline status was updated correctly."""
|
|
test_name = "Original Pipeline Status"
|
|
|
|
try:
|
|
original_data = self.redis.hgetall(f"pipeline:{original_id}")
|
|
|
|
checks = {
|
|
"status_rebooting": original_data.get("status") == "REBOOTING",
|
|
"has_recovery_pipeline": "recovery_pipeline" in original_data,
|
|
"has_recovery_triggered_at": "recovery_triggered_at" in original_data
|
|
}
|
|
|
|
passed = all(checks.values())
|
|
|
|
return {
|
|
"test": test_name,
|
|
"passed": passed,
|
|
"checks": checks,
|
|
"status": original_data.get("status"),
|
|
"recovery_pipeline": original_data.get("recovery_pipeline")
|
|
}
|
|
|
|
except Exception as e:
|
|
return {"test": test_name, "passed": False, "error": str(e)}
|
|
|
|
def cleanup(self, pipeline_ids: list):
|
|
"""Clean up test pipelines."""
|
|
for pid in pipeline_ids:
|
|
try:
|
|
keys = self.redis.keys(f"*{pid}*")
|
|
if keys:
|
|
self.redis.delete(*keys)
|
|
self.log(f"Cleaned up: {pid}")
|
|
except Exception:
|
|
pass
|
|
|
|
def run_all_tests(self) -> dict:
|
|
"""Run the complete end-to-end test."""
|
|
print("\n" + "=" * 70)
|
|
print("END-TO-END AUTO-RECOVERY TEST")
|
|
print("=" * 70 + "\n")
|
|
|
|
# Setup
|
|
pipeline_id = self.setup_test_pipeline()
|
|
|
|
# Step 1: Simulate orchestrator dumping handoff before abort
|
|
self.log("Step 1: Simulating orchestrator abort with handoff dump...")
|
|
self.simulate_orchestrator_abort(pipeline_id)
|
|
|
|
# Step 2: Trigger auto-recovery
|
|
self.log("Step 2: Triggering auto-recovery...")
|
|
recovery_result = self.trigger_auto_recovery(pipeline_id)
|
|
|
|
if not recovery_result["success"]:
|
|
print(f"\nFAILED: Auto-recovery trigger failed: {recovery_result.get('error')}")
|
|
return {"passed": 0, "failed": 1, "tests": []}
|
|
|
|
recovery_id = recovery_result["recovery_pipeline_id"]
|
|
|
|
# Run verification tests
|
|
self.log("\nStep 3: Running verification tests...")
|
|
|
|
tests = [
|
|
self.verify_handoff_dump(pipeline_id),
|
|
self.verify_recovery_pipeline(pipeline_id),
|
|
self.verify_inherited_context(recovery_id),
|
|
self.verify_retry_tracking(pipeline_id),
|
|
self.verify_original_status(pipeline_id),
|
|
]
|
|
|
|
passed = 0
|
|
failed = 0
|
|
|
|
for result in tests:
|
|
status = "PASS" if result["passed"] else "FAIL"
|
|
symbol = "+" if result["passed"] else "x"
|
|
|
|
print(f" {symbol} {status}: {result['test']}")
|
|
|
|
if result["passed"]:
|
|
passed += 1
|
|
else:
|
|
failed += 1
|
|
if "error" in result:
|
|
print(f" Error: {result['error']}")
|
|
elif "checks" in result:
|
|
failed_checks = [k for k, v in result["checks"].items() if not v]
|
|
print(f" Failed checks: {', '.join(failed_checks)}")
|
|
|
|
print(f"\n{'=' * 70}")
|
|
print(f"RESULTS: {passed}/{passed + failed} passed")
|
|
print(f"{'=' * 70}")
|
|
|
|
# Show recovery chain summary
|
|
print("\nRECOVERY CHAIN SUMMARY:")
|
|
print(f" Original Pipeline: {pipeline_id}")
|
|
print(f" Status: REBOOTING")
|
|
print(f" Recovery Pipeline: {recovery_id}")
|
|
print(f" Proposals Inherited: {recovery_result['proposals_inherited']}")
|
|
|
|
retry_data = self.redis.hgetall(f"recovery:{pipeline_id}")
|
|
print(f" Retry Count: {retry_data.get('retry_count', 'N/A')}")
|
|
print(f" Abort Reason: {retry_data.get('abort_reason', 'N/A')}")
|
|
print(f" Handoff Ref: {retry_data.get('handoff_ref', 'N/A')}")
|
|
|
|
# Cleanup
|
|
self.log("\nCleaning up test data...")
|
|
self.cleanup([pipeline_id, recovery_id])
|
|
|
|
print(f"\n{'=' * 70}\n")
|
|
|
|
return {
|
|
"passed": passed,
|
|
"failed": failed,
|
|
"tests": tests
|
|
}
|
|
|
|
|
|
def main():
    """Run the E2E auto-recovery suite; return 0 on all-pass, 1 otherwise."""
    outcome = E2EAutoRecoveryTest().run_all_tests()
    return 0 if outcome["failed"] == 0 else 1


if __name__ == "__main__":
    sys.exit(main())
|