#!/usr/bin/env python3
"""
Architectural Test Pipeline
===========================
Multi-layer oversight system for continuous validation across all 12 phases.

Layers:
1. Bug Window Watcher - Real-time anomaly detection
2. Suggestion Engine - AI-driven fix recommendations
3. Council Review - Multi-agent decision making
4. Phase Validator - Coverage across all phases
5. Error Injector - Controlled fault injection
6. Reporter - Comprehensive reporting

Usage:
    # Run full validation
    python pipeline.py run

    # Run with injection tests
    python pipeline.py run --inject

    # Validate specific phase
    python pipeline.py validate --phase 5

    # Generate report only
    python pipeline.py report
"""

import contextlib
import json
import sys
import time
from datetime import datetime, timezone
from dataclasses import dataclass, field, asdict
from pathlib import Path
from typing import Optional

from .bug_watcher import BugWindowWatcher, Anomaly, Severity
from .suggestion_engine import SuggestionEngine, Suggestion
from .council import CouncilReview, Decision, DecisionType
from .phase_validator import PhaseValidator, ValidationLevel
from .error_injector import ErrorInjector
from .reporter import OversightReporter, OversightReport


@dataclass
class PipelineConfig:
    """Configuration for the pipeline."""

    # Root directory handed (as a string) to every oversight component.
    base_path: str = "/opt/agent-governance"
    # Level passed to the phase validator by run() and validate_phase().
    validation_level: ValidationLevel = ValidationLevel.STANDARD
    # When True, run() executes the injection scenarios (step 6).
    run_injections: bool = False
    # Forwarded to ErrorInjector(safe_mode=...).
    safe_mode: bool = True
    # Phase to scan exclusively in run(); None scans all phases. When unset,
    # run() still highlights phase 5 in the validation log.
    focus_phase: Optional[int] = None
    # Cap on suggestions kept per anomaly in run().
    max_suggestions_per_anomaly: int = 3
    # When True, run() reports which approved fixes would be applied.
    auto_fix_enabled: bool = False
    # When True, run() asks the reporter for a report (step 7).
    generate_report: bool = True
    # When True, _log() prints every level instead of only warnings/errors.
    verbose: bool = False


@dataclass
class PipelineResult:
    """Result of pipeline execution."""

    success: bool  # True when no step recorded an error
    started_at: str  # ISO-8601 UTC timestamp
    completed_at: str  # ISO-8601 UTC timestamp
    duration_ms: int
    phases_validated: int
    anomalies_detected: int
    suggestions_generated: int
    council_decisions: int
    auto_fixes_applied: int
    injection_tests_run: int
    injection_tests_passed: int
    report_id: Optional[str] = None
    errors: list[str] = field(default_factory=list)


class ArchitecturalTestPipeline:
    """
    Main orchestrator for the architectural test pipeline.

    Runs all oversight layers in sequence:
    1. Phase Validation - Ensure all phases have required components
    2. Bug Detection - Scan for anomalies across all phases
    3. Suggestion Generation - Create fix recommendations
    4. Council Review - Multi-perspective decision making
    5. Auto-Fix (if enabled) - Apply approved low-risk fixes
    6. Injection Testing (if enabled) - Verify oversight works
    7. Reporting - Generate comprehensive report
    """

    def __init__(self, config: Optional[PipelineConfig] = None):
        """Build every oversight component rooted at ``config.base_path``.

        Args:
            config: Pipeline settings; a default ``PipelineConfig`` is used
                when omitted.
        """
        self.config = config or PipelineConfig()
        self.base_path = Path(self.config.base_path)

        # Initialize components
        root = str(self.base_path)
        self.watcher = BugWindowWatcher(root)
        self.suggestion_engine = SuggestionEngine(root)
        self.council = CouncilReview(root)
        self.phase_validator = PhaseValidator(root)
        self.error_injector = ErrorInjector(root, safe_mode=self.config.safe_mode)
        self.reporter = OversightReporter(root)

        # Results tracking (reset at the start of every run())
        self.anomalies: list[Anomaly] = []
        self.suggestions: list[Suggestion] = []
        self.decisions: list[Decision] = []

    def _now(self) -> str:
        """Return the current UTC time as an ISO-8601 string."""
        return datetime.now(timezone.utc).isoformat()

    def _log(self, message: str, level: str = "info") -> None:
        """Print a timestamped message to stdout.

        Warnings and errors are always printed; every other level is printed
        only when ``config.verbose`` is set.
        """
        if self.config.verbose or level in ["error", "warning"]:
            # Local wall-clock time, for display only.
            timestamp = datetime.now().strftime("%H:%M:%S")
            icons = {"info": "ℹ️", "success": "✅", "warning": "⚠️", "error": "❌", "step": "➡️"}
            icon = icons.get(level, "•")
            print(f"[{timestamp}] {icon} {message}")

    def run(self) -> PipelineResult:
        """Run the full pipeline.

        Each step is wrapped in its own ``try`` so that a failure is recorded
        in ``errors`` and the remaining steps still run.

        Returns:
            A ``PipelineResult`` summarising this run; ``success`` is True
            only when no step recorded an error.
        """
        # Monotonic clock: the duration is immune to wall-clock adjustments.
        start_time = time.monotonic()
        started_at = self._now()
        errors: list[str] = []

        # Start from a clean slate so a second run() on the same instance does
        # not accumulate (and re-review) results from the previous run.
        self.anomalies = []
        self.suggestions = []
        self.decisions = []

        self._log("Starting Architectural Test Pipeline", "step")
        print("=" * 60)

        # 1. Phase Validation
        self._log("Phase 1/7: Validating all phases...", "step")
        try:
            validation_results = self.phase_validator.validate_all_phases(self.config.validation_level)
            phases_validated = len(validation_results)
            self._log(f"Validated {phases_validated} phases", "success")

            # Special attention to focus phase (phase 5 when none configured)
            focus = self.config.focus_phase or 5
            if focus in validation_results:
                focus_result = validation_results[focus]
                self._log(f"Phase {focus} ({focus_result.phase_name}): {focus_result.status.value}, {focus_result.coverage_percent:.1f}% coverage", "info")
        except Exception as e:
            errors.append(f"Phase validation failed: {e}")
            self._log(f"Phase validation error: {e}", "error")
            phases_validated = 0

        # 2. Bug Detection
        self._log("Phase 2/7: Scanning for anomalies...", "step")
        try:
            self.watcher.start()
            if self.config.focus_phase:
                self.anomalies = self.watcher.scan_phase(self.config.focus_phase)
            else:
                self.anomalies = self.watcher.scan_all_phases()
            self._log(f"Detected {len(self.anomalies)} anomalies", "success")

            # Report critical anomalies
            critical = [a for a in self.anomalies if a.severity == Severity.CRITICAL]
            if critical:
                self._log(f"⚠️ {len(critical)} CRITICAL anomalies found!", "warning")
        except Exception as e:
            errors.append(f"Bug detection failed: {e}")
            self._log(f"Bug detection error: {e}", "error")

        # 3. Suggestion Generation
        self._log("Phase 3/7: Generating suggestions...", "step")
        try:
            for anomaly in self.anomalies[:20]:  # Limit for performance
                suggestions = self.suggestion_engine.generate_suggestions(anomaly)
                self.suggestions.extend(suggestions[:self.config.max_suggestions_per_anomaly])
            self._log(f"Generated {len(self.suggestions)} suggestions", "success")
        except Exception as e:
            errors.append(f"Suggestion generation failed: {e}")
            self._log(f"Suggestion generation error: {e}", "error")

        # 4. Council Review
        self._log("Phase 4/7: Council reviewing suggestions...", "step")
        try:
            for suggestion in self.suggestions[:15]:  # Limit for performance
                decision = self.council.review_suggestion(suggestion)
                self.decisions.append(decision)

            auto_approved = sum(1 for d in self.decisions if d.decision == DecisionType.AUTO_APPROVE)
            human_approved = sum(1 for d in self.decisions if d.decision == DecisionType.HUMAN_APPROVE)
            rejected = sum(1 for d in self.decisions if d.decision == DecisionType.REJECT)
            self._log(f"Council decisions: {auto_approved} auto-approve, {human_approved} human-approve, {rejected} rejected", "success")
        except Exception as e:
            errors.append(f"Council review failed: {e}")
            self._log(f"Council review error: {e}", "error")

        # 5. Auto-Fix (if enabled)
        # NOTE: no fix is actually applied here; the step only reports how many
        # approved fixes exist, so auto_fixes_applied always stays 0. The
        # message below is emitted regardless of config.safe_mode.
        auto_fixes_applied = 0
        if self.config.auto_fix_enabled:
            self._log("Phase 5/7: Applying auto-fixes...", "step")
            approved_fixes = [d for d in self.decisions if d.auto_fix_approved]
            self._log(f"Auto-fix disabled in safe mode. {len(approved_fixes)} fixes would be applied.", "info")
        else:
            self._log("Phase 5/7: Auto-fix disabled, skipping...", "step")

        # 6. Injection Testing
        injection_tests_run = 0
        injection_tests_passed = 0
        if self.config.run_injections:
            self._log("Phase 6/7: Running injection tests...", "step")
            try:
                scenarios = list(self.error_injector.SCENARIOS.keys())[:4]  # Limit
                for scenario in scenarios:
                    result = self.error_injector.run_scenario(scenario)
                    injection_tests_run += 1
                    if result.test_passed:
                        injection_tests_passed += 1
                        self._log(f" ✅ {scenario}: PASSED", "info")
                    else:
                        self._log(f" ❌ {scenario}: FAILED", "warning")
                self._log(f"Injection tests: {injection_tests_passed}/{injection_tests_run} passed", "success")
            except Exception as e:
                errors.append(f"Injection testing failed: {e}")
                self._log(f"Injection testing error: {e}", "error")
        else:
            self._log("Phase 6/7: Injection tests disabled, skipping...", "step")

        # 7. Generate Report
        report_id = None
        if self.config.generate_report:
            self._log("Phase 7/7: Generating report...", "step")
            try:
                report = self.reporter.generate_report(include_injections=self.config.run_injections)
                report_id = report.report_id
                self._log(f"Report generated: {report_id}", "success")
            except Exception as e:
                errors.append(f"Report generation failed: {e}")
                self._log(f"Report generation error: {e}", "error")
        else:
            self._log("Phase 7/7: Report generation disabled, skipping...", "step")

        # Calculate duration
        duration_ms = int((time.monotonic() - start_time) * 1000)
        completed_at = self._now()

        # Determine success
        success = not errors

        print("=" * 60)
        self._log(f"Pipeline {'completed successfully' if success else 'completed with errors'}", "success" if success else "warning")
        self._log(f"Duration: {duration_ms}ms", "info")

        return PipelineResult(
            success=success,
            started_at=started_at,
            completed_at=completed_at,
            duration_ms=duration_ms,
            phases_validated=phases_validated,
            anomalies_detected=len(self.anomalies),
            suggestions_generated=len(self.suggestions),
            council_decisions=len(self.decisions),
            auto_fixes_applied=auto_fixes_applied,
            injection_tests_run=injection_tests_run,
            injection_tests_passed=injection_tests_passed,
            report_id=report_id,
            errors=errors,
        )

    def run_quick_validation(self) -> dict:
        """Run a quick validation without the full pipeline.

        Validates all phases at ``ValidationLevel.BASIC``, scans all phases
        for anomalies, and returns the validator's and watcher's summaries.
        Unlike ``run()``, exceptions from the components propagate.

        Returns:
            Dict with keys ``phases``, ``anomalies``, ``critical_issues`` and
            ``phase_5_status``.
        """
        self._log("Running quick validation...", "step")

        # Both calls are made for their effect on the components' state; the
        # figures are read back through get_summary() below.
        self.phase_validator.validate_all_phases(ValidationLevel.BASIC)
        self.watcher.start()
        self.watcher.scan_all_phases()

        summary = self.phase_validator.get_summary()
        watcher_summary = self.watcher.get_summary()

        return {
            "phases": summary,
            "anomalies": watcher_summary,
            "critical_issues": summary.get('critical_gaps', []),
            "phase_5_status": summary.get('phase_5_status', 'unknown')
        }

    def validate_phase(self, phase_num: int) -> dict:
        """Validate a specific phase in detail.

        Validates the phase, scans it for anomalies, generates up to two
        suggestions for each of the first ten anomalies, and has the council
        review the first five suggestions. Exceptions propagate.

        Args:
            phase_num: Number of the phase to validate.

        Returns:
            Dict describing the phase: name, status, coverage, anomaly and
            suggestion counts, council decisions, gaps and recommendations.
        """
        self._log(f"Validating Phase {phase_num}...", "step")

        # Validate phase
        result = self.phase_validator.validate_phase(phase_num, self.config.validation_level)

        # Scan for anomalies
        self.watcher.start()
        anomalies = self.watcher.scan_phase(phase_num)

        # Generate suggestions for anomalies
        suggestions = []
        for anomaly in anomalies[:10]:
            sugs = self.suggestion_engine.generate_suggestions(anomaly)
            suggestions.extend(sugs[:2])

        # Council review
        decisions = []
        for sug in suggestions[:5]:
            decision = self.council.review_suggestion(sug)
            decisions.append({
                "suggestion": sug.title,
                "decision": decision.decision.value,
                "auto_fix": decision.auto_fix_approved
            })

        return {
            "phase": phase_num,
            "name": result.phase_name,
            "status": result.status.value,
            "coverage": result.coverage_percent,
            "anomalies": len(anomalies),
            "suggestions": len(suggestions),
            "decisions": decisions,
            "gaps": result.gaps,
            "recommendations": result.recommendations
        }

    def get_status(self) -> dict:
        """Get current pipeline status.

        Returns:
            Dict holding the config (via ``asdict``, so ``validation_level``
            is still a ``ValidationLevel`` member) and each component's
            summary.
        """
        return {
            "config": asdict(self.config),
            "watcher": self.watcher.get_summary() if self.watcher else {},
            "suggestions": self.suggestion_engine.get_summary() if self.suggestion_engine else {},
            "council": self.council.get_summary() if self.council else {},
            "phases": self.phase_validator.get_summary() if self.phase_validator else {}
        }


def main():
    """CLI entry point.

    Parses the command line, builds the pipeline, and dispatches to the
    requested command. With ``--json``, progress output produced while a
    pipeline method runs is sent to stderr so stdout carries only the JSON
    document.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Architectural Test Pipeline - Multi-layer oversight system",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    python pipeline.py run                    # Full pipeline
    python pipeline.py run --inject           # With injection tests
    python pipeline.py run --phase 5          # Focus on Phase 5
    python pipeline.py validate --phase 5     # Validate specific phase
    python pipeline.py quick                  # Quick validation
    python pipeline.py report                 # Generate report only
    python pipeline.py matrix                 # Show phase matrix
"""
    )

    parser.add_argument("command", choices=["run", "quick", "validate", "report", "matrix", "status"], help="Command to execute")
    parser.add_argument("--phase", type=int, help="Focus on specific phase")
    parser.add_argument("--inject", action="store_true", help="Run injection tests")
    parser.add_argument("--unsafe", action="store_true", help="Disable safe mode")
    parser.add_argument("--auto-fix", action="store_true", help="Enable auto-fix")
    parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
    parser.add_argument("--json", action="store_true", help="Output as JSON")

    args = parser.parse_args()

    # Reject an incomplete 'validate' up front instead of silently doing
    # nothing; parser.error() prints usage and exits with status 2.
    if args.command == "validate" and args.phase is None:
        parser.error("the 'validate' command requires --phase")

    def progress_output():
        """Return a context manager that keeps progress logs out of JSON on stdout."""
        if args.json:
            return contextlib.redirect_stdout(sys.stderr)
        return contextlib.nullcontext()

    # Build config
    config = PipelineConfig(
        run_injections=args.inject,
        safe_mode=not args.unsafe,
        focus_phase=args.phase,
        auto_fix_enabled=args.auto_fix,
        verbose=args.verbose
    )

    pipeline = ArchitecturalTestPipeline(config)

    if args.command == "run":
        with progress_output():
            result = pipeline.run()

        if args.json:
            print(json.dumps(asdict(result), indent=2))
        else:
            print(f"\n{'='*60}")
            print("PIPELINE RESULT SUMMARY")
            print(f"{'='*60}")
            print(f"Status: {'✅ SUCCESS' if result.success else '❌ FAILED'}")
            print(f"Duration: {result.duration_ms}ms")
            print(f"Phases Validated: {result.phases_validated}")
            print(f"Anomalies Detected: {result.anomalies_detected}")
            print(f"Suggestions Generated: {result.suggestions_generated}")
            print(f"Council Decisions: {result.council_decisions}")
            if result.injection_tests_run > 0:
                print(f"Injection Tests: {result.injection_tests_passed}/{result.injection_tests_run} passed")
            if result.report_id:
                print(f"\nReport: testing/oversight/reports/{result.report_id}.md")
            if result.errors:
                print("\nErrors:")
                for err in result.errors:
                    print(f" - {err}")

    elif args.command == "quick":
        with progress_output():
            result = pipeline.run_quick_validation()

        if args.json:
            print(json.dumps(result, indent=2))
        else:
            print(f"\n{'='*60}")
            print("QUICK VALIDATION SUMMARY")
            print(f"{'='*60}")
            print(f"Phases: {result['phases'].get('phases_validated', 0)}")
            print(f"Coverage: {result['phases'].get('average_coverage', 0)}%")
            print(f"Anomalies: {result['anomalies'].get('total_anomalies', 0)}")
            print(f"Phase 5: {result['phase_5_status']}")
            if result['critical_issues']:
                print("\nCritical Issues:")
                for issue in result['critical_issues'][:5]:
                    print(f" - {issue}")

    elif args.command == "validate":
        with progress_output():
            result = pipeline.validate_phase(args.phase)

        if args.json:
            print(json.dumps(result, indent=2))
        else:
            print(f"\n{'='*60}")
            print(f"PHASE {result['phase']} VALIDATION: {result['name']}")
            print(f"{'='*60}")
            print(f"Status: {result['status']}")
            print(f"Coverage: {result['coverage']:.1f}%")
            print(f"Anomalies: {result['anomalies']}")
            print(f"Suggestions: {result['suggestions']}")
            if result['decisions']:
                print("\nCouncil Decisions:")
                for d in result['decisions']:
                    icon = "🤖" if d['auto_fix'] else "👤"
                    print(f" {icon} {d['decision']}: {d['suggestion'][:50]}...")
            if result['gaps']:
                print("\nGaps:")
                for gap in result['gaps'][:5]:
                    print(f" - {gap}")

    elif args.command == "report":
        report = pipeline.reporter.generate_report(include_injections=args.inject)

        if args.json:
            print(json.dumps(asdict(report), indent=2, default=str))
        else:
            print(report.to_markdown())

    elif args.command == "matrix":
        pipeline.phase_validator.validate_all_phases()
        print(pipeline.phase_validator.get_phase_matrix())

    elif args.command == "status":
        status = pipeline.get_status()

        if args.json:
            # default=str: the config dict still holds a ValidationLevel
            # member, which json cannot encode natively (same approach as the
            # 'report' branch).
            print(json.dumps(status, indent=2, default=str))
        else:
            print(f"\n{'='*60}")
            print("PIPELINE STATUS")
            print(f"{'='*60}")
            print(f"Safe Mode: {status['config']['safe_mode']}")
            print(f"Focus Phase: {status['config']['focus_phase'] or 'All'}")
            print(f"Auto-Fix: {status['config']['auto_fix_enabled']}")


if __name__ == "__main__":
    main()