commit 77655c298c Initial commit: Agent Governance System Phase 8
Phase 8 Production Hardening with complete governance infrastructure:

- Vault integration with tiered policies (T0-T4)
- DragonflyDB state management
- SQLite audit ledger
- Pipeline DSL and templates
- Promotion/revocation engine
- Checkpoint system for session persistence
- Health manager and circuit breaker for fault tolerance
- GitHub/Slack integrations
- Architectural test pipeline with bug watcher, suggestion engine, council review
- Multi-agent chaos testing framework

Test Results:
- Governance tests: 68/68 passing
- E2E workflow: 16/16 passing
- Phase 2 Vault: 14/14 passing
- Integration tests: 27/27 passing

Coverage: 57.6% average across 12 phases

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 22:07:06 -05:00

477 lines
18 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Architectural Test Pipeline
===========================
Multi-layer oversight system for continuous validation across all 12 phases.
Layers:
1. Bug Window Watcher - Real-time anomaly detection
2. Suggestion Engine - AI-driven fix recommendations
3. Council Review - Multi-agent decision making
4. Phase Validator - Coverage across all phases
5. Error Injector - Controlled fault injection
6. Reporter - Comprehensive reporting
Usage:
# Run full validation
python pipeline.py run
# Run with injection tests
python pipeline.py run --inject
# Validate specific phase
python pipeline.py validate --phase 5
# Generate report only
python pipeline.py report
"""
import json
import sys
import time
from datetime import datetime, timezone
from dataclasses import dataclass, field, asdict
from pathlib import Path
from typing import Optional
from .bug_watcher import BugWindowWatcher, Anomaly, Severity
from .suggestion_engine import SuggestionEngine, Suggestion
from .council import CouncilReview, Decision, DecisionType
from .phase_validator import PhaseValidator, ValidationLevel
from .error_injector import ErrorInjector
from .reporter import OversightReporter, OversightReport
@dataclass
class PipelineConfig:
    """Configuration for the pipeline.

    Controls which oversight layers run (injection tests, auto-fix,
    reporting) and where the governance tree lives on disk.
    """
    # Root directory of the governance installation; handed to every
    # component as a string (see ArchitecturalTestPipeline.__init__).
    base_path: str = "/opt/agent-governance"
    # Depth of phase validation (e.g. BASIC/STANDARD, declared in phase_validator).
    validation_level: ValidationLevel = ValidationLevel.STANDARD
    # When True, the error-injection layer runs as part of the pipeline.
    run_injections: bool = False
    # Forwarded to ErrorInjector(safe_mode=...) to keep injections non-destructive.
    safe_mode: bool = True
    focus_phase: Optional[int] = None  # Special attention phase (default: 5)
    # Cap on suggestions kept per anomaly during suggestion generation.
    max_suggestions_per_anomaly: int = 3
    # When True, run() enters the auto-fix step (currently a dry-run; it
    # only counts approved fixes, it does not apply them).
    auto_fix_enabled: bool = False
    # When True, run() asks the reporter for an oversight report.
    generate_report: bool = True
    # When True, _log() also prints info/success/step-level messages.
    verbose: bool = False
@dataclass
class PipelineResult:
    """Result of pipeline execution.

    Aggregated counters and timing for one ArchitecturalTestPipeline.run();
    JSON-serializable via dataclasses.asdict() (used by the CLI's --json).
    """
    success: bool                    # True when no layer recorded an error
    started_at: str                  # UTC ISO-8601 timestamp
    completed_at: str                # UTC ISO-8601 timestamp
    duration_ms: int                 # wall-clock duration of run()
    phases_validated: int            # phases checked by the phase validator
    anomalies_detected: int          # anomalies found by the bug watcher
    suggestions_generated: int       # fix suggestions produced
    council_decisions: int           # suggestions reviewed by the council
    auto_fixes_applied: int          # always 0 today — auto-fix is a dry-run
    injection_tests_run: int         # 0 unless run_injections was enabled
    injection_tests_passed: int
    report_id: Optional[str] = None  # set only when report generation succeeds
    errors: list[str] = field(default_factory=list)  # one entry per failed layer
class ArchitecturalTestPipeline:
    """
    Main orchestrator for the architectural test pipeline.

    Runs all oversight layers in sequence:
    1. Phase Validation - Ensure all phases have required components
    2. Bug Detection - Scan for anomalies across all phases
    3. Suggestion Generation - Create fix recommendations
    4. Council Review - Multi-perspective decision making
    5. Auto-Fix (if enabled) - Apply approved low-risk fixes
    6. Injection Testing (if enabled) - Verify oversight works
    7. Reporting - Generate comprehensive report

    Each layer is wrapped in its own try/except so one failing layer does
    not abort the rest; failures are collected into PipelineResult.errors.
    """

    def __init__(self, config: Optional[PipelineConfig] = None):
        """Build all oversight components rooted at ``config.base_path``.

        Args:
            config: Pipeline configuration; defaults to PipelineConfig().
        """
        self.config = config or PipelineConfig()
        self.base_path = Path(self.config.base_path)
        # Initialize components — each takes the base path as a string.
        self.watcher = BugWindowWatcher(str(self.base_path))
        self.suggestion_engine = SuggestionEngine(str(self.base_path))
        self.council = CouncilReview(str(self.base_path))
        self.phase_validator = PhaseValidator(str(self.base_path))
        self.error_injector = ErrorInjector(str(self.base_path), safe_mode=self.config.safe_mode)
        self.reporter = OversightReporter(str(self.base_path))
        # Results tracking, populated by run() / validate_phase().
        self.anomalies: list[Anomaly] = []
        self.suggestions: list[Suggestion] = []
        self.decisions: list[Decision] = []

    def _now(self) -> str:
        """Return the current UTC time as an ISO-8601 string."""
        return datetime.now(timezone.utc).isoformat()

    def _log(self, message: str, level: str = "info") -> None:
        """Print a timestamped message to stdout.

        Warnings and errors always print; other levels print only when
        ``config.verbose`` is set.
        """
        if self.config.verbose or level in ["error", "warning"]:
            timestamp = datetime.now().strftime("%H:%M:%S")
            # NOTE(review): several icon entries are empty strings — the
            # original emoji may have been stripped in transit; confirm.
            icons = {"info": "", "success": "", "warning": "⚠️", "error": "", "step": "➡️"}
            icon = icons.get(level, "")
            print(f"[{timestamp}] {icon} {message}")

    def run(self) -> PipelineResult:
        """Run the full pipeline.

        Executes all seven layers in order, accumulating per-layer errors
        instead of aborting, and returns a PipelineResult summary.
        ``success`` is True only when no layer raised.
        """
        start_time = time.time()
        started_at = self._now()
        errors: list[str] = []
        self._log("Starting Architectural Test Pipeline", "step")
        print("=" * 60)

        # 1. Phase Validation
        self._log("Phase 1/7: Validating all phases...", "step")
        try:
            validation_results = self.phase_validator.validate_all_phases(self.config.validation_level)
            phases_validated = len(validation_results)
            self._log(f"Validated {phases_validated} phases", "success")
            # Special attention to focus phase (default: 5). Explicit
            # None-check so a hypothetical phase 0 is not treated as unset.
            focus = self.config.focus_phase if self.config.focus_phase is not None else 5
            if focus in validation_results:
                focus_result = validation_results[focus]
                self._log(f"Phase {focus} ({focus_result.phase_name}): {focus_result.status.value}, {focus_result.coverage_percent:.1f}% coverage", "info")
        except Exception as e:
            errors.append(f"Phase validation failed: {e}")
            self._log(f"Phase validation error: {e}", "error")
            phases_validated = 0

        # 2. Bug Detection
        self._log("Phase 2/7: Scanning for anomalies...", "step")
        try:
            self.watcher.start()
            # `is not None` (not truthiness) so phase 0 would still be honored.
            if self.config.focus_phase is not None:
                self.anomalies = self.watcher.scan_phase(self.config.focus_phase)
            else:
                self.anomalies = self.watcher.scan_all_phases()
            self._log(f"Detected {len(self.anomalies)} anomalies", "success")
            # Surface critical anomalies immediately.
            critical = [a for a in self.anomalies if a.severity == Severity.CRITICAL]
            if critical:
                self._log(f"⚠️ {len(critical)} CRITICAL anomalies found!", "warning")
        except Exception as e:
            errors.append(f"Bug detection failed: {e}")
            self._log(f"Bug detection error: {e}", "error")

        # 3. Suggestion Generation
        self._log("Phase 3/7: Generating suggestions...", "step")
        try:
            for anomaly in self.anomalies[:20]:  # Limit for performance
                suggestions = self.suggestion_engine.generate_suggestions(anomaly)
                self.suggestions.extend(suggestions[:self.config.max_suggestions_per_anomaly])
            self._log(f"Generated {len(self.suggestions)} suggestions", "success")
        except Exception as e:
            errors.append(f"Suggestion generation failed: {e}")
            self._log(f"Suggestion generation error: {e}", "error")

        # 4. Council Review
        self._log("Phase 4/7: Council reviewing suggestions...", "step")
        try:
            for suggestion in self.suggestions[:15]:  # Limit for performance
                decision = self.council.review_suggestion(suggestion)
                self.decisions.append(decision)
            auto_approved = sum(1 for d in self.decisions if d.decision == DecisionType.AUTO_APPROVE)
            human_approved = sum(1 for d in self.decisions if d.decision == DecisionType.HUMAN_APPROVE)
            rejected = sum(1 for d in self.decisions if d.decision == DecisionType.REJECT)
            self._log(f"Council decisions: {auto_approved} auto-approve, {human_approved} human-approve, {rejected} rejected", "success")
        except Exception as e:
            errors.append(f"Council review failed: {e}")
            self._log(f"Council review error: {e}", "error")

        # 5. Auto-Fix (if enabled). Dry-run only: approved fixes are
        # counted but never applied, so auto_fixes_applied stays 0.
        auto_fixes_applied = 0
        if self.config.auto_fix_enabled:
            self._log("Phase 5/7: Applying auto-fixes...", "step")
            auto_approved = [d for d in self.decisions if d.auto_fix_approved]
            self._log(f"Auto-fix disabled in safe mode. {len(auto_approved)} fixes would be applied.", "info")
        else:
            self._log("Phase 5/7: Auto-fix disabled, skipping...", "step")

        # 6. Injection Testing
        injection_tests_run = 0
        injection_tests_passed = 0
        if self.config.run_injections:
            self._log("Phase 6/7: Running injection tests...", "step")
            try:
                scenarios = list(self.error_injector.SCENARIOS.keys())[:4]  # Limit
                for scenario in scenarios:
                    result = self.error_injector.run_scenario(scenario)
                    injection_tests_run += 1
                    if result.test_passed:
                        injection_tests_passed += 1
                        self._log(f"{scenario}: PASSED", "info")
                    else:
                        self._log(f"{scenario}: FAILED", "warning")
                self._log(f"Injection tests: {injection_tests_passed}/{injection_tests_run} passed", "success")
            except Exception as e:
                errors.append(f"Injection testing failed: {e}")
                self._log(f"Injection testing error: {e}", "error")
        else:
            self._log("Phase 6/7: Injection tests disabled, skipping...", "step")

        # 7. Generate Report
        report_id = None
        if self.config.generate_report:
            self._log("Phase 7/7: Generating report...", "step")
            try:
                report = self.reporter.generate_report(include_injections=self.config.run_injections)
                report_id = report.report_id
                self._log(f"Report generated: {report_id}", "success")
            except Exception as e:
                errors.append(f"Report generation failed: {e}")
                self._log(f"Report generation error: {e}", "error")
        else:
            self._log("Phase 7/7: Report generation disabled, skipping...", "step")

        # Calculate duration and overall success.
        duration_ms = int((time.time() - start_time) * 1000)
        completed_at = self._now()
        success = len(errors) == 0
        print("=" * 60)
        self._log(f"Pipeline {'completed successfully' if success else 'completed with errors'}", "success" if success else "warning")
        self._log(f"Duration: {duration_ms}ms", "info")
        return PipelineResult(
            success=success,
            started_at=started_at,
            completed_at=completed_at,
            duration_ms=duration_ms,
            phases_validated=phases_validated,
            anomalies_detected=len(self.anomalies),
            suggestions_generated=len(self.suggestions),
            council_decisions=len(self.decisions),
            auto_fixes_applied=auto_fixes_applied,
            injection_tests_run=injection_tests_run,
            injection_tests_passed=injection_tests_passed,
            report_id=report_id,
            errors=errors
        )

    def run_quick_validation(self) -> dict:
        """Run a quick validation without the full pipeline.

        Performs a BASIC phase validation plus a full anomaly scan, then
        returns the component summaries (the scans' direct return values
        are intentionally discarded — presumably the summaries reflect
        them; confirm against the validator/watcher implementations).
        """
        self._log("Running quick validation...", "step")
        # Just validate phases and scan for anomalies, for their side effects.
        self.phase_validator.validate_all_phases(ValidationLevel.BASIC)
        self.watcher.start()
        self.watcher.scan_all_phases()
        summary = self.phase_validator.get_summary()
        watcher_summary = self.watcher.get_summary()
        return {
            "phases": summary,
            "anomalies": watcher_summary,
            "critical_issues": summary.get('critical_gaps', []),
            "phase_5_status": summary.get('phase_5_status', 'unknown')
        }

    def validate_phase(self, phase_num: int) -> dict:
        """Validate a specific phase in detail.

        Runs validation, an anomaly scan, suggestion generation, and a
        council review for one phase; returns a summary dict.
        """
        self._log(f"Validating Phase {phase_num}...", "step")
        # Validate phase
        result = self.phase_validator.validate_phase(phase_num, self.config.validation_level)
        # Scan for anomalies
        self.watcher.start()
        anomalies = self.watcher.scan_phase(phase_num)
        # Generate suggestions for anomalies (bounded for performance)
        suggestions = []
        for anomaly in anomalies[:10]:
            sugs = self.suggestion_engine.generate_suggestions(anomaly)
            suggestions.extend(sugs[:2])
        # Council review (bounded for performance)
        decisions = []
        for sug in suggestions[:5]:
            decision = self.council.review_suggestion(sug)
            decisions.append({
                "suggestion": sug.title,
                "decision": decision.decision.value,
                "auto_fix": decision.auto_fix_approved
            })
        return {
            "phase": phase_num,
            "name": result.phase_name,
            "status": result.status.value,
            "coverage": result.coverage_percent,
            "anomalies": len(anomalies),
            "suggestions": len(suggestions),
            "decisions": decisions,
            "gaps": result.gaps,
            "recommendations": result.recommendations
        }

    def get_status(self) -> dict:
        """Return the current pipeline status as a JSON-serializable dict."""
        return {
            "config": asdict(self.config),
            "watcher": self.watcher.get_summary() if self.watcher else {},
            "suggestions": self.suggestion_engine.get_summary() if self.suggestion_engine else {},
            "council": self.council.get_summary() if self.council else {},
            "phases": self.phase_validator.get_summary() if self.phase_validator else {}
        }
def main():
    """CLI entry point.

    Parses arguments, builds a PipelineConfig from the flags, and
    dispatches to the requested subcommand. Output is human-readable
    text by default, or JSON with --json.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="Architectural Test Pipeline - Multi-layer oversight system",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python pipeline.py run                 # Full pipeline
  python pipeline.py run --inject        # With injection tests
  python pipeline.py run --phase 5       # Focus on Phase 5
  python pipeline.py validate --phase 5  # Validate specific phase
  python pipeline.py quick               # Quick validation
  python pipeline.py report              # Generate report only
  python pipeline.py matrix              # Show phase matrix
"""
    )
    parser.add_argument("command", choices=["run", "quick", "validate", "report", "matrix", "status"],
                        help="Command to execute")
    parser.add_argument("--phase", type=int, help="Focus on specific phase")
    parser.add_argument("--inject", action="store_true", help="Run injection tests")
    parser.add_argument("--unsafe", action="store_true", help="Disable safe mode")
    parser.add_argument("--auto-fix", action="store_true", help="Enable auto-fix")
    parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    args = parser.parse_args()

    # Fix: `validate` without --phase previously fell through every elif
    # and exited silently. Fail loudly instead (parser.error exits 2).
    # `is None` rather than truthiness also keeps a phase of 0 usable.
    if args.command == "validate" and args.phase is None:
        parser.error("the 'validate' command requires --phase")

    # Build config from the CLI flags.
    config = PipelineConfig(
        run_injections=args.inject,
        safe_mode=not args.unsafe,
        focus_phase=args.phase,
        auto_fix_enabled=args.auto_fix,
        verbose=args.verbose
    )
    pipeline = ArchitecturalTestPipeline(config)

    if args.command == "run":
        result = pipeline.run()
        if args.json:
            print(json.dumps(asdict(result), indent=2))
        else:
            print(f"\n{'='*60}")
            print("PIPELINE RESULT SUMMARY")
            print(f"{'='*60}")
            print(f"Status: {'✅ SUCCESS' if result.success else '❌ FAILED'}")
            print(f"Duration: {result.duration_ms}ms")
            print(f"Phases Validated: {result.phases_validated}")
            print(f"Anomalies Detected: {result.anomalies_detected}")
            print(f"Suggestions Generated: {result.suggestions_generated}")
            print(f"Council Decisions: {result.council_decisions}")
            if result.injection_tests_run > 0:
                print(f"Injection Tests: {result.injection_tests_passed}/{result.injection_tests_run} passed")
            if result.report_id:
                print(f"\nReport: testing/oversight/reports/{result.report_id}.md")
            if result.errors:
                print(f"\nErrors:")
                for err in result.errors:
                    print(f"  - {err}")
    elif args.command == "quick":
        result = pipeline.run_quick_validation()
        if args.json:
            print(json.dumps(result, indent=2))
        else:
            print(f"\n{'='*60}")
            print("QUICK VALIDATION SUMMARY")
            print(f"{'='*60}")
            print(f"Phases: {result['phases'].get('phases_validated', 0)}")
            print(f"Coverage: {result['phases'].get('average_coverage', 0)}%")
            print(f"Anomalies: {result['anomalies'].get('total_anomalies', 0)}")
            print(f"Phase 5: {result['phase_5_status']}")
            if result['critical_issues']:
                print(f"\nCritical Issues:")
                for issue in result['critical_issues'][:5]:
                    print(f"  - {issue}")
    elif args.command == "validate":
        # --phase presence already enforced above.
        result = pipeline.validate_phase(args.phase)
        if args.json:
            print(json.dumps(result, indent=2))
        else:
            print(f"\n{'='*60}")
            print(f"PHASE {result['phase']} VALIDATION: {result['name']}")
            print(f"{'='*60}")
            print(f"Status: {result['status']}")
            print(f"Coverage: {result['coverage']:.1f}%")
            print(f"Anomalies: {result['anomalies']}")
            print(f"Suggestions: {result['suggestions']}")
            if result['decisions']:
                print(f"\nCouncil Decisions:")
                for d in result['decisions']:
                    icon = "🤖" if d['auto_fix'] else "👤"
                    print(f"  {icon} {d['decision']}: {d['suggestion'][:50]}...")
            if result['gaps']:
                print(f"\nGaps:")
                for gap in result['gaps'][:5]:
                    print(f"  - {gap}")
    elif args.command == "report":
        report = pipeline.reporter.generate_report(include_injections=args.inject)
        if args.json:
            print(json.dumps(asdict(report), indent=2, default=str))
        else:
            print(report.to_markdown())
    elif args.command == "matrix":
        pipeline.phase_validator.validate_all_phases()
        print(pipeline.phase_validator.get_phase_matrix())
    elif args.command == "status":
        status = pipeline.get_status()
        if args.json:
            print(json.dumps(status, indent=2))
        else:
            print(f"\n{'='*60}")
            print("PIPELINE STATUS")
            print(f"{'='*60}")
            print(f"Safe Mode: {status['config']['safe_mode']}")
            print(f"Focus Phase: {status['config']['focus_phase'] or 'All'}")
            print(f"Auto-Fix: {status['config']['auto_fix_enabled']}")
# Allow running this module directly as a script (it is normally part of a
# package, given the relative imports at the top of the file).
if __name__ == "__main__":
    main()