Major additions: - marketplace/: Agent template registry with FTS5 search, ratings, versioning - observability/: Prometheus metrics, distributed tracing, structured logging - ledger/migrations/: Database migration scripts for multi-tenant support - tests/governance/: 15 new test files for phases 6-12 (295 total tests) - bin/validate-phases: Full 12-phase validation script New features: - Multi-tenant support with tenant isolation and quota enforcement - Agent marketplace with semantic versioning and search - Observability with metrics, tracing, and log correlation - Tier-1 agent bootstrap scripts Updated components: - ledger/api.py: Extended API for tenants, marketplace, observability - ledger/schema.sql: Added tenant, project, marketplace tables - testing/framework.ts: Enhanced test framework - checkpoint/checkpoint.py: Improved checkpoint management Archived: - External integrations (Slack/GitHub/PagerDuty) moved to .archive/ - Old checkpoint files cleaned up Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
642 lines
20 KiB
Python
642 lines
20 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Phase 6: Multi-Agent Orchestration Tests
|
|
=========================================
|
|
Tests for model controller, pipeline execution, and multi-agent coordination.
|
|
|
|
Required tests:
|
|
- model_controller: Verify model controller initialization and config
|
|
- pipeline_parser: Verify pipeline YAML parsing
|
|
- pipeline_validation: Verify schema validation for pipelines
|
|
- pipeline_execution: Verify pipeline stage execution
|
|
- template_generation: Verify agent templates load and merge configs
|
|
- test_execution: Verify pipeline executor runs a simple pipeline
|
|
- multi_agent_coordination: Verify agent coordination mechanisms
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Filesystem layout of the governance installation under test.
BASE_PATH = Path("/opt/agent-governance")
ORCHESTRATOR_PATH = BASE_PATH / "orchestrator"  # model_controller.py + config.json
PIPELINE_PATH = BASE_PATH / "pipeline"  # pipeline.py, core.py, schemas/, templates/
AGENTS_PATH = BASE_PATH / "agents"  # multi-agent TypeScript workspace lives here

# Make orchestrator/pipeline/multi-agent code importable by the tests below.
sys.path.insert(0, str(ORCHESTRATOR_PATH))
sys.path.insert(0, str(PIPELINE_PATH))
sys.path.insert(0, str(AGENTS_PATH / "multi-agent"))

# Global pass/fail counters; every test mutates these via `global PASSED, FAILED`.
PASSED = 0
FAILED = 0
|
|
|
|
|
|
def log(msg: str, status: str = "info"):
    """Print *msg* prefixed by a colored status icon (pass/fail/info)."""
    prefix = {
        "pass": "\033[92m✓\033[0m",
        "fail": "\033[91m✗\033[0m",
        "info": "→",
    }.get(status, "•")
    print(f" {prefix} {msg}")
|
|
|
|
|
|
def load_pipeline_module():
    """Load pipeline/pipeline.py straight from disk and return the module.

    core.py is loaded first and registered in sys.modules under
    "pipeline.core" — presumably because pipeline.py imports it as a
    submodule (TODO confirm against pipeline.py).

    Returns:
        The executed ``pipeline`` module object.

    Raises:
        ImportError: if a module spec cannot be built for either file.
    """
    import importlib.util

    core_path = PIPELINE_PATH / "core.py"
    pipeline_path = PIPELINE_PATH / "pipeline.py"

    core_spec = importlib.util.spec_from_file_location("pipeline.core", core_path)
    if core_spec is None or core_spec.loader is None:
        raise ImportError("pipeline.core spec missing")
    core_module = importlib.util.module_from_spec(core_spec)
    core_spec.loader.exec_module(core_module)
    sys.modules["pipeline.core"] = core_module

    pipeline_spec = importlib.util.spec_from_file_location("pipeline", pipeline_path)
    if pipeline_spec is None or pipeline_spec.loader is None:
        raise ImportError("pipeline module spec missing")
    pipeline_module = importlib.util.module_from_spec(pipeline_spec)
    # Register before exec so imports resolved during execution find it.
    sys.modules["pipeline"] = pipeline_module
    pipeline_spec.loader.exec_module(pipeline_module)

    return pipeline_module
|
|
|
|
|
|
def load_core_module():
    """Load and execute pipeline/core.py from disk; return the module object.

    Unlike load_pipeline_module, the result is NOT registered in
    sys.modules — callers get a standalone module instance.
    """
    import importlib.util

    spec = importlib.util.spec_from_file_location(
        "pipeline.core", PIPELINE_PATH / "core.py"
    )
    if spec is None or spec.loader is None:
        raise ImportError("pipeline.core spec missing")

    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
|
|
|
|
|
|
def test_model_controller():
    """Check the orchestrator's model controller module and its config.json.

    Verifies the module file exists, the JSON config parses and has the
    expected sections/models/safety keys, and that an import spec can be
    built for the module. Increments the global PASSED/FAILED counters
    and returns False on an early fatal failure, True otherwise.
    """
    global PASSED, FAILED

    print("\n[TEST] model_controller")

    # 1. Check model controller module exists
    controller_module = ORCHESTRATOR_PATH / "model_controller.py"
    if not controller_module.exists():
        log(f"Model controller not found: {controller_module}", "fail")
        FAILED += 1
        return False

    log("Model controller module exists", "pass")
    PASSED += 1

    # 2. Check orchestrator config exists
    config_file = ORCHESTRATOR_PATH / "config.json"
    if not config_file.exists():
        log(f"Config file not found: {config_file}", "fail")
        FAILED += 1
        return False

    log("Orchestrator config exists", "pass")
    PASSED += 1

    # 3. Validate config structure
    # NOTE(review): only JSONDecodeError is caught here; an OSError while
    # opening the file would propagate to the caller — confirm intended.
    try:
        with open(config_file) as f:
            config = json.load(f)

        required_sections = ["models", "execution", "safety"]
        for section in required_sections:
            if section in config:
                log(f"Config has '{section}' section", "pass")
                PASSED += 1
            else:
                log(f"Config missing '{section}' section", "fail")
                FAILED += 1
    except json.JSONDecodeError as e:
        log(f"Config JSON invalid: {e}", "fail")
        FAILED += 1
        return False

    # 4. Check model definitions (config is guaranteed bound here: a parse
    # failure returned above).
    if "models" in config:
        models = config["models"]
        expected_models = ["minimax", "gemini", "gemini-pro"]
        for model in expected_models:
            if model in models:
                log(f"Model '{model}' configured", "pass")
                PASSED += 1
            else:
                log(f"Model '{model}' missing", "fail")
                FAILED += 1

    # 5. Check safety config — missing keys are silently skipped (no FAILED
    # increment), so these two checks are informational-positive only.
    if "safety" in config:
        safety = config["safety"]
        if "max_retries" in safety:
            log(f"Safety max_retries: {safety['max_retries']}", "pass")
            PASSED += 1
        if "timeout" in safety:
            log(f"Safety timeout: {safety['timeout']}s", "pass")
            PASSED += 1

    # 6. Try importing the module
    try:
        # Import test - just check syntax
        import importlib.util

        spec = importlib.util.spec_from_file_location(
            "model_controller", controller_module
        )
        if spec is None or spec.loader is None:
            log("Model controller spec missing", "fail")
            FAILED += 1
        else:
            # NOTE(review): module_from_spec does not execute (or even parse)
            # the file, so "importable" here only means a spec/module object
            # could be created — a syntax error would NOT be detected.
            importlib.util.module_from_spec(spec)
            log("Model controller module importable", "pass")
            PASSED += 1
    except Exception as e:
        log(f"Model controller import failed: {e}", "fail")
        FAILED += 1

    return True
|
|
|
|
|
|
def test_pipeline_parser():
    """Check pipeline/core.py definitions and the pipeline parser's assets.

    Verifies pipeline.py/core.py exist, that core.py exposes the expected
    enums (StageType, StageStatus, AgentPhase) and RedisKeys attributes,
    and that the schemas/ and templates/ directories are present.
    Updates the global PASSED/FAILED counters; returns False on an early
    fatal failure, True otherwise.
    """
    global PASSED, FAILED

    print("\n[TEST] pipeline_parser")

    # 1. Check pipeline module exists
    pipeline_module = PIPELINE_PATH / "pipeline.py"
    if not pipeline_module.exists():
        log(f"Pipeline module not found: {pipeline_module}", "fail")
        FAILED += 1
        return False

    log("Pipeline module exists", "pass")
    PASSED += 1

    # 2. Check core definitions exist
    core_module = PIPELINE_PATH / "core.py"
    if not core_module.exists():
        log(f"Core module not found: {core_module}", "fail")
        FAILED += 1
        return False

    log("Core definitions module exists", "pass")
    PASSED += 1

    # 3. Import core definitions
    try:
        # NOTE(review): core_module is rebound here from a Path to the
        # loaded module object — two meanings for one name.
        core_module = load_core_module()
        StageType = core_module.StageType
        StageStatus = core_module.StageStatus
        AgentPhase = core_module.AgentPhase
        # PipelineContext is bound but not exercised in this test (it is
        # instantiated in test_pipeline_execution instead).
        PipelineContext = core_module.PipelineContext
        RedisKeys = core_module.RedisKeys

        log("Core types importable", "pass")
        PASSED += 1

        # 4. Verify StageType enum exposes the expected stage kinds.
        stage_types = [e.value for e in StageType]
        expected_stages = ["agent", "gate", "parallel", "condition"]
        for stage in expected_stages:
            if stage in stage_types:
                log(f"StageType.{stage} exists", "pass")
                PASSED += 1
            else:
                log(f"StageType.{stage} missing", "fail")
                FAILED += 1

        # 5. Verify StageStatus enum covers the full lifecycle.
        status_values = [e.value for e in StageStatus]
        expected_statuses = ["pending", "running", "completed", "failed", "skipped"]
        for status in expected_statuses:
            if status in status_values:
                log(f"StageStatus.{status} exists", "pass")
                PASSED += 1
            else:
                log(f"StageStatus.{status} missing", "fail")
                FAILED += 1

        # 6. Verify AgentPhase enum (values are upper-case, unlike the
        # lower-case StageType/StageStatus values — matches core.py).
        phase_values = [e.value for e in AgentPhase]
        expected_phases = ["PREFLIGHT", "PLAN", "EXECUTE", "VERIFY", "REPORT"]
        for phase in expected_phases:
            if phase in phase_values:
                log(f"AgentPhase.{phase} exists", "pass")
                PASSED += 1
            else:
                log(f"AgentPhase.{phase} missing", "fail")
                FAILED += 1

        # 7. Verify RedisKeys class — absence is silently skipped (no
        # FAILED increment), so these are informational-positive checks.
        if hasattr(RedisKeys, "agent_state"):
            log("RedisKeys.agent_state exists", "pass")
            PASSED += 1
        if hasattr(RedisKeys, "instruction_queue"):
            log("RedisKeys.instruction_queue exists", "pass")
            PASSED += 1

    except ImportError as e:
        log(f"Core import failed: {e}", "fail")
        FAILED += 1
        return False

    # 8. Check schema exists
    schema_dir = PIPELINE_PATH / "schemas"
    if schema_dir.exists():
        schemas = list(schema_dir.glob("*.json")) + list(schema_dir.glob("*.yaml"))
        log(f"Pipeline schemas directory exists ({len(schemas)} files)", "pass")
        PASSED += 1
    else:
        log("Pipeline schemas directory missing", "fail")
        FAILED += 1

    # 9. Check templates exist (both .yaml and .yml extensions counted)
    templates_dir = PIPELINE_PATH / "templates"
    if templates_dir.exists():
        templates = list(templates_dir.glob("*.yaml")) + list(
            templates_dir.glob("*.yml")
        )
        log(f"Pipeline templates directory exists ({len(templates)} files)", "pass")
        PASSED += 1
    else:
        log("Pipeline templates directory missing", "fail")
        FAILED += 1

    return True
|
|
|
|
|
|
def test_pipeline_execution():
    """Check the infrastructure pipeline execution depends on.

    Probes three external pieces: the SQLite governance ledger (tables and
    orchestration_log columns), DragonflyDB reachability/read-write (with
    credentials best-effort fetched from Vault), and that PipelineContext
    can be instantiated. Updates the global PASSED/FAILED counters;
    returns False only if the ledger file itself is missing.
    """
    global PASSED, FAILED

    print("\n[TEST] pipeline_execution")

    # 1. Check ledger exists for pipeline logging
    ledger_path = BASE_PATH / "ledger" / "governance.db"
    if ledger_path.exists():
        log("Governance ledger exists", "pass")
        PASSED += 1
    else:
        log("Governance ledger missing", "fail")
        FAILED += 1
        return False

    # 2. Check ledger has orchestration tables
    try:
        import sqlite3

        conn = sqlite3.connect(ledger_path)
        cursor = conn.cursor()

        # Enumerate all tables once, then test membership in Python.
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
        tables = [row[0] for row in cursor.fetchall()]

        expected_tables = ["agent_actions", "agent_metrics"]
        for table in expected_tables:
            if table in tables:
                log(f"Ledger has '{table}' table", "pass")
                PASSED += 1
            else:
                log(f"Ledger missing '{table}' table", "fail")
                FAILED += 1

        # 3. Check orchestration_log table (optional: its absence is only
        # informational, since it may be created lazily on first use).
        if "orchestration_log" in tables:
            log("Ledger has 'orchestration_log' table", "pass")
            PASSED += 1

            # Check columns: PRAGMA table_info rows are
            # (cid, name, type, notnull, dflt_value, pk); index 1 is name.
            cursor.execute("PRAGMA table_info(orchestration_log)")
            columns = [row[1] for row in cursor.fetchall()]
            expected_columns = ["timestamp", "agent_id", "action"]
            found = sum(1 for c in expected_columns if c in columns)
            # NOTE(review): logged as "pass" even when found < expected —
            # confirm whether a partial column set should count as failure.
            log(
                f"orchestration_log has {found}/{len(expected_columns)} expected columns",
                "pass",
            )
            PASSED += 1
        else:
            log("orchestration_log table missing (may be created on first use)", "info")

        conn.close()
    except Exception as e:
        # NOTE(review): conn is not closed if an earlier statement raised.
        log(f"Ledger check failed: {e}", "fail")
        FAILED += 1

    # 4. Check DragonflyDB connectivity for pipeline state
    try:
        import redis

        # Best-effort: fetch the DragonflyDB password from Vault using the
        # root token on disk. Any failure here deliberately falls through
        # to an empty password rather than failing the test.
        password = ""
        try:
            import subprocess

            with open("/opt/vault/init-keys.json") as f:
                token = json.load(f)["root_token"]
            # curl -k is used (presumably because Vault serves a
            # self-signed cert) instead of urllib — TODO confirm.
            result = subprocess.run(
                [
                    "curl",
                    "-sk",
                    "-H",
                    f"X-Vault-Token: {token}",
                    "https://127.0.0.1:8200/v1/secret/data/services/dragonfly",
                ],
                capture_output=True,
                text=True,
                timeout=5,
            )
            if result.returncode == 0:
                # KV v2 responses nest the secret under data.data.
                creds = json.loads(result.stdout).get("data", {}).get("data", {})
                password = creds.get("password", "")
        except Exception:
            pass

        r = redis.Redis(
            host="127.0.0.1", port=6379, password=password, decode_responses=True
        )
        r.ping()
        log("DragonflyDB reachable for pipeline state", "pass")
        PASSED += 1

        # Round-trip a short-lived key (5s TTL) to prove read/write works.
        test_key = "pipeline:test:phase6"
        r.set(test_key, "test", ex=5)
        if r.get(test_key) == "test":
            log("DragonflyDB read/write working", "pass")
            PASSED += 1
        r.delete(test_key)
    except Exception as e:
        log(f"DragonflyDB check failed: {e}", "fail")
        FAILED += 1

    # 5. Verify PipelineContext can be instantiated
    try:
        core_module = load_core_module()
        ctx = core_module.PipelineContext(
            pipeline_name="test-pipeline", run_id="run-001", inputs={}
        )
        log(f"PipelineContext instantiated: {ctx.pipeline_name}", "pass")
        PASSED += 1
    except Exception as e:
        log(f"PipelineContext failed: {e}", "fail")
        FAILED += 1

    return True
|
|
|
|
|
|
def test_pipeline_validation():
    """Check PipelineParser accepts a well-formed pipeline and rejects a bad one."""
    global PASSED, FAILED

    print("\n[TEST] pipeline_validation")

    good = {
        "name": "sample-pipeline",
        "version": "1.0.0",
        "stages": [
            {"name": "plan", "type": "agent", "agent": {"template": "default"}}
        ],
    }
    bad = {"name": "BadName", "version": "1", "stages": []}

    try:
        parser = load_pipeline_module().PipelineParser()

        # A schema-conforming pipeline must validate without raising.
        parser.validate(good)
        log("Valid pipeline passes schema validation", "pass")
        PASSED += 1

        # A malformed pipeline must be rejected with ValueError.
        try:
            parser.validate(bad)
        except ValueError:
            log("Invalid pipeline rejected by schema", "pass")
            PASSED += 1
        else:
            log("Invalid pipeline unexpectedly validated", "fail")
            FAILED += 1

    except Exception as e:
        log(f"Pipeline validation test failed: {e}", "fail")
        FAILED += 1

    return True
|
|
|
|
|
|
def test_template_generation():
    """Exercise AgentTemplate loading and override merging for 'default'."""
    global PASSED, FAILED

    print("\n[TEST] template_generation")

    def _check(ok, pass_msg, fail_msg):
        # Record one pass/fail result against the module-level counters.
        global PASSED, FAILED
        if ok:
            log(pass_msg, "pass")
            PASSED += 1
        else:
            log(fail_msg, "fail")
            FAILED += 1

    try:
        mod = load_pipeline_module()
        tmpl = mod.AgentTemplate("default")

        _check(
            tmpl.config.get("tier") == 0,
            "Default template tier is 0",
            "Default template tier mismatch",
        )
        _check(
            "read_docs" in tmpl.config.get("allowed_actions", []),
            "Default template includes read_docs",
            "Default template missing read_docs",
        )

        merged = tmpl.instantiate({"role": "observer", "extra": "value"})
        _check(
            merged.get("role") == "observer" and merged.get("extra") == "value",
            "Template instantiation merges overrides",
            "Template instantiation missing overrides",
        )

    except Exception as e:
        log(f"Template generation test failed: {e}", "fail")
        FAILED += 1

    return True
|
|
|
|
|
|
def test_execution():
    """Drive PipelineExecutor end-to-end through a one-stage pipeline."""
    global PASSED, FAILED

    print("\n[TEST] test_execution")

    try:
        import asyncio

        executor = load_pipeline_module().PipelineExecutor()
        definition = {
            "name": "execution-test",
            "version": "1.0.0",
            "stages": [
                {"name": "plan", "type": "agent", "agent": {"template": "default"}}
            ],
        }

        # execute() is a coroutine returning (success, result-dict).
        ok, outcome = asyncio.run(executor.execute(definition))

        if ok and "plan" in outcome.get("stages", {}):
            log("Pipeline executor completed minimal run", "pass")
            PASSED += 1
        else:
            log("Pipeline executor failed minimal run", "fail")
            FAILED += 1

    except Exception as e:
        log(f"Pipeline execution test failed: {e}", "fail")
        FAILED += 1

    return True
|
|
|
|
|
|
def test_multi_agent_coordination():
    """Check the multi-agent TypeScript workspace is present and well-formed.

    Probes the agents/multi-agent directory for the expected .ts sources,
    a valid package.json, an installed node_modules tree, and a few
    textual markers ("class", "async", "Orchestrator", ...) inside the
    sources. Updates the global PASSED/FAILED counters; returns False
    only if the directory itself is missing.
    """
    global PASSED, FAILED

    print("\n[TEST] multi_agent_coordination")

    multi_agent_path = AGENTS_PATH / "multi-agent"

    # 1. Check multi-agent directory exists
    if not multi_agent_path.exists():
        log(f"Multi-agent directory not found: {multi_agent_path}", "fail")
        FAILED += 1
        return False

    log("Multi-agent directory exists", "pass")
    PASSED += 1

    # 2. Check TypeScript files exist
    ts_files = ["orchestrator.ts", "agents.ts", "coordination.ts", "types.ts"]
    for ts_file in ts_files:
        file_path = multi_agent_path / ts_file
        if file_path.exists():
            log(f"{ts_file} exists", "pass")
            PASSED += 1
        else:
            log(f"{ts_file} missing", "fail")
            FAILED += 1

    # 3. Check package.json parses as JSON
    package_json = multi_agent_path / "package.json"
    if package_json.exists():
        try:
            with open(package_json) as f:
                pkg = json.load(f)
            log(f"package.json valid (name: {pkg.get('name', 'N/A')})", "pass")
            PASSED += 1
        except json.JSONDecodeError:
            log("package.json invalid JSON", "fail")
            FAILED += 1
    else:
        log("package.json missing", "fail")
        FAILED += 1

    # 4. Check node_modules installed (top-level entry count only —
    # includes scope dirs and dotfiles, so it is an approximation).
    node_modules = multi_agent_path / "node_modules"
    if node_modules.exists() and node_modules.is_dir():
        module_count = len(list(node_modules.iterdir()))
        log(f"node_modules installed ({module_count} packages)", "pass")
        PASSED += 1
    else:
        log("node_modules not installed", "fail")
        FAILED += 1

    # 5. Check coordination patterns in coordination.ts — plain substring
    # probes, so a comment containing the word would also match. Missing
    # patterns are only informational (no FAILED increment).
    coordination_ts = multi_agent_path / "coordination.ts"
    if coordination_ts.exists():
        content = coordination_ts.read_text()

        patterns = [
            ("class", "coordination class defined"),
            ("async", "async patterns used"),
            ("Promise", "Promise-based coordination"),
        ]

        for pattern, desc in patterns:
            if pattern in content:
                log(f"{desc}", "pass")
                PASSED += 1
            else:
                log(f"{desc} - not found", "info")

    # 6. Check orchestrator patterns in orchestrator.ts (absence of a
    # marker is silently skipped here — positives only).
    orchestrator_ts = multi_agent_path / "orchestrator.ts"
    if orchestrator_ts.exists():
        content = orchestrator_ts.read_text()

        if "Orchestrator" in content:
            log("Orchestrator class defined", "pass")
            PASSED += 1
        if "delegate" in content.lower() or "dispatch" in content.lower():
            log("Delegation/dispatch pattern found", "pass")
            PASSED += 1

    # 7. Check agent registry in agents.ts (same positives-only scheme)
    agents_ts = multi_agent_path / "agents.ts"
    if agents_ts.exists():
        content = agents_ts.read_text()

        if "Agent" in content:
            log("Agent definitions found", "pass")
            PASSED += 1
        if "register" in content.lower() or "Registry" in content:
            log("Agent registry pattern found", "pass")
            PASSED += 1

    return True
|
|
|
|
|
|
def main():
    """Run every Phase 6 test and return True when no check failed."""
    global PASSED, FAILED

    banner = "=" * 60
    print("\n" + banner)
    print("PHASE 6: MULTI-AGENT ORCHESTRATION TESTS")
    print(banner)

    # Order matters: later tests reuse modules the earlier ones verify.
    tests = (
        test_model_controller,
        test_pipeline_parser,
        test_pipeline_validation,
        test_pipeline_execution,
        test_template_generation,
        test_execution,
        test_multi_agent_coordination,
    )
    try:
        for run_test in tests:
            run_test()
    except Exception as e:
        # An unexpected crash aborts the remaining tests but still counts
        # as one failure so the exit code reflects it.
        print(f"\n\033[91mTest execution error: {e}\033[0m")
        import traceback

        traceback.print_exc()
        FAILED += 1

    print("\n" + banner)
    print(f"RESULTS: {PASSED} passed, {FAILED} failed")
    print(banner + "\n")

    return FAILED == 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Exit code 0 only when every check passed.
    sys.exit(0 if main() else 1)
|