agent-governance/tests/governance/test_phase6_orchestration.py
commit 8c6e7831e9 Add Phase 10-12 implementation: multi-tenant, marketplace, observability
Major additions:
- marketplace/: Agent template registry with FTS5 search, ratings, versioning
- observability/: Prometheus metrics, distributed tracing, structured logging
- ledger/migrations/: Database migration scripts for multi-tenant support
- tests/governance/: 15 new test files for phases 6-12 (295 total tests)
- bin/validate-phases: Full 12-phase validation script

New features:
- Multi-tenant support with tenant isolation and quota enforcement
- Agent marketplace with semantic versioning and search
- Observability with metrics, tracing, and log correlation
- Tier-1 agent bootstrap scripts

Updated components:
- ledger/api.py: Extended API for tenants, marketplace, observability
- ledger/schema.sql: Added tenant, project, marketplace tables
- testing/framework.ts: Enhanced test framework
- checkpoint/checkpoint.py: Improved checkpoint management

Archived:
- External integrations (Slack/GitHub/PagerDuty) moved to .archive/
- Old checkpoint files cleaned up

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-24 18:39:47 -05:00

642 lines
20 KiB
Python

#!/usr/bin/env python3
"""
Phase 6: Multi-Agent Orchestration Tests
=========================================
Tests for model controller, pipeline execution, and multi-agent coordination.
Required tests:
- model_controller: Verify model controller initialization and config
- pipeline_parser: Verify pipeline YAML parsing
- pipeline_validation: Verify schema validation for pipelines
- pipeline_execution: Verify pipeline stage execution
- template_generation: Verify agent templates load and merge configs
- test_execution: Verify pipeline executor runs a simple pipeline
- multi_agent_coordination: Verify agent coordination mechanisms
"""
import json
import os
import sys
from pathlib import Path

# Paths to the governance components under test. sys.path is extended so
# the orchestrator, pipeline, and multi-agent modules can be imported
# directly by name.
BASE_PATH = Path("/opt/agent-governance")
ORCHESTRATOR_PATH = BASE_PATH / "orchestrator"
PIPELINE_PATH = BASE_PATH / "pipeline"
AGENTS_PATH = BASE_PATH / "agents"
sys.path.insert(0, str(ORCHESTRATOR_PATH))
sys.path.insert(0, str(PIPELINE_PATH))
sys.path.insert(0, str(AGENTS_PATH / "multi-agent"))

# Global pass/fail counters, incremented by every test function.
PASSED = 0
FAILED = 0
def log(msg: str, status: str = "info") -> None:
    """Print *msg* prefixed with a colored pass/fail icon (or none for info)."""
    markers = {"pass": "\033[92m✓\033[0m", "fail": "\033[91m✗\033[0m", "info": ""}
    icon = markers.get(status, "")
    print(f" {icon} {msg}")
def load_pipeline_module():
    """Load pipeline.py from PIPELINE_PATH and return the module object.

    pipeline.py depends on "pipeline.core", so core.py is loaded and
    registered in sys.modules under that name first. The pipeline module
    itself is registered *before* execution (mirroring the normal import
    machinery for modules that reference themselves during import).

    Returns:
        The executed ``pipeline`` module.

    Raises:
        ImportError: if either module spec cannot be created.
    """
    import importlib.util

    core_path = PIPELINE_PATH / "core.py"
    pipeline_path = PIPELINE_PATH / "pipeline.py"
    core_spec = importlib.util.spec_from_file_location("pipeline.core", core_path)
    if core_spec is None or core_spec.loader is None:
        raise ImportError("pipeline.core spec missing")
    core_module = importlib.util.module_from_spec(core_spec)
    core_spec.loader.exec_module(core_module)
    sys.modules["pipeline.core"] = core_module
    pipeline_spec = importlib.util.spec_from_file_location("pipeline", pipeline_path)
    if pipeline_spec is None or pipeline_spec.loader is None:
        raise ImportError("pipeline module spec missing")
    pipeline_module = importlib.util.module_from_spec(pipeline_spec)
    sys.modules["pipeline"] = pipeline_module
    try:
        pipeline_spec.loader.exec_module(pipeline_module)
    except BaseException:
        # Don't leave a half-initialized module behind on failure; a stale
        # entry would make later "import pipeline" silently return junk.
        sys.modules.pop("pipeline", None)
        raise
    return pipeline_module
def load_core_module():
    """Load core.py from PIPELINE_PATH as the "pipeline.core" module and return it."""
    import importlib.util

    spec = importlib.util.spec_from_file_location(
        "pipeline.core", PIPELINE_PATH / "core.py"
    )
    if spec is None or spec.loader is None:
        raise ImportError("pipeline.core spec missing")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
def test_model_controller():
    """Test model controller initialization and configuration.

    Verifies the controller module and config.json exist, that the config
    has the required sections and model/safety entries, and that the
    controller source is syntactically valid Python.

    Returns:
        bool: False on an early fatal failure, True otherwise (individual
        check results are accumulated in the PASSED/FAILED globals).
    """
    global PASSED, FAILED
    print("\n[TEST] model_controller")
    # 1. Check model controller module exists
    controller_module = ORCHESTRATOR_PATH / "model_controller.py"
    if not controller_module.exists():
        log(f"Model controller not found: {controller_module}", "fail")
        FAILED += 1
        return False
    log("Model controller module exists", "pass")
    PASSED += 1
    # 2. Check orchestrator config exists
    config_file = ORCHESTRATOR_PATH / "config.json"
    if not config_file.exists():
        log(f"Config file not found: {config_file}", "fail")
        FAILED += 1
        return False
    log("Orchestrator config exists", "pass")
    PASSED += 1
    # 3. Validate config structure
    try:
        with open(config_file) as f:
            config = json.load(f)
        required_sections = ["models", "execution", "safety"]
        for section in required_sections:
            if section in config:
                log(f"Config has '{section}' section", "pass")
                PASSED += 1
            else:
                log(f"Config missing '{section}' section", "fail")
                FAILED += 1
    except json.JSONDecodeError as e:
        log(f"Config JSON invalid: {e}", "fail")
        FAILED += 1
        return False
    # 4. Check model definitions
    if "models" in config:
        models = config["models"]
        expected_models = ["minimax", "gemini", "gemini-pro"]
        for model in expected_models:
            if model in models:
                log(f"Model '{model}' configured", "pass")
                PASSED += 1
            else:
                log(f"Model '{model}' missing", "fail")
                FAILED += 1
    # 5. Check safety config (both keys optional; absence is not a failure)
    if "safety" in config:
        safety = config["safety"]
        if "max_retries" in safety:
            log(f"Safety max_retries: {safety['max_retries']}", "pass")
            PASSED += 1
        if "timeout" in safety:
            log(f"Safety timeout: {safety['timeout']}s", "pass")
            PASSED += 1
    # 6. Syntax-check the module without executing it.
    # FIX: the previous implementation only called module_from_spec(),
    # which neither parses nor runs the file, so it could never detect a
    # broken module. compile() actually validates the source.
    try:
        source = controller_module.read_text()
        compile(source, str(controller_module), "exec")
        log("Model controller module importable", "pass")
        PASSED += 1
    except Exception as e:
        # SyntaxError from compile() or OSError from read_text()
        log(f"Model controller import failed: {e}", "fail")
        FAILED += 1
    return True
def test_pipeline_parser():
    """Test pipeline YAML parsing capabilities.

    Verifies the pipeline and core modules exist on disk, that the core
    module's enums/classes expose the expected members, and that the
    schemas/ and templates/ directories are present.

    Returns:
        bool: False on an early fatal failure, True otherwise (individual
        check results are accumulated in the PASSED/FAILED globals).
    """
    global PASSED, FAILED
    print("\n[TEST] pipeline_parser")
    # 1. Check pipeline module exists
    pipeline_module = PIPELINE_PATH / "pipeline.py"
    if not pipeline_module.exists():
        log(f"Pipeline module not found: {pipeline_module}", "fail")
        FAILED += 1
        return False
    log("Pipeline module exists", "pass")
    PASSED += 1
    # 2. Check core definitions exist
    core_module = PIPELINE_PATH / "core.py"
    if not core_module.exists():
        log(f"Core module not found: {core_module}", "fail")
        FAILED += 1
        return False
    log("Core definitions module exists", "pass")
    PASSED += 1
    # 3. Import core definitions
    try:
        # NOTE: rebinds core_module from a Path to the loaded module object.
        core_module = load_core_module()
        StageType = core_module.StageType
        StageStatus = core_module.StageStatus
        AgentPhase = core_module.AgentPhase
        # PipelineContext is bound only to prove the attribute exists;
        # it is exercised in test_pipeline_execution.
        PipelineContext = core_module.PipelineContext
        RedisKeys = core_module.RedisKeys
        log("Core types importable", "pass")
        PASSED += 1
        # 4. Verify StageType enum exposes every expected stage kind
        stage_types = [e.value for e in StageType]
        expected_stages = ["agent", "gate", "parallel", "condition"]
        for stage in expected_stages:
            if stage in stage_types:
                log(f"StageType.{stage} exists", "pass")
                PASSED += 1
            else:
                log(f"StageType.{stage} missing", "fail")
                FAILED += 1
        # 5. Verify StageStatus enum covers the full lifecycle
        status_values = [e.value for e in StageStatus]
        expected_statuses = ["pending", "running", "completed", "failed", "skipped"]
        for status in expected_statuses:
            if status in status_values:
                log(f"StageStatus.{status} exists", "pass")
                PASSED += 1
            else:
                log(f"StageStatus.{status} missing", "fail")
                FAILED += 1
        # 6. Verify AgentPhase enum covers the agent workflow phases
        phase_values = [e.value for e in AgentPhase]
        expected_phases = ["PREFLIGHT", "PLAN", "EXECUTE", "VERIFY", "REPORT"]
        for phase in expected_phases:
            if phase in phase_values:
                log(f"AgentPhase.{phase} exists", "pass")
                PASSED += 1
            else:
                log(f"AgentPhase.{phase} missing", "fail")
                FAILED += 1
        # 7. Verify RedisKeys class (absence is not counted as a failure)
        if hasattr(RedisKeys, "agent_state"):
            log("RedisKeys.agent_state exists", "pass")
            PASSED += 1
        if hasattr(RedisKeys, "instruction_queue"):
            log("RedisKeys.instruction_queue exists", "pass")
            PASSED += 1
    except ImportError as e:
        log(f"Core import failed: {e}", "fail")
        FAILED += 1
        return False
    # 8. Check schemas directory exists (JSON or YAML schema files)
    schema_dir = PIPELINE_PATH / "schemas"
    if schema_dir.exists():
        schemas = list(schema_dir.glob("*.json")) + list(schema_dir.glob("*.yaml"))
        log(f"Pipeline schemas directory exists ({len(schemas)} files)", "pass")
        PASSED += 1
    else:
        log("Pipeline schemas directory missing", "fail")
        FAILED += 1
    # 9. Check templates directory exists (.yaml and .yml both accepted)
    templates_dir = PIPELINE_PATH / "templates"
    if templates_dir.exists():
        templates = list(templates_dir.glob("*.yaml")) + list(
            templates_dir.glob("*.yml")
        )
        log(f"Pipeline templates directory exists ({len(templates)} files)", "pass")
        PASSED += 1
    else:
        log("Pipeline templates directory missing", "fail")
        FAILED += 1
    return True
def test_pipeline_execution():
    """Test pipeline execution infrastructure.

    Verifies the governance ledger database exists with the expected
    tables, that the Redis-compatible DragonflyDB instance is reachable
    for pipeline state, and that a PipelineContext can be instantiated.

    Returns:
        bool: False if the ledger is missing entirely, True otherwise
        (individual check results go into the PASSED/FAILED globals).
    """
    global PASSED, FAILED
    print("\n[TEST] pipeline_execution")
    # 1. Check ledger exists for pipeline logging
    ledger_path = BASE_PATH / "ledger" / "governance.db"
    if ledger_path.exists():
        log("Governance ledger exists", "pass")
        PASSED += 1
    else:
        log("Governance ledger missing", "fail")
        FAILED += 1
        return False
    # 2. Check ledger has orchestration tables
    try:
        import sqlite3

        conn = sqlite3.connect(ledger_path)
        cursor = conn.cursor()
        # List every user table in the database
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
        tables = [row[0] for row in cursor.fetchall()]
        expected_tables = ["agent_actions", "agent_metrics"]
        for table in expected_tables:
            if table in tables:
                log(f"Ledger has '{table}' table", "pass")
                PASSED += 1
            else:
                log(f"Ledger missing '{table}' table", "fail")
                FAILED += 1
        # 3. orchestration_log is optional; it may be created lazily
        if "orchestration_log" in tables:
            log("Ledger has 'orchestration_log' table", "pass")
            PASSED += 1
            # PRAGMA table_info rows are (cid, name, type, ...); row[1] is
            # the column name.
            cursor.execute("PRAGMA table_info(orchestration_log)")
            columns = [row[1] for row in cursor.fetchall()]
            expected_columns = ["timestamp", "agent_id", "action"]
            found = sum(1 for c in expected_columns if c in columns)
            log(
                f"orchestration_log has {found}/{len(expected_columns)} expected columns",
                "pass",
            )
            PASSED += 1
        else:
            log("orchestration_log table missing (may be created on first use)", "info")
        conn.close()
    except Exception as e:
        log(f"Ledger check failed: {e}", "fail")
        FAILED += 1
    # 4. Check DragonflyDB connectivity for pipeline state
    try:
        import redis

        # Best-effort: fetch the DragonflyDB password from Vault; on any
        # failure fall back to an unauthenticated connection attempt.
        password = ""
        try:
            import subprocess

            with open("/opt/vault/init-keys.json") as f:
                token = json.load(f)["root_token"]
            # -k skips TLS verification (local Vault with a self-signed cert
            # — presumably; confirm before reusing elsewhere).
            result = subprocess.run(
                [
                    "curl",
                    "-sk",
                    "-H",
                    f"X-Vault-Token: {token}",
                    "https://127.0.0.1:8200/v1/secret/data/services/dragonfly",
                ],
                capture_output=True,
                text=True,
                timeout=5,
            )
            if result.returncode == 0:
                # KV v2 responses nest the secret under data.data
                creds = json.loads(result.stdout).get("data", {}).get("data", {})
                password = creds.get("password", "")
        except Exception:
            pass
        r = redis.Redis(
            host="127.0.0.1", port=6379, password=password, decode_responses=True
        )
        r.ping()
        log("DragonflyDB reachable for pipeline state", "pass")
        PASSED += 1
        # Round-trip a throwaway key; 5s TTL so it self-cleans on failure
        test_key = "pipeline:test:phase6"
        r.set(test_key, "test", ex=5)
        if r.get(test_key) == "test":
            log("DragonflyDB read/write working", "pass")
            PASSED += 1
        r.delete(test_key)
    except Exception as e:
        log(f"DragonflyDB check failed: {e}", "fail")
        FAILED += 1
    # 5. Verify PipelineContext can be instantiated
    try:
        core_module = load_core_module()
        ctx = core_module.PipelineContext(
            pipeline_name="test-pipeline", run_id="run-001", inputs={}
        )
        log(f"PipelineContext instantiated: {ctx.pipeline_name}", "pass")
        PASSED += 1
    except Exception as e:
        log(f"PipelineContext failed: {e}", "fail")
        FAILED += 1
    return True
def test_pipeline_validation():
    """Test pipeline schema validation.

    A well-formed pipeline definition must pass PipelineParser.validate();
    a malformed one must be rejected with ValueError.
    """
    global PASSED, FAILED
    print("\n[TEST] pipeline_validation")
    try:
        mod = load_pipeline_module()
        schema_parser = mod.PipelineParser()
        good = {
            "name": "sample-pipeline",
            "version": "1.0.0",
            "stages": [
                {"name": "plan", "type": "agent", "agent": {"template": "default"}}
            ],
        }
        schema_parser.validate(good)
        log("Valid pipeline passes schema validation", "pass")
        PASSED += 1
        bad = {"name": "BadName", "version": "1", "stages": []}
        try:
            schema_parser.validate(bad)
        except ValueError:
            # Rejection is exactly what we want here.
            log("Invalid pipeline rejected by schema", "pass")
            PASSED += 1
        else:
            log("Invalid pipeline unexpectedly validated", "fail")
            FAILED += 1
    except Exception as e:
        log(f"Pipeline validation test failed: {e}", "fail")
        FAILED += 1
    return True
def test_template_generation():
    """Test agent template loading and config merging.

    Loads the "default" agent template, checks its tier and allowed
    actions, and verifies instantiate() merges override keys in.
    """
    global PASSED, FAILED
    print("\n[TEST] template_generation")
    try:
        mod = load_pipeline_module()
        tmpl = mod.AgentTemplate("default")
        # Default template is expected at tier 0
        if tmpl.config.get("tier") == 0:
            log("Default template tier is 0", "pass")
            PASSED += 1
        else:
            log("Default template tier mismatch", "fail")
            FAILED += 1
        # Baseline permission set must include read_docs
        actions = tmpl.config.get("allowed_actions", [])
        if "read_docs" in actions:
            log("Default template includes read_docs", "pass")
            PASSED += 1
        else:
            log("Default template missing read_docs", "fail")
            FAILED += 1
        # instantiate() should merge override keys into the base config
        merged = tmpl.instantiate({"role": "observer", "extra": "value"})
        got_role = merged.get("role") == "observer"
        got_extra = merged.get("extra") == "value"
        if got_role and got_extra:
            log("Template instantiation merges overrides", "pass")
            PASSED += 1
        else:
            log("Template instantiation missing overrides", "fail")
            FAILED += 1
    except Exception as e:
        log(f"Template generation test failed: {e}", "fail")
        FAILED += 1
    return True
def test_execution():
    """Test pipeline executor with a minimal pipeline.

    Runs a one-stage pipeline through PipelineExecutor and checks the
    run succeeds and reports the stage in its result.
    """
    global PASSED, FAILED
    print("\n[TEST] test_execution")
    try:
        import asyncio

        mod = load_pipeline_module()
        definition = {
            "name": "execution-test",
            "version": "1.0.0",
            "stages": [
                {"name": "plan", "type": "agent", "agent": {"template": "default"}}
            ],
        }
        runner = mod.PipelineExecutor()
        # execute() is a coroutine returning (success, result_dict)
        ok, outcome = asyncio.run(runner.execute(definition))
        if ok and "plan" in outcome.get("stages", {}):
            log("Pipeline executor completed minimal run", "pass")
            PASSED += 1
        else:
            log("Pipeline executor failed minimal run", "fail")
            FAILED += 1
    except Exception as e:
        log(f"Pipeline execution test failed: {e}", "fail")
        FAILED += 1
    return True
def test_multi_agent_coordination():
    """Test multi-agent coordination mechanisms.

    Verifies the TypeScript multi-agent project exists with its source
    files, a valid package.json, and installed node_modules, then scans
    the sources (plain substring checks, not a parse) for expected
    coordination/orchestration patterns.

    Returns:
        bool: False if the multi-agent directory is missing, True
        otherwise (check results go into the PASSED/FAILED globals).
    """
    global PASSED, FAILED
    print("\n[TEST] multi_agent_coordination")
    multi_agent_path = AGENTS_PATH / "multi-agent"
    # 1. Check multi-agent directory exists
    if not multi_agent_path.exists():
        log(f"Multi-agent directory not found: {multi_agent_path}", "fail")
        FAILED += 1
        return False
    log("Multi-agent directory exists", "pass")
    PASSED += 1
    # 2. Check TypeScript files exist
    ts_files = ["orchestrator.ts", "agents.ts", "coordination.ts", "types.ts"]
    for ts_file in ts_files:
        file_path = multi_agent_path / ts_file
        if file_path.exists():
            log(f"{ts_file} exists", "pass")
            PASSED += 1
        else:
            log(f"{ts_file} missing", "fail")
            FAILED += 1
    # 3. Check package.json exists and parses as JSON
    package_json = multi_agent_path / "package.json"
    if package_json.exists():
        try:
            with open(package_json) as f:
                pkg = json.load(f)
            log(f"package.json valid (name: {pkg.get('name', 'N/A')})", "pass")
            PASSED += 1
        except json.JSONDecodeError:
            log("package.json invalid JSON", "fail")
            FAILED += 1
    else:
        log("package.json missing", "fail")
        FAILED += 1
    # 4. Check node_modules installed
    node_modules = multi_agent_path / "node_modules"
    if node_modules.exists() and node_modules.is_dir():
        # Top-level entry count only (scoped packages count as one dir)
        module_count = len(list(node_modules.iterdir()))
        log(f"node_modules installed ({module_count} packages)", "pass")
        PASSED += 1
    else:
        log("node_modules not installed", "fail")
        FAILED += 1
    # 5. Check coordination patterns in coordination.ts
    coordination_ts = multi_agent_path / "coordination.ts"
    if coordination_ts.exists():
        content = coordination_ts.read_text()
        patterns = [
            ("class", "coordination class defined"),
            ("async", "async patterns used"),
            ("Promise", "Promise-based coordination"),
        ]
        for pattern, desc in patterns:
            if pattern in content:
                log(f"{desc}", "pass")
                PASSED += 1
            else:
                # Informational only; a missing pattern does not fail the test
                log(f"{desc} - not found", "info")
    # 6. Check orchestrator patterns in orchestrator.ts
    orchestrator_ts = multi_agent_path / "orchestrator.ts"
    if orchestrator_ts.exists():
        content = orchestrator_ts.read_text()
        if "Orchestrator" in content:
            log("Orchestrator class defined", "pass")
            PASSED += 1
        if "delegate" in content.lower() or "dispatch" in content.lower():
            log("Delegation/dispatch pattern found", "pass")
            PASSED += 1
    # 7. Check agent registry in agents.ts
    agents_ts = multi_agent_path / "agents.ts"
    if agents_ts.exists():
        content = agents_ts.read_text()
        if "Agent" in content:
            log("Agent definitions found", "pass")
            PASSED += 1
        if "register" in content.lower() or "Registry" in content:
            log("Agent registry pattern found", "pass")
            PASSED += 1
    return True
def main():
    """Run all Phase 6 tests and return True when every check passed."""
    global PASSED, FAILED
    banner = "=" * 60
    print("\n" + banner)
    print("PHASE 6: MULTI-AGENT ORCHESTRATION TESTS")
    print(banner)
    # Each test updates the PASSED/FAILED globals; run in fixed order.
    tests = (
        test_model_controller,
        test_pipeline_parser,
        test_pipeline_validation,
        test_pipeline_execution,
        test_template_generation,
        test_execution,
        test_multi_agent_coordination,
    )
    try:
        for test in tests:
            test()
    except Exception as e:
        # Catch-all so a crash in one test still produces a summary.
        print(f"\n\033[91mTest execution error: {e}\033[0m")
        import traceback

        traceback.print_exc()
        FAILED += 1
    print("\n" + banner)
    print(f"RESULTS: {PASSED} passed, {FAILED} failed")
    print(banner + "\n")
    return FAILED == 0
if __name__ == "__main__":
    # Exit status 0 only when every test passed.
    sys.exit(0 if main() else 1)