Major additions: - marketplace/: Agent template registry with FTS5 search, ratings, versioning - observability/: Prometheus metrics, distributed tracing, structured logging - ledger/migrations/: Database migration scripts for multi-tenant support - tests/governance/: 15 new test files for phases 6-12 (295 total tests) - bin/validate-phases: Full 12-phase validation script New features: - Multi-tenant support with tenant isolation and quota enforcement - Agent marketplace with semantic versioning and search - Observability with metrics, tracing, and log correlation - Tier-1 agent bootstrap scripts Updated components: - ledger/api.py: Extended API for tenants, marketplace, observability - ledger/schema.sql: Added tenant, project, marketplace tables - testing/framework.ts: Enhanced test framework - checkpoint/checkpoint.py: Improved checkpoint management Archived: - External integrations (Slack/GitHub/PagerDuty) moved to .archive/ - Old checkpoint files cleaned up Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
642 lines
20 KiB
Python
642 lines
20 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Phase 6: Multi-Agent Orchestration Tests
|
|
=========================================
|
|
Tests for model controller, pipeline execution, and multi-agent coordination.
|
|
|
|
Required tests:
|
|
- model_controller: Verify model controller initialization and config
|
|
- pipeline_parser: Verify pipeline YAML parsing
|
|
- pipeline_validation: Verify schema validation for pipelines
|
|
- pipeline_execution: Verify pipeline stage execution
|
|
- template_generation: Verify agent templates load and merge configs
|
|
- test_execution: Verify pipeline executor runs a simple pipeline
|
|
- multi_agent_coordination: Verify agent coordination mechanisms
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Filesystem layout of the governance installation under test.
BASE_PATH = Path("/opt/agent-governance")
ORCHESTRATOR_PATH = BASE_PATH / "orchestrator"  # model_controller.py + config.json
PIPELINE_PATH = BASE_PATH / "pipeline"  # pipeline.py, core.py, schemas/, templates/
AGENTS_PATH = BASE_PATH / "agents"  # multi-agent TypeScript workspace lives here

# Make orchestrator/pipeline/multi-agent code importable by the tests below.
sys.path.insert(0, str(ORCHESTRATOR_PATH))
sys.path.insert(0, str(PIPELINE_PATH))
sys.path.insert(0, str(AGENTS_PATH / "multi-agent"))

# Global pass/fail counters; every test mutates these via `global PASSED, FAILED`.
PASSED = 0
FAILED = 0
|
|
|
|
|
|
def log(msg: str, status: str = "info"):
    """Print *msg* prefixed by a colored status icon (pass/fail/info)."""
    prefix = {
        "pass": "\033[92m✓\033[0m",
        "fail": "\033[91m✗\033[0m",
        "info": "→",
    }.get(status, "•")
    print(f" {prefix} {msg}")
|
|
|
|
|
|
def load_pipeline_module():
    """Load pipeline/pipeline.py straight from disk and return the module.

    core.py is loaded first and registered in sys.modules under
    "pipeline.core" — presumably because pipeline.py imports it as a
    submodule (TODO confirm against pipeline.py).

    Returns:
        The executed ``pipeline`` module object.

    Raises:
        ImportError: if a module spec cannot be built for either file.
    """
    import importlib.util

    core_path = PIPELINE_PATH / "core.py"
    pipeline_path = PIPELINE_PATH / "pipeline.py"

    core_spec = importlib.util.spec_from_file_location("pipeline.core", core_path)
    if core_spec is None or core_spec.loader is None:
        raise ImportError("pipeline.core spec missing")
    core_module = importlib.util.module_from_spec(core_spec)
    core_spec.loader.exec_module(core_module)
    sys.modules["pipeline.core"] = core_module

    pipeline_spec = importlib.util.spec_from_file_location("pipeline", pipeline_path)
    if pipeline_spec is None or pipeline_spec.loader is None:
        raise ImportError("pipeline module spec missing")
    pipeline_module = importlib.util.module_from_spec(pipeline_spec)
    # Register before exec so imports resolved during execution find it.
    sys.modules["pipeline"] = pipeline_module
    pipeline_spec.loader.exec_module(pipeline_module)

    return pipeline_module
|
|
|
|
|
|
def load_core_module():
    """Load and execute pipeline/core.py from disk; return the module object.

    Unlike load_pipeline_module, the result is NOT registered in
    sys.modules — callers get a standalone module instance.
    """
    import importlib.util

    spec = importlib.util.spec_from_file_location(
        "pipeline.core", PIPELINE_PATH / "core.py"
    )
    if spec is None or spec.loader is None:
        raise ImportError("pipeline.core spec missing")

    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
|
|
|
|
|
|
def test_model_controller():
    """Check the orchestrator's model controller module and its config.json.

    Verifies the module file exists, the JSON config parses and has the
    expected sections/models/safety keys, and that an import spec can be
    built for the module. Increments the global PASSED/FAILED counters
    and returns False on an early fatal failure, True otherwise.
    """
    global PASSED, FAILED

    print("\n[TEST] model_controller")

    # 1. Check model controller module exists
    controller_module = ORCHESTRATOR_PATH / "model_controller.py"
    if not controller_module.exists():
        log(f"Model controller not found: {controller_module}", "fail")
        FAILED += 1
        return False

    log("Model controller module exists", "pass")
    PASSED += 1

    # 2. Check orchestrator config exists
    config_file = ORCHESTRATOR_PATH / "config.json"
    if not config_file.exists():
        log(f"Config file not found: {config_file}", "fail")
        FAILED += 1
        return False

    log("Orchestrator config exists", "pass")
    PASSED += 1

    # 3. Validate config structure
    # NOTE(review): only JSONDecodeError is caught here; an OSError while
    # opening the file would propagate to the caller — confirm intended.
    try:
        with open(config_file) as f:
            config = json.load(f)

        required_sections = ["models", "execution", "safety"]
        for section in required_sections:
            if section in config:
                log(f"Config has '{section}' section", "pass")
                PASSED += 1
            else:
                log(f"Config missing '{section}' section", "fail")
                FAILED += 1
    except json.JSONDecodeError as e:
        log(f"Config JSON invalid: {e}", "fail")
        FAILED += 1
        return False

    # 4. Check model definitions (config is guaranteed bound here: a parse
    # failure returned above).
    if "models" in config:
        models = config["models"]
        expected_models = ["minimax", "gemini", "gemini-pro"]
        for model in expected_models:
            if model in models:
                log(f"Model '{model}' configured", "pass")
                PASSED += 1
            else:
                log(f"Model '{model}' missing", "fail")
                FAILED += 1

    # 5. Check safety config — missing keys are silently skipped (no FAILED
    # increment), so these two checks are informational-positive only.
    if "safety" in config:
        safety = config["safety"]
        if "max_retries" in safety:
            log(f"Safety max_retries: {safety['max_retries']}", "pass")
            PASSED += 1
        if "timeout" in safety:
            log(f"Safety timeout: {safety['timeout']}s", "pass")
            PASSED += 1

    # 6. Try importing the module
    try:
        # Import test - just check syntax
        import importlib.util

        spec = importlib.util.spec_from_file_location(
            "model_controller", controller_module
        )
        if spec is None or spec.loader is None:
            log("Model controller spec missing", "fail")
            FAILED += 1
        else:
            # NOTE(review): module_from_spec does not execute (or even parse)
            # the file, so "importable" here only means a spec/module object
            # could be created — a syntax error would NOT be detected.
            importlib.util.module_from_spec(spec)
            log("Model controller module importable", "pass")
            PASSED += 1
    except Exception as e:
        log(f"Model controller import failed: {e}", "fail")
        FAILED += 1

    return True
|
|
|
|
|
|
def test_pipeline_parser():
    """Check pipeline/core.py definitions and the pipeline parser's assets.

    Verifies pipeline.py/core.py exist, that core.py exposes the expected
    enums (StageType, StageStatus, AgentPhase) and RedisKeys attributes,
    and that the schemas/ and templates/ directories are present.
    Updates the global PASSED/FAILED counters; returns False on an early
    fatal failure, True otherwise.
    """
    global PASSED, FAILED

    print("\n[TEST] pipeline_parser")

    # 1. Check pipeline module exists
    pipeline_module = PIPELINE_PATH / "pipeline.py"
    if not pipeline_module.exists():
        log(f"Pipeline module not found: {pipeline_module}", "fail")
        FAILED += 1
        return False

    log("Pipeline module exists", "pass")
    PASSED += 1

    # 2. Check core definitions exist
    core_module = PIPELINE_PATH / "core.py"
    if not core_module.exists():
        log(f"Core module not found: {core_module}", "fail")
        FAILED += 1
        return False

    log("Core definitions module exists", "pass")
    PASSED += 1

    # 3. Import core definitions
    try:
        # NOTE(review): core_module is rebound here from a Path to the
        # loaded module object — two meanings for one name.
        core_module = load_core_module()
        StageType = core_module.StageType
        StageStatus = core_module.StageStatus
        AgentPhase = core_module.AgentPhase
        # PipelineContext is bound but not exercised in this test (it is
        # instantiated in test_pipeline_execution instead).
        PipelineContext = core_module.PipelineContext
        RedisKeys = core_module.RedisKeys

        log("Core types importable", "pass")
        PASSED += 1

        # 4. Verify StageType enum exposes the expected stage kinds.
        stage_types = [e.value for e in StageType]
        expected_stages = ["agent", "gate", "parallel", "condition"]
        for stage in expected_stages:
            if stage in stage_types:
                log(f"StageType.{stage} exists", "pass")
                PASSED += 1
            else:
                log(f"StageType.{stage} missing", "fail")
                FAILED += 1

        # 5. Verify StageStatus enum covers the full lifecycle.
        status_values = [e.value for e in StageStatus]
        expected_statuses = ["pending", "running", "completed", "failed", "skipped"]
        for status in expected_statuses:
            if status in status_values:
                log(f"StageStatus.{status} exists", "pass")
                PASSED += 1
            else:
                log(f"StageStatus.{status} missing", "fail")
                FAILED += 1

        # 6. Verify AgentPhase enum (values are upper-case, unlike the
        # lower-case StageType/StageStatus values — matches core.py).
        phase_values = [e.value for e in AgentPhase]
        expected_phases = ["PREFLIGHT", "PLAN", "EXECUTE", "VERIFY", "REPORT"]
        for phase in expected_phases:
            if phase in phase_values:
                log(f"AgentPhase.{phase} exists", "pass")
                PASSED += 1
            else:
                log(f"AgentPhase.{phase} missing", "fail")
                FAILED += 1

        # 7. Verify RedisKeys class — absence is silently skipped (no
        # FAILED increment), so these are informational-positive checks.
        if hasattr(RedisKeys, "agent_state"):
            log("RedisKeys.agent_state exists", "pass")
            PASSED += 1
        if hasattr(RedisKeys, "instruction_queue"):
            log("RedisKeys.instruction_queue exists", "pass")
            PASSED += 1

    except ImportError as e:
        log(f"Core import failed: {e}", "fail")
        FAILED += 1
        return False

    # 8. Check schema exists
    schema_dir = PIPELINE_PATH / "schemas"
    if schema_dir.exists():
        schemas = list(schema_dir.glob("*.json")) + list(schema_dir.glob("*.yaml"))
        log(f"Pipeline schemas directory exists ({len(schemas)} files)", "pass")
        PASSED += 1
    else:
        log("Pipeline schemas directory missing", "fail")
        FAILED += 1

    # 9. Check templates exist (both .yaml and .yml extensions counted)
    templates_dir = PIPELINE_PATH / "templates"
    if templates_dir.exists():
        templates = list(templates_dir.glob("*.yaml")) + list(
            templates_dir.glob("*.yml")
        )
        log(f"Pipeline templates directory exists ({len(templates)} files)", "pass")
        PASSED += 1
    else:
        log("Pipeline templates directory missing", "fail")
        FAILED += 1

    return True
|
|
|
|
|
|
def test_pipeline_execution():
    """Check the infrastructure pipeline execution depends on.

    Probes three external pieces: the SQLite governance ledger (tables and
    orchestration_log columns), DragonflyDB reachability/read-write (with
    credentials best-effort fetched from Vault), and that PipelineContext
    can be instantiated. Updates the global PASSED/FAILED counters;
    returns False only if the ledger file itself is missing.
    """
    global PASSED, FAILED

    print("\n[TEST] pipeline_execution")

    # 1. Check ledger exists for pipeline logging
    ledger_path = BASE_PATH / "ledger" / "governance.db"
    if ledger_path.exists():
        log("Governance ledger exists", "pass")
        PASSED += 1
    else:
        log("Governance ledger missing", "fail")
        FAILED += 1
        return False

    # 2. Check ledger has orchestration tables
    try:
        import sqlite3

        conn = sqlite3.connect(ledger_path)
        cursor = conn.cursor()

        # Enumerate all tables once, then test membership in Python.
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
        tables = [row[0] for row in cursor.fetchall()]

        expected_tables = ["agent_actions", "agent_metrics"]
        for table in expected_tables:
            if table in tables:
                log(f"Ledger has '{table}' table", "pass")
                PASSED += 1
            else:
                log(f"Ledger missing '{table}' table", "fail")
                FAILED += 1

        # 3. Check orchestration_log table (optional: its absence is only
        # informational, since it may be created lazily on first use).
        if "orchestration_log" in tables:
            log("Ledger has 'orchestration_log' table", "pass")
            PASSED += 1

            # Check columns: PRAGMA table_info rows are
            # (cid, name, type, notnull, dflt_value, pk); index 1 is name.
            cursor.execute("PRAGMA table_info(orchestration_log)")
            columns = [row[1] for row in cursor.fetchall()]
            expected_columns = ["timestamp", "agent_id", "action"]
            found = sum(1 for c in expected_columns if c in columns)
            # NOTE(review): logged as "pass" even when found < expected —
            # confirm whether a partial column set should count as failure.
            log(
                f"orchestration_log has {found}/{len(expected_columns)} expected columns",
                "pass",
            )
            PASSED += 1
        else:
            log("orchestration_log table missing (may be created on first use)", "info")

        conn.close()
    except Exception as e:
        # NOTE(review): conn is not closed if an earlier statement raised.
        log(f"Ledger check failed: {e}", "fail")
        FAILED += 1

    # 4. Check DragonflyDB connectivity for pipeline state
    try:
        import redis

        # Best-effort: fetch the DragonflyDB password from Vault using the
        # root token on disk. Any failure here deliberately falls through
        # to an empty password rather than failing the test.
        password = ""
        try:
            import subprocess

            with open("/opt/vault/init-keys.json") as f:
                token = json.load(f)["root_token"]
            # curl -k is used (presumably because Vault serves a
            # self-signed cert) instead of urllib — TODO confirm.
            result = subprocess.run(
                [
                    "curl",
                    "-sk",
                    "-H",
                    f"X-Vault-Token: {token}",
                    "https://127.0.0.1:8200/v1/secret/data/services/dragonfly",
                ],
                capture_output=True,
                text=True,
                timeout=5,
            )
            if result.returncode == 0:
                # KV v2 responses nest the secret under data.data.
                creds = json.loads(result.stdout).get("data", {}).get("data", {})
                password = creds.get("password", "")
        except Exception:
            pass

        r = redis.Redis(
            host="127.0.0.1", port=6379, password=password, decode_responses=True
        )
        r.ping()
        log("DragonflyDB reachable for pipeline state", "pass")
        PASSED += 1

        # Round-trip a short-lived key (5s TTL) to prove read/write works.
        test_key = "pipeline:test:phase6"
        r.set(test_key, "test", ex=5)
        if r.get(test_key) == "test":
            log("DragonflyDB read/write working", "pass")
            PASSED += 1
        r.delete(test_key)
    except Exception as e:
        log(f"DragonflyDB check failed: {e}", "fail")
        FAILED += 1

    # 5. Verify PipelineContext can be instantiated
    try:
        core_module = load_core_module()
        ctx = core_module.PipelineContext(
            pipeline_name="test-pipeline", run_id="run-001", inputs={}
        )
        log(f"PipelineContext instantiated: {ctx.pipeline_name}", "pass")
        PASSED += 1
    except Exception as e:
        log(f"PipelineContext failed: {e}", "fail")
        FAILED += 1

    return True
|
|
|
|
|
|
def test_pipeline_validation():
    """Check PipelineParser accepts a well-formed pipeline and rejects a bad one."""
    global PASSED, FAILED

    print("\n[TEST] pipeline_validation")

    good = {
        "name": "sample-pipeline",
        "version": "1.0.0",
        "stages": [
            {"name": "plan", "type": "agent", "agent": {"template": "default"}}
        ],
    }
    bad = {"name": "BadName", "version": "1", "stages": []}

    try:
        parser = load_pipeline_module().PipelineParser()

        # A schema-conforming pipeline must validate without raising.
        parser.validate(good)
        log("Valid pipeline passes schema validation", "pass")
        PASSED += 1

        # A malformed pipeline must be rejected with ValueError.
        try:
            parser.validate(bad)
        except ValueError:
            log("Invalid pipeline rejected by schema", "pass")
            PASSED += 1
        else:
            log("Invalid pipeline unexpectedly validated", "fail")
            FAILED += 1

    except Exception as e:
        log(f"Pipeline validation test failed: {e}", "fail")
        FAILED += 1

    return True
|
|
|
|
|
|
def test_template_generation():
    """Exercise AgentTemplate loading and override merging for 'default'."""
    global PASSED, FAILED

    print("\n[TEST] template_generation")

    def _check(ok, pass_msg, fail_msg):
        # Record one pass/fail result against the module-level counters.
        global PASSED, FAILED
        if ok:
            log(pass_msg, "pass")
            PASSED += 1
        else:
            log(fail_msg, "fail")
            FAILED += 1

    try:
        mod = load_pipeline_module()
        tmpl = mod.AgentTemplate("default")

        _check(
            tmpl.config.get("tier") == 0,
            "Default template tier is 0",
            "Default template tier mismatch",
        )
        _check(
            "read_docs" in tmpl.config.get("allowed_actions", []),
            "Default template includes read_docs",
            "Default template missing read_docs",
        )

        merged = tmpl.instantiate({"role": "observer", "extra": "value"})
        _check(
            merged.get("role") == "observer" and merged.get("extra") == "value",
            "Template instantiation merges overrides",
            "Template instantiation missing overrides",
        )

    except Exception as e:
        log(f"Template generation test failed: {e}", "fail")
        FAILED += 1

    return True
|
|
|
|
|
|
def test_execution():
    """Drive PipelineExecutor end-to-end through a one-stage pipeline."""
    global PASSED, FAILED

    print("\n[TEST] test_execution")

    try:
        import asyncio

        executor = load_pipeline_module().PipelineExecutor()
        definition = {
            "name": "execution-test",
            "version": "1.0.0",
            "stages": [
                {"name": "plan", "type": "agent", "agent": {"template": "default"}}
            ],
        }

        # execute() is a coroutine returning (success, result-dict).
        ok, outcome = asyncio.run(executor.execute(definition))

        if ok and "plan" in outcome.get("stages", {}):
            log("Pipeline executor completed minimal run", "pass")
            PASSED += 1
        else:
            log("Pipeline executor failed minimal run", "fail")
            FAILED += 1

    except Exception as e:
        log(f"Pipeline execution test failed: {e}", "fail")
        FAILED += 1

    return True
|
|
|
|
|
|
def test_multi_agent_coordination():
    """Check the multi-agent TypeScript workspace is present and well-formed.

    Probes the agents/multi-agent directory for the expected .ts sources,
    a valid package.json, an installed node_modules tree, and a few
    textual markers ("class", "async", "Orchestrator", ...) inside the
    sources. Updates the global PASSED/FAILED counters; returns False
    only if the directory itself is missing.
    """
    global PASSED, FAILED

    print("\n[TEST] multi_agent_coordination")

    multi_agent_path = AGENTS_PATH / "multi-agent"

    # 1. Check multi-agent directory exists
    if not multi_agent_path.exists():
        log(f"Multi-agent directory not found: {multi_agent_path}", "fail")
        FAILED += 1
        return False

    log("Multi-agent directory exists", "pass")
    PASSED += 1

    # 2. Check TypeScript files exist
    ts_files = ["orchestrator.ts", "agents.ts", "coordination.ts", "types.ts"]
    for ts_file in ts_files:
        file_path = multi_agent_path / ts_file
        if file_path.exists():
            log(f"{ts_file} exists", "pass")
            PASSED += 1
        else:
            log(f"{ts_file} missing", "fail")
            FAILED += 1

    # 3. Check package.json parses as JSON
    package_json = multi_agent_path / "package.json"
    if package_json.exists():
        try:
            with open(package_json) as f:
                pkg = json.load(f)
            log(f"package.json valid (name: {pkg.get('name', 'N/A')})", "pass")
            PASSED += 1
        except json.JSONDecodeError:
            log("package.json invalid JSON", "fail")
            FAILED += 1
    else:
        log("package.json missing", "fail")
        FAILED += 1

    # 4. Check node_modules installed (top-level entry count only —
    # includes scope dirs and dotfiles, so it is an approximation).
    node_modules = multi_agent_path / "node_modules"
    if node_modules.exists() and node_modules.is_dir():
        module_count = len(list(node_modules.iterdir()))
        log(f"node_modules installed ({module_count} packages)", "pass")
        PASSED += 1
    else:
        log("node_modules not installed", "fail")
        FAILED += 1

    # 5. Check coordination patterns in coordination.ts — plain substring
    # probes, so a comment containing the word would also match. Missing
    # patterns are only informational (no FAILED increment).
    coordination_ts = multi_agent_path / "coordination.ts"
    if coordination_ts.exists():
        content = coordination_ts.read_text()

        patterns = [
            ("class", "coordination class defined"),
            ("async", "async patterns used"),
            ("Promise", "Promise-based coordination"),
        ]

        for pattern, desc in patterns:
            if pattern in content:
                log(f"{desc}", "pass")
                PASSED += 1
            else:
                log(f"{desc} - not found", "info")

    # 6. Check orchestrator patterns in orchestrator.ts (absence of a
    # marker is silently skipped here — positives only).
    orchestrator_ts = multi_agent_path / "orchestrator.ts"
    if orchestrator_ts.exists():
        content = orchestrator_ts.read_text()

        if "Orchestrator" in content:
            log("Orchestrator class defined", "pass")
            PASSED += 1
        if "delegate" in content.lower() or "dispatch" in content.lower():
            log("Delegation/dispatch pattern found", "pass")
            PASSED += 1

    # 7. Check agent registry in agents.ts (same positives-only scheme)
    agents_ts = multi_agent_path / "agents.ts"
    if agents_ts.exists():
        content = agents_ts.read_text()

        if "Agent" in content:
            log("Agent definitions found", "pass")
            PASSED += 1
        if "register" in content.lower() or "Registry" in content:
            log("Agent registry pattern found", "pass")
            PASSED += 1

    return True
|
|
|
|
|
|
def main():
    """Run every Phase 6 test and return True when no check failed."""
    global PASSED, FAILED

    banner = "=" * 60
    print("\n" + banner)
    print("PHASE 6: MULTI-AGENT ORCHESTRATION TESTS")
    print(banner)

    # Order matters: later tests reuse modules the earlier ones verify.
    tests = (
        test_model_controller,
        test_pipeline_parser,
        test_pipeline_validation,
        test_pipeline_execution,
        test_template_generation,
        test_execution,
        test_multi_agent_coordination,
    )
    try:
        for run_test in tests:
            run_test()
    except Exception as e:
        # An unexpected crash aborts the remaining tests but still counts
        # as one failure so the exit code reflects it.
        print(f"\n\033[91mTest execution error: {e}\033[0m")
        import traceback

        traceback.print_exc()
        FAILED += 1

    print("\n" + banner)
    print(f"RESULTS: {PASSED} passed, {FAILED} failed")
    print(banner + "\n")

    return FAILED == 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Exit code 0 only when every check passed.
    sys.exit(0 if main() else 1)
|