Major additions: - marketplace/: Agent template registry with FTS5 search, ratings, versioning - observability/: Prometheus metrics, distributed tracing, structured logging - ledger/migrations/: Database migration scripts for multi-tenant support - tests/governance/: 15 new test files for phases 6-12 (295 total tests) - bin/validate-phases: Full 12-phase validation script New features: - Multi-tenant support with tenant isolation and quota enforcement - Agent marketplace with semantic versioning and search - Observability with metrics, tracing, and log correlation - Tier-1 agent bootstrap scripts Updated components: - ledger/api.py: Extended API for tenants, marketplace, observability - ledger/schema.sql: Added tenant, project, marketplace tables - testing/framework.ts: Enhanced test framework - checkpoint/checkpoint.py: Improved checkpoint management Archived: - External integrations (Slack/GitHub/PagerDuty) moved to .archive/ - Old checkpoint files cleaned up Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
608 lines
18 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Phase 7: Monitoring & Learning Tests
|
|
=====================================
|
|
Tests for learning system, memory layer, and monitoring infrastructure.
|
|
|
|
Required tests:
|
|
- learning_system: Verify agent statistics and pattern detection
|
|
- memory_layer: Verify memory storage and retrieval
|
|
- memory_storage: Verify memory persistence helpers
|
|
- learning_patterns: Verify pattern detection helpers
|
|
- monitors: Verify monitoring infrastructure
|
|
- analytics: Verify analytics data collection
|
|
- team_coordination: Verify hierarchical team workflow structures
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
import sqlite3
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
|
|
# Add paths
# Root of the deployed governance tree. All checks below resolve relative to
# this absolute path, so the suite requires a live deployment at this location.
BASE_PATH = Path("/opt/agent-governance")
ANALYTICS_PATH = BASE_PATH / "analytics"  # learning / analytics modules
MEMORY_PATH = BASE_PATH / "memory"  # external memory layer (memory.py, chunks/, summaries/)
RUNTIME_PATH = BASE_PATH / "runtime"  # monitors.py and governance.py runtime

# Make the subsystem directories importable by bare module name; load_module()
# below also imports files directly by path, so this is a belt-and-braces setup.
sys.path.insert(0, str(ANALYTICS_PATH))
sys.path.insert(0, str(MEMORY_PATH))
sys.path.insert(0, str(RUNTIME_PATH))

# Test results
# Global pass/fail counters, mutated by every test_* function below and
# reported once by main().
PASSED = 0
FAILED = 0
|
|
|
|
|
|
def log(msg: str, status: str = "info"):
    """Print *msg* prefixed with a status icon (green tick, red cross, arrow)."""
    status_icons = {
        "pass": "\033[92m✓\033[0m",
        "fail": "\033[91m✗\033[0m",
        "info": "→",
    }
    # Unknown statuses fall back to a neutral bullet.
    icon = status_icons.get(status, "•")
    print(f" {icon} {msg}")
|
|
|
|
|
|
def load_module(name: str, path: Path):
    """Import the Python source file at *path* and return it as module *name*.

    The module is registered in ``sys.modules`` before execution so that
    machinery which looks modules up by name (dataclasses, pickling,
    self-referential imports) keeps working.

    Raises:
        ImportError: if an import spec/loader cannot be built for *path*
            (e.g. the file does not exist or is not importable).
    """
    import importlib.util

    spec = importlib.util.spec_from_file_location(name, path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Module spec missing for {name}")
    module = importlib.util.module_from_spec(spec)
    # Register before exec_module so lookups by name during execution resolve.
    # (Fix: the redundant local `import sys` shadowing the module-level import
    # at the top of this file has been removed.)
    sys.modules[name] = module
    spec.loader.exec_module(module)
    return module
|
|
|
|
|
|
def test_learning_system():
    """Verify the learning-from-history subsystem in analytics/learning.py.

    Checks, in order: the module file exists, its core classes import,
    AgentStats and Pattern can be instantiated, and HistoryAnalyzer exposes
    analysis / pattern-detection / recommendation methods.

    Side effects: increments the module-level PASSED / FAILED counters and
    prints one line per check via log().
    Returns False on a hard failure (missing module or import error),
    True otherwise.
    """
    global PASSED, FAILED

    print("\n[TEST] learning_system")

    # 1. Check learning module exists
    learning_module = ANALYTICS_PATH / "learning.py"
    if not learning_module.exists():
        log(f"Learning module not found: {learning_module}", "fail")
        FAILED += 1
        return False

    log("Learning module exists", "pass")
    PASSED += 1

    # 2. Try importing the module
    try:
        learning = load_module("learning", learning_module)
        # Attribute access doubles as an existence check; a missing class
        # raises AttributeError, handled by the generic except below.
        AgentStats = learning.AgentStats
        Pattern = learning.Pattern
        HistoryAnalyzer = learning.HistoryAnalyzer

        log("Learning classes importable", "pass")
        PASSED += 1

        # 3. Test AgentStats dataclass
        # NOTE(review): assumes AgentStats accepts exactly these keyword
        # fields — confirmed only at runtime against the deployed module.
        stats = AgentStats(
            agent_id="test-agent-001",
            total_actions=100,
            successful_actions=95,
            failed_actions=5,
        )
        if stats.agent_id == "test-agent-001":
            log("AgentStats instantiation works", "pass")
            PASSED += 1
        if stats.successful_actions == 95:
            log("AgentStats fields accessible", "pass")
            PASSED += 1

        # 4. Test Pattern dataclass
        pattern = Pattern(
            pattern_type="success_streak",
            description="Agent has high success rate",
            frequency=10,
            confidence=0.95,
            agents_affected=["agent-001"],
            recommendation="Consider promotion",
        )
        if pattern.confidence == 0.95:
            log("Pattern instantiation works", "pass")
            PASSED += 1

        # 5. Test HistoryAnalyzer instantiation
        # Instantiation may touch external state (e.g. a database), so a
        # failure here is logged as info rather than counted as a failure.
        try:
            analyzer = HistoryAnalyzer()
            log("HistoryAnalyzer instantiated", "pass")
            PASSED += 1

            # 6. Check analyzer has analysis methods
            # Either naming variant is accepted for each capability.
            if hasattr(analyzer, "analyze_agent") or hasattr(
                analyzer, "get_agent_stats"
            ):
                log("HistoryAnalyzer has analysis methods", "pass")
                PASSED += 1

            if hasattr(analyzer, "detect_patterns") or hasattr(
                analyzer, "find_patterns"
            ):
                log("HistoryAnalyzer has pattern detection", "pass")
                PASSED += 1

            if hasattr(analyzer, "get_recommendations") or hasattr(analyzer, "predict"):
                log("HistoryAnalyzer has recommendations/predictions", "pass")
                PASSED += 1
        except Exception as e:
            log(f"HistoryAnalyzer instantiation: {e}", "info")

    except ImportError as e:
        log(f"Learning import failed: {e}", "fail")
        FAILED += 1
        return False
    except Exception as e:
        # Catches instantiation/attribute errors from steps 2-4 above.
        log(f"Learning test error: {e}", "fail")
        FAILED += 1

    return True
|
|
|
|
|
|
def test_learning_patterns():
    """Verify the pattern-detection helpers exposed by the learning module.

    Increments the module-level PASSED / FAILED counters; returns False only
    when the learning module file is missing.
    """
    global PASSED, FAILED

    print("\n[TEST] learning_patterns")

    try:
        module_path = ANALYTICS_PATH / "learning.py"
        if not module_path.exists():
            log(f"Learning module not found: {module_path}", "fail")
            FAILED += 1
            return False

        # Cheap static check first: the source should mention patterns at all.
        source_text = module_path.read_text()
        if "pattern" in source_text.lower():
            log("Learning module references pattern analysis", "pass")
            PASSED += 1
        else:
            log("Learning module missing pattern references", "fail")
            FAILED += 1

        # Then confirm the analyzer object exposes at least one known helper.
        learning_mod = load_module("learning", module_path)
        analyzer = learning_mod.HistoryAnalyzer()
        helper_names = ("detect_patterns", "find_patterns", "pattern_analysis")
        if any(hasattr(analyzer, helper) for helper in helper_names):
            log("HistoryAnalyzer exposes pattern detection", "pass")
            PASSED += 1
        else:
            log("HistoryAnalyzer missing pattern detection helpers", "fail")
            FAILED += 1
    except Exception as exc:
        log(f"Learning patterns test failed: {exc}", "fail")
        FAILED += 1

    return True
|
|
|
|
|
|
def test_memory_layer():
    """Verify the external memory layer (memory/memory.py).

    Checks: the module file and its chunks/summaries directories exist, the
    core classes import, the MemoryType enum carries the expected values, a
    MemoryManager can store and fetch an entry, and the backing SQLite
    database (if already created) contains an entries table.

    Side effects: increments the module-level PASSED / FAILED counters and
    prints one line per check via log().
    Returns False on a hard failure (missing module or import error),
    True otherwise.
    """
    global PASSED, FAILED

    print("\n[TEST] memory_layer")

    # 1. Check memory module exists
    memory_module = MEMORY_PATH / "memory.py"
    if not memory_module.exists():
        log(f"Memory module not found: {memory_module}", "fail")
        FAILED += 1
        return False

    log("Memory module exists", "pass")
    PASSED += 1

    # 2. Check memory directories exist (informational only: the manager may
    # create them lazily on first use).
    chunks_dir = MEMORY_PATH / "chunks"
    summaries_dir = MEMORY_PATH / "summaries"

    if chunks_dir.exists():
        log("Chunks directory exists", "pass")
        PASSED += 1
    else:
        log("Chunks directory missing (may be created on first use)", "info")

    if summaries_dir.exists():
        log("Summaries directory exists", "pass")
        PASSED += 1
    else:
        log("Summaries directory missing (may be created on first use)", "info")

    # 3. Try importing memory module
    try:
        memory = load_module("memory", memory_module)
        # Attribute access doubles as an existence check; AttributeError is
        # caught below so a missing class fails this test instead of
        # aborting the whole suite.
        MemoryEntry = memory.MemoryEntry
        MemoryType = memory.MemoryType
        MemoryManager = memory.MemoryManager

        log("Memory classes importable", "pass")
        PASSED += 1

        # 4. Test MemoryType enum
        memory_types = [e.value for e in MemoryType]
        expected_types = ["output", "transcript", "summary"]
        for mtype in expected_types:
            if mtype in memory_types:
                log(f"MemoryType.{mtype} exists", "pass")
                PASSED += 1
            else:
                log(f"MemoryType.{mtype} missing", "info")

        # 5. Test MemoryManager instantiation (may touch external state, so
        # failures are logged as info rather than counted).
        try:
            manager = MemoryManager()
            log("MemoryManager instantiated", "pass")
            PASSED += 1

            # Check manager has core methods
            methods = ["store", "fetch", "summarize", "list_entries"]
            for method in methods:
                if hasattr(manager, method):
                    log(f"MemoryManager.{method}() exists", "pass")
                    PASSED += 1

            # 6. Test storing a small entry
            test_content = "Test memory entry for Phase 7 testing"
            try:
                entry = manager.store(
                    content=test_content,
                    type=MemoryType.OUTPUT,
                    context={"source": "phase7_test"},
                )
                if entry and getattr(entry, "id", None):
                    log(f"Memory store works: {entry.id[:20]}...", "pass")
                    PASSED += 1

                    # 7. Test fetching: round trip must return the content.
                    fetched = manager.fetch(entry.id)
                    if fetched and test_content in str(fetched):
                        log("Memory fetch works", "pass")
                        PASSED += 1
            except Exception as e:
                log(f"Memory store/fetch: {e}", "info")

        except Exception as e:
            log(f"MemoryManager instantiation: {e}", "info")

    # Fix: also catch AttributeError so a missing class fails this test
    # instead of propagating out and aborting the remaining tests in main().
    except (ImportError, AttributeError) as e:
        log(f"Memory import failed: {e}", "fail")
        FAILED += 1
        return False

    # 8. Check memory database (only if it has been created already).
    memory_db = MEMORY_PATH / "memory.db"
    if memory_db.exists():
        log("Memory database exists", "pass")
        PASSED += 1

        try:
            conn = sqlite3.connect(memory_db)
            # Fix: close the connection even when a query raises; the
            # original only closed it on the happy path.
            try:
                cursor = conn.cursor()
                cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
                tables = [row[0] for row in cursor.fetchall()]

                if "memory_entries" in tables or "entries" in tables:
                    log("Memory entries table exists", "pass")
                    PASSED += 1
            finally:
                conn.close()
        except Exception as e:
            log(f"Memory DB check: {e}", "info")
    else:
        log("Memory database not created yet", "info")

    return True
|
|
|
|
|
|
def test_memory_storage():
    """Verify that a memory entry survives a store/fetch round trip.

    Increments the module-level PASSED / FAILED counters; always returns True
    (failures are recorded in the counters, not the return value).
    """
    global PASSED, FAILED

    print("\n[TEST] memory_storage")

    try:
        memory_mod = load_module("memory", MEMORY_PATH / "memory.py")
        manager = memory_mod.MemoryManager()
        stored = manager.store(
            content="Phase 7 memory persistence test",
            type=memory_mod.MemoryType.OUTPUT,
            context={"source": "phase7_storage_test"},
        )

        if not (stored and getattr(stored, "id", None)):
            log("MemoryManager.store returned empty id", "fail")
            FAILED += 1
        else:
            log("MemoryManager.store returned entry id", "pass")
            PASSED += 1
            # Round trip: the id handed back must resolve to the entry.
            if manager.fetch(stored.id):
                log("MemoryManager.fetch returned stored entry", "pass")
                PASSED += 1
            else:
                log("MemoryManager.fetch missing stored entry", "fail")
                FAILED += 1
    except Exception as exc:
        log(f"Memory storage test failed: {exc}", "fail")
        FAILED += 1

    return True
|
|
|
|
|
|
def test_monitors():
    """Verify the runtime monitoring infrastructure (runtime/monitors.py).

    Checks that the monitors module exists and imports, probes for the known
    monitor classes and helper functions, and confirms governance.py
    references monitoring concepts. Increments PASSED / FAILED; returns
    False only when the monitors module file is missing.
    """
    global PASSED, FAILED

    print("\n[TEST] monitors")

    # The monitors module is mandatory; bail out early when absent.
    module_path = RUNTIME_PATH / "monitors.py"
    if not module_path.exists():
        log(f"Monitors module not found: {module_path}", "fail")
        FAILED += 1
        return False

    log("Monitors module exists", "pass")
    PASSED += 1

    try:
        mod = load_module("monitors", module_path)
        log("Monitors module importable", "pass")
        PASSED += 1

        # Known monitor class names; any subset may be present.
        expected_classes = (
            "ResourceMonitor",
            "ActionMonitor",
            "ComplianceMonitor",
            "Monitor",
        )
        hits = 0
        for class_name in expected_classes:
            if hasattr(mod, class_name):
                log(f"{class_name} class exists", "pass")
                PASSED += 1
                hits += 1

        if not hits:
            log("No standard monitor classes found (may use different naming)", "info")

        # Module-level helper functions are optional as well.
        expected_functions = (
            "start_monitoring",
            "stop_monitoring",
            "check_resources",
            "log_action",
        )
        for fn_name in expected_functions:
            if hasattr(mod, fn_name):
                log(f"{fn_name}() function exists", "pass")
                PASSED += 1

    except Exception as exc:
        log(f"Monitors import/check: {exc}", "fail")
        FAILED += 1

    # The governance runtime should wire into monitoring; keyword scan only.
    governance_path = RUNTIME_PATH / "governance.py"
    if governance_path.exists():
        text = governance_path.read_text().lower()

        keyword_checks = (
            ("monitor", "Governance has monitoring integration"),
            ("heartbeat", "Governance has heartbeat tracking"),
            ("metrics", "Governance has metrics collection"),
        )
        for keyword, message in keyword_checks:
            if keyword in text:
                log(message, "pass")
                PASSED += 1

    return True
|
|
|
|
|
|
def test_team_coordination():
    """Verify the hierarchical team coordination framework.

    Loads teams/framework/team.py, builds a TeamLead, checks the TeamRole
    enum, and spawns a research sub-team. Increments PASSED / FAILED;
    returns False only when the framework module file is missing.
    """
    global PASSED, FAILED

    print("\n[TEST] team_coordination")

    framework_path = BASE_PATH / "teams" / "framework" / "team.py"
    if not framework_path.exists():
        log(f"Team framework not found: {framework_path}", "fail")
        FAILED += 1
        return False

    log("Team framework module exists", "pass")
    PASSED += 1

    try:
        team_mod = load_module("team", framework_path)

        # A lead is built from a name plus an objective payload.
        team_lead = team_mod.TeamLead("phase7-test", {"description": "Test objective"})
        if isinstance(team_lead, team_mod.TeamLead):
            log("TeamLead instantiation works", "pass")
            PASSED += 1

        if team_mod.TeamRole.RESEARCH.value == "research":
            log("TeamRole enum contains research", "pass")
            PASSED += 1

        # Spawning a role should yield a populated sub-team.
        spawned = team_lead.spawn_team(team_mod.TeamRole.RESEARCH)
        if spawned and spawned.members:
            log("Spawned research team with members", "pass")
            PASSED += 1
    except Exception as exc:
        log(f"Team coordination test failed: {exc}", "fail")
        FAILED += 1

    return True
|
|
|
|
|
|
def test_analytics():
    """Verify analytics data collection against the governance ledger.

    Checks: the analytics directory exists, counts its Python modules, then
    inspects the governance SQLite ledger (agent_actions / agent_metrics
    tables, analytics and promotion columns, time-series and action
    distribution data), and finally scans learning.py for integration
    keywords.

    Side effects: increments the module-level PASSED / FAILED counters and
    prints one line per check via log().
    Returns False only when the analytics directory is missing.
    """
    global PASSED, FAILED

    print("\n[TEST] analytics")

    # 1. Check analytics directory exists
    if not ANALYTICS_PATH.exists():
        log(f"Analytics directory not found: {ANALYTICS_PATH}", "fail")
        FAILED += 1
        return False

    log("Analytics directory exists", "pass")
    PASSED += 1

    # 2. List analytics modules (always counted as a pass; the count itself
    # is informational).
    py_files = list(ANALYTICS_PATH.glob("*.py"))
    log(f"Analytics has {len(py_files)} Python modules", "pass")
    PASSED += 1

    # 3. Check governance ledger for analytics data
    ledger_path = BASE_PATH / "ledger" / "governance.db"
    if ledger_path.exists():
        try:
            conn = sqlite3.connect(ledger_path)
            # Fix: close the connection even when a query raises; the
            # original only closed it on the happy path, leaking the handle
            # whenever a table or column was missing.
            try:
                cursor = conn.cursor()

                # Check agent_actions table (the raw analytics source).
                cursor.execute("SELECT COUNT(*) FROM agent_actions")
                action_count = cursor.fetchone()[0]
                log(f"Ledger has {action_count} agent actions logged", "pass")
                PASSED += 1

                # Check agent_metrics table (aggregated per-agent metrics).
                cursor.execute("SELECT COUNT(*) FROM agent_metrics")
                metrics_count = cursor.fetchone()[0]
                log(f"Ledger has {metrics_count} agent metrics records", "pass")
                PASSED += 1

                # 4. Check for analytics-relevant columns
                cursor.execute("PRAGMA table_info(agent_actions)")
                columns = [row[1] for row in cursor.fetchall()]

                analytics_columns = ["confidence", "success", "timestamp", "action"]
                found = sum(1 for c in analytics_columns if c in columns)
                log(
                    f"agent_actions has {found}/{len(analytics_columns)} analytics columns",
                    "pass",
                )
                PASSED += 1

                # 5. Check for time-series data (actions grouped per day).
                cursor.execute("""
                    SELECT DATE(timestamp) as day, COUNT(*) as count
                    FROM agent_actions
                    GROUP BY DATE(timestamp)
                    ORDER BY day DESC
                    LIMIT 5
                """)
                daily_data = cursor.fetchall()
                if daily_data:
                    log(f"Time-series data available ({len(daily_data)} days)", "pass")
                    PASSED += 1

                # 6. Check for action distribution (top action types).
                cursor.execute("""
                    SELECT action, COUNT(*) as count
                    FROM agent_actions
                    GROUP BY action
                    ORDER BY count DESC
                    LIMIT 5
                """)
                action_dist = cursor.fetchall()
                if action_dist:
                    log(f"Action distribution: {len(action_dist)} action types", "pass")
                    PASSED += 1

                # 7. Check agent_metrics for promotion tracking
                cursor.execute("PRAGMA table_info(agent_metrics)")
                metric_columns = [row[1] for row in cursor.fetchall()]

                promotion_columns = [
                    "compliant_runs",
                    "consecutive_compliant",
                    "current_tier",
                ]
                found = sum(1 for c in promotion_columns if c in metric_columns)
                log(
                    f"agent_metrics has {found}/{len(promotion_columns)} promotion columns",
                    "pass",
                )
                PASSED += 1
            finally:
                conn.close()

        except Exception as e:
            log(f"Ledger analytics check: {e}", "fail")
            FAILED += 1
    else:
        log("Governance ledger not found", "fail")
        FAILED += 1

    # 8. Check for learning.py analytics integration (keyword scan only).
    learning_module = ANALYTICS_PATH / "learning.py"
    if learning_module.exists():
        content = learning_module.read_text()

        if "agent_actions" in content:
            log("Learning integrates with agent_actions", "pass")
            PASSED += 1

        if "statistics" in content or "stats" in content:
            log("Learning has statistical analysis", "pass")
            PASSED += 1

        if "pattern" in content.lower():
            log("Learning has pattern detection", "pass")
            PASSED += 1

    return True
|
|
|
|
|
|
def main():
    """Run every Phase 7 test and report the aggregate result.

    Returns True when no test recorded a failure (FAILED == 0).
    """
    global PASSED, FAILED

    banner = "=" * 60
    print("\n" + banner)
    print("PHASE 7: MONITORING & LEARNING TESTS")
    print(banner)

    # Run the suite in a fixed order; any uncaught exception aborts the
    # remaining tests and is counted as one failure.
    suite = (
        test_learning_system,
        test_learning_patterns,
        test_memory_layer,
        test_memory_storage,
        test_monitors,
        test_analytics,
        test_team_coordination,
    )
    try:
        for test_fn in suite:
            test_fn()
    except Exception as exc:
        print(f"\n\033[91mTest execution error: {exc}\033[0m")
        import traceback

        traceback.print_exc()
        FAILED += 1

    print("\n" + banner)
    print(f"RESULTS: {PASSED} passed, {FAILED} failed")
    print(banner + "\n")

    return FAILED == 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Exit code 0 only when every check passed (FAILED == 0), so the
    # bin/validate-phases driver can gate on this script's status.
    success = main()
    sys.exit(0 if success else 1)
|