agent-governance/tests/governance/test_phase7_monitoring.py
profit 8c6e7831e9 Add Phase 10-12 implementation: multi-tenant, marketplace, observability
Major additions:
- marketplace/: Agent template registry with FTS5 search, ratings, versioning
- observability/: Prometheus metrics, distributed tracing, structured logging
- ledger/migrations/: Database migration scripts for multi-tenant support
- tests/governance/: 15 new test files for phases 6-12 (295 total tests)
- bin/validate-phases: Full 12-phase validation script

New features:
- Multi-tenant support with tenant isolation and quota enforcement
- Agent marketplace with semantic versioning and search
- Observability with metrics, tracing, and log correlation
- Tier-1 agent bootstrap scripts

Updated components:
- ledger/api.py: Extended API for tenants, marketplace, observability
- ledger/schema.sql: Added tenant, project, marketplace tables
- testing/framework.ts: Enhanced test framework
- checkpoint/checkpoint.py: Improved checkpoint management

Archived:
- External integrations (Slack/GitHub/PagerDuty) moved to .archive/
- Old checkpoint files cleaned up

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-24 18:39:47 -05:00

608 lines
18 KiB
Python

#!/usr/bin/env python3
"""
Phase 7: Monitoring & Learning Tests
=====================================
Tests for learning system, memory layer, and monitoring infrastructure.
Required tests:
- learning_system: Verify agent statistics and pattern detection
- memory_layer: Verify memory storage and retrieval
- memory_storage: Verify memory persistence helpers
- learning_patterns: Verify pattern detection helpers
- monitors: Verify monitoring infrastructure
- analytics: Verify analytics data collection
- team_coordination: Verify hierarchical team workflow structures
"""
import json
import os
import sys
import sqlite3
from pathlib import Path
from datetime import datetime
# Locations of the governance components exercised by these tests
BASE_PATH = Path("/opt/agent-governance")
ANALYTICS_PATH = BASE_PATH.joinpath("analytics")
MEMORY_PATH = BASE_PATH.joinpath("memory")
RUNTIME_PATH = BASE_PATH.joinpath("runtime")

# Every insert goes to the front of sys.path, so the directory inserted
# last (RUNTIME_PATH) ends up with the highest import precedence.
for _component_dir in (ANALYTICS_PATH, MEMORY_PATH, RUNTIME_PATH):
    sys.path.insert(0, str(_component_dir))
del _component_dir

# Running totals of individual checks, updated by the test functions
PASSED = FAILED = 0
def log(msg: str, status: str = "info"):
    """Print *msg* preceded by a status marker.

    Args:
        msg: Text to print.
        status: ``"pass"`` prints a green check mark, ``"fail"`` a red
            cross; ``"info"`` and any other value print no marker.
    """
    if status == "pass":
        marker = "\033[92m✓\033[0m"
    elif status == "fail":
        marker = "\033[91m✗\033[0m"
    else:
        marker = ""
    print(f" {marker} {msg}")
def load_module(name: str, path: Path):
    """Load the Python source file at *path* as a module named *name*.

    The module is registered in ``sys.modules`` under *name* before its
    code is executed. If execution raises, the registration is rolled
    back (any previous entry for *name* is restored) so a half-initialised
    module is never left behind for later imports to pick up.

    Args:
        name: Name to register the module under in ``sys.modules``.
        path: Location of the source file to execute.

    Returns:
        The fully executed module object.

    Raises:
        ImportError: If no import spec or loader can be built for *path*.
        Exception: Whatever the module's own top-level code raises.
    """
    import importlib.util
    import sys

    spec = importlib.util.spec_from_file_location(name, path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Module spec missing for {name}")
    module = importlib.util.module_from_spec(spec)

    # Sentinel distinguishes "no previous entry" from an entry that is None.
    missing = object()
    previous = sys.modules.get(name, missing)
    sys.modules[name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Undo the registration so the broken module is not importable.
        if previous is missing:
            sys.modules.pop(name, None)
        else:
            sys.modules[name] = previous
        raise
    return module
def test_learning_system():
    """Exercise the history-learning module (``analytics/learning.py``).

    Confirms the module file exists, loads it, builds ``AgentStats`` and
    ``Pattern`` instances, and probes a ``HistoryAnalyzer`` instance for
    analysis, pattern-detection and recommendation entry points. Each
    satisfied check increments ``PASSED``; hard failures increment
    ``FAILED``.

    Returns:
        False when the module file is missing or loading it raises
        ImportError; True in every other case, including when a different
        exception was recorded as a failure.
    """
    global PASSED, FAILED
    print("\n[TEST] learning_system")

    # 1. The module file must be present before anything else can run
    module_file = ANALYTICS_PATH / "learning.py"
    if not module_file.exists():
        log(f"Learning module not found: {module_file}", "fail")
        FAILED += 1
        return False
    log("Learning module exists", "pass")
    PASSED += 1

    try:
        # 2. Load the module and pull out the classes under test
        learning = load_module("learning", module_file)
        stats_cls = learning.AgentStats
        pattern_cls = learning.Pattern
        analyzer_cls = learning.HistoryAnalyzer
        log("Learning classes importable", "pass")
        PASSED += 1

        # 3. AgentStats can be built and its fields read back
        stats_fields = {
            "agent_id": "test-agent-001",
            "total_actions": 100,
            "successful_actions": 95,
            "failed_actions": 5,
        }
        stats = stats_cls(**stats_fields)
        if stats.agent_id == "test-agent-001":
            log("AgentStats instantiation works", "pass")
            PASSED += 1
        if stats.successful_actions == 95:
            log("AgentStats fields accessible", "pass")
            PASSED += 1

        # 4. Pattern can be built and its fields read back
        pattern_fields = {
            "pattern_type": "success_streak",
            "description": "Agent has high success rate",
            "frequency": 10,
            "confidence": 0.95,
            "agents_affected": ["agent-001"],
            "recommendation": "Consider promotion",
        }
        pattern = pattern_cls(**pattern_fields)
        if pattern.confidence == 0.95:
            log("Pattern instantiation works", "pass")
            PASSED += 1

        # 5./6. Analyzer problems are only reported, never counted as failures
        try:
            analyzer = analyzer_cls()
            log("HistoryAnalyzer instantiated", "pass")
            PASSED += 1

            # Each capability is satisfied by either of two method names
            capability_probes = (
                (
                    ("analyze_agent", "get_agent_stats"),
                    "HistoryAnalyzer has analysis methods",
                ),
                (
                    ("detect_patterns", "find_patterns"),
                    "HistoryAnalyzer has pattern detection",
                ),
                (
                    ("get_recommendations", "predict"),
                    "HistoryAnalyzer has recommendations/predictions",
                ),
            )
            for method_names, message in capability_probes:
                if any(hasattr(analyzer, method) for method in method_names):
                    log(message, "pass")
                    PASSED += 1
        except Exception as exc:
            log(f"HistoryAnalyzer instantiation: {exc}", "info")
    except ImportError as exc:
        log(f"Learning import failed: {exc}", "fail")
        FAILED += 1
        return False
    except Exception as exc:
        log(f"Learning test error: {exc}", "fail")
        FAILED += 1
    return True
def test_learning_patterns():
    """Check that the learning module offers pattern-detection helpers.

    Looks for the word "pattern" in the module source, then loads the
    module and checks a ``HistoryAnalyzer`` instance for one of the known
    pattern-detection method names.

    Returns:
        False when the learning module file is missing; True otherwise,
        including when individual checks were recorded as failures.
    """
    global PASSED, FAILED
    print("\n[TEST] learning_patterns")
    try:
        module_file = ANALYTICS_PATH / "learning.py"
        if not module_file.exists():
            log(f"Learning module not found: {module_file}", "fail")
            FAILED += 1
            return False

        # Textual check: the source mentions patterns at all
        source_text = module_file.read_text()
        if "pattern" not in source_text.lower():
            log("Learning module missing pattern references", "fail")
            FAILED += 1
        else:
            log("Learning module references pattern analysis", "pass")
            PASSED += 1

        # Structural check: the analyzer exposes one of the helper names
        learning = load_module("learning", module_file)
        analyzer = learning.HistoryAnalyzer()
        for helper in ("detect_patterns", "find_patterns", "pattern_analysis"):
            if hasattr(analyzer, helper):
                log("HistoryAnalyzer exposes pattern detection", "pass")
                PASSED += 1
                break
        else:
            log("HistoryAnalyzer missing pattern detection helpers", "fail")
            FAILED += 1
    except Exception as exc:
        log(f"Learning patterns test failed: {exc}", "fail")
        FAILED += 1
    return True
def test_memory_layer():
    """Test the external memory layer (``memory/memory.py``).

    Checks the module file and its storage directories, loads the module,
    verifies the ``MemoryType`` values and ``MemoryManager`` methods,
    performs a store/fetch round trip, and finally inspects the SQLite
    memory database if one exists. Each satisfied check increments
    ``PASSED``; hard failures increment ``FAILED``. Missing optional
    pieces are only reported with "info" status.

    Returns:
        False when the module file is missing or loading it raises
        ImportError; True otherwise, including when another load error
        was recorded as a failure.
    """
    global PASSED, FAILED
    print("\n[TEST] memory_layer")
    # 1. Check memory module exists
    memory_module = MEMORY_PATH / "memory.py"
    if not memory_module.exists():
        log(f"Memory module not found: {memory_module}", "fail")
        FAILED += 1
        return False
    log("Memory module exists", "pass")
    PASSED += 1
    # 2. Check memory directories exist
    chunks_dir = MEMORY_PATH / "chunks"
    summaries_dir = MEMORY_PATH / "summaries"
    if chunks_dir.exists():
        log("Chunks directory exists", "pass")
        PASSED += 1
    else:
        log("Chunks directory missing (may be created on first use)", "info")
    if summaries_dir.exists():
        log("Summaries directory exists", "pass")
        PASSED += 1
    else:
        log("Summaries directory missing (may be created on first use)", "info")
    # 3. Try importing memory module
    try:
        memory = load_module("memory", memory_module)
        # Attribute access doubles as an existence check for each class.
        memory.MemoryEntry
        MemoryType = memory.MemoryType
        MemoryManager = memory.MemoryManager
        log("Memory classes importable", "pass")
        PASSED += 1
        # 4. Test MemoryType enum
        memory_types = [e.value for e in MemoryType]
        for mtype in ("output", "transcript", "summary"):
            if mtype in memory_types:
                log(f"MemoryType.{mtype} exists", "pass")
                PASSED += 1
            else:
                log(f"MemoryType.{mtype} missing", "info")
        # 5. Test MemoryManager instantiation
        try:
            manager = MemoryManager()
            log("MemoryManager instantiated", "pass")
            PASSED += 1
            # Check manager has core methods
            for method in ("store", "fetch", "summarize", "list_entries"):
                if hasattr(manager, method):
                    log(f"MemoryManager.{method}() exists", "pass")
                    PASSED += 1
            # 6. Test storing a small entry
            test_content = "Test memory entry for Phase 7 testing"
            try:
                entry = manager.store(
                    content=test_content,
                    type=MemoryType.OUTPUT,
                    context={"source": "phase7_test"},
                )
                if entry and getattr(entry, "id", None):
                    log(f"Memory store works: {entry.id[:20]}...", "pass")
                    PASSED += 1
                    # 7. Test fetching
                    fetched = manager.fetch(entry.id)
                    if fetched and test_content in str(fetched):
                        log("Memory fetch works", "pass")
                        PASSED += 1
            except Exception as e:
                # Best effort: a failing round trip is reported, not counted.
                log(f"Memory store/fetch: {e}", "info")
        except Exception as e:
            log(f"MemoryManager instantiation: {e}", "info")
    except ImportError as e:
        log(f"Memory import failed: {e}", "fail")
        FAILED += 1
        return False
    except Exception as e:
        # Any other load error (e.g. a missing class attribute) is recorded
        # here so it cannot escape and abort the remaining test functions.
        log(f"Memory test error: {e}", "fail")
        FAILED += 1
    # 8. Check memory database
    memory_db = MEMORY_PATH / "memory.db"
    if memory_db.exists():
        log("Memory database exists", "pass")
        PASSED += 1
        conn = None
        try:
            conn = sqlite3.connect(memory_db)
            cursor = conn.cursor()
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
            tables = [row[0] for row in cursor.fetchall()]
            if "memory_entries" in tables or "entries" in tables:
                log("Memory entries table exists", "pass")
                PASSED += 1
        except Exception as e:
            log(f"Memory DB check: {e}", "info")
        finally:
            # Close even when a query raised, so the handle never leaks.
            if conn is not None:
                conn.close()
    else:
        log("Memory database not created yet", "info")
    return True
def test_memory_storage():
    """Check that ``MemoryManager`` can store an entry and fetch it back.

    Loads ``memory/memory.py``, stores one entry and fetches it by id.
    A successful store and a successful fetch each increment ``PASSED``;
    an empty store result, an empty fetch result or any exception
    increments ``FAILED``.

    Returns:
        Always True; the outcome is reflected in the global counters.
    """
    global PASSED, FAILED
    print("\n[TEST] memory_storage")
    try:
        memory = load_module("memory", MEMORY_PATH / "memory.py")
        manager = memory.MemoryManager()
        stored = manager.store(
            content="Phase 7 memory persistence test",
            type=memory.MemoryType.OUTPUT,
            context={"source": "phase7_storage_test"},
        )

        # Without a usable id there is nothing to fetch
        if not (stored and getattr(stored, "id", None)):
            log("MemoryManager.store returned empty id", "fail")
            FAILED += 1
            return True
        log("MemoryManager.store returned entry id", "pass")
        PASSED += 1

        if not manager.fetch(stored.id):
            log("MemoryManager.fetch missing stored entry", "fail")
            FAILED += 1
            return True
        log("MemoryManager.fetch returned stored entry", "pass")
        PASSED += 1
    except Exception as exc:
        log(f"Memory storage test failed: {exc}", "fail")
        FAILED += 1
    return True
def test_monitors():
    """Test the monitoring infrastructure (``runtime/monitors.py``).

    Checks that the monitors module exists and loads, looks for a set of
    well-known monitor classes and functions on it, and scans
    ``runtime/governance.py`` for monitoring-related keywords. Each item
    found increments ``PASSED``; a missing module file or a load error
    increments ``FAILED``.

    Returns:
        False when the monitors module file is missing; True otherwise.
    """
    global PASSED, FAILED
    print("\n[TEST] monitors")

    # 1. The module file must be present
    monitors_file = RUNTIME_PATH / "monitors.py"
    if not monitors_file.exists():
        log(f"Monitors module not found: {monitors_file}", "fail")
        FAILED += 1
        return False
    log("Monitors module exists", "pass")
    PASSED += 1

    try:
        # 2. Load the module
        monitors = load_module("monitors", monitors_file)
        log("Monitors module importable", "pass")
        PASSED += 1

        # 3. Look for the conventional monitor class names
        any_monitor_class = False
        for class_name in (
            "ResourceMonitor",
            "ActionMonitor",
            "ComplianceMonitor",
            "Monitor",
        ):
            if not hasattr(monitors, class_name):
                continue
            log(f"{class_name} class exists", "pass")
            PASSED += 1
            any_monitor_class = True
        if not any_monitor_class:
            log("No standard monitor classes found (may use different naming)", "info")

        # 4. Look for module-level monitoring functions
        for function_name in (
            "start_monitoring",
            "stop_monitoring",
            "check_resources",
            "log_action",
        ):
            if hasattr(monitors, function_name):
                log(f"{function_name}() function exists", "pass")
                PASSED += 1
    except Exception as exc:
        log(f"Monitors import/check: {exc}", "fail")
        FAILED += 1

    # 5. Keyword scan of governance.py for monitoring integration
    governance_file = RUNTIME_PATH / "governance.py"
    if governance_file.exists():
        lowered_source = governance_file.read_text().lower()
        keyword_messages = (
            ("monitor", "Governance has monitoring integration"),
            ("heartbeat", "Governance has heartbeat tracking"),
            ("metrics", "Governance has metrics collection"),
        )
        for keyword, message in keyword_messages:
            if keyword in lowered_source:
                log(message, "pass")
                PASSED += 1
    return True
def test_team_coordination():
    """Test the hierarchical team framework (``teams/framework/team.py``).

    Loads the team module, builds a ``TeamLead``, checks the value of
    ``TeamRole.RESEARCH`` and spawns a research team. Each satisfied check
    increments ``PASSED``; a missing module file or any exception
    increments ``FAILED``.

    Returns:
        False when the team module file is missing; True otherwise.
    """
    global PASSED, FAILED
    print("\n[TEST] team_coordination")

    framework_file = BASE_PATH / "teams" / "framework" / "team.py"
    if not framework_file.exists():
        log(f"Team framework not found: {framework_file}", "fail")
        FAILED += 1
        return False
    log("Team framework module exists", "pass")
    PASSED += 1

    try:
        team = load_module("team", framework_file)

        objective = {"description": "Test objective"}
        lead = team.TeamLead("phase7-test", objective)
        if isinstance(lead, team.TeamLead):
            log("TeamLead instantiation works", "pass")
            PASSED += 1

        if team.TeamRole.RESEARCH.value == "research":
            log("TeamRole enum contains research", "pass")
            PASSED += 1

        spawned = lead.spawn_team(team.TeamRole.RESEARCH)
        if spawned and spawned.members:
            log("Spawned research team with members", "pass")
            PASSED += 1
    except Exception as exc:
        log(f"Team coordination test failed: {exc}", "fail")
        FAILED += 1
    return True
def test_analytics():
    """Test analytics data collection.

    Checks that the analytics directory exists, counts its Python
    modules, queries the governance ledger (``ledger/governance.db``) for
    action and metric records, and scans ``learning.py`` for analytics
    keywords. Each satisfied check increments ``PASSED``; a missing
    directory, a missing ledger or a failing query increments ``FAILED``.

    Returns:
        False when the analytics directory is missing; True otherwise.
    """
    global PASSED, FAILED
    print("\n[TEST] analytics")
    # 1. Check analytics directory exists
    if not ANALYTICS_PATH.exists():
        log(f"Analytics directory not found: {ANALYTICS_PATH}", "fail")
        FAILED += 1
        return False
    log("Analytics directory exists", "pass")
    PASSED += 1
    # 2. List analytics modules
    py_files = list(ANALYTICS_PATH.glob("*.py"))
    log(f"Analytics has {len(py_files)} Python modules", "pass")
    PASSED += 1
    # 3. Check governance ledger for analytics data
    ledger_path = BASE_PATH / "ledger" / "governance.db"
    if ledger_path.exists():
        conn = None
        try:
            conn = sqlite3.connect(ledger_path)
            cursor = conn.cursor()
            # Check agent_actions table (analytics source)
            cursor.execute("SELECT COUNT(*) FROM agent_actions")
            action_count = cursor.fetchone()[0]
            log(f"Ledger has {action_count} agent actions logged", "pass")
            PASSED += 1
            # Check agent_metrics table
            cursor.execute("SELECT COUNT(*) FROM agent_metrics")
            metrics_count = cursor.fetchone()[0]
            log(f"Ledger has {metrics_count} agent metrics records", "pass")
            PASSED += 1
            # 4. Check for analytics-relevant columns
            # (reported as a pass regardless of how many are present)
            cursor.execute("PRAGMA table_info(agent_actions)")
            columns = [row[1] for row in cursor.fetchall()]
            analytics_columns = ["confidence", "success", "timestamp", "action"]
            found = sum(1 for c in analytics_columns if c in columns)
            log(
                f"agent_actions has {found}/{len(analytics_columns)} analytics columns",
                "pass",
            )
            PASSED += 1
            # 5. Check for time-series data
            cursor.execute("""
                SELECT DATE(timestamp) as day, COUNT(*) as count
                FROM agent_actions
                GROUP BY DATE(timestamp)
                ORDER BY day DESC
                LIMIT 5
            """)
            daily_data = cursor.fetchall()
            if daily_data:
                log(f"Time-series data available ({len(daily_data)} days)", "pass")
                PASSED += 1
            # 6. Check for action distribution
            cursor.execute("""
                SELECT action, COUNT(*) as count
                FROM agent_actions
                GROUP BY action
                ORDER BY count DESC
                LIMIT 5
            """)
            action_dist = cursor.fetchall()
            if action_dist:
                log(f"Action distribution: {len(action_dist)} action types", "pass")
                PASSED += 1
            # 7. Check agent_metrics for promotion tracking
            # (reported as a pass regardless of how many are present)
            cursor.execute("PRAGMA table_info(agent_metrics)")
            metric_columns = [row[1] for row in cursor.fetchall()]
            promotion_columns = [
                "compliant_runs",
                "consecutive_compliant",
                "current_tier",
            ]
            found = sum(1 for c in promotion_columns if c in metric_columns)
            log(
                f"agent_metrics has {found}/{len(promotion_columns)} promotion columns",
                "pass",
            )
            PASSED += 1
        except Exception as e:
            log(f"Ledger analytics check: {e}", "fail")
            FAILED += 1
        finally:
            # Close even when a query raised (e.g. a missing table), so the
            # database handle never leaks.
            if conn is not None:
                conn.close()
    else:
        log("Governance ledger not found", "fail")
        FAILED += 1
    # 8. Check for learning.py analytics integration
    learning_module = ANALYTICS_PATH / "learning.py"
    if learning_module.exists():
        content = learning_module.read_text()
        if "agent_actions" in content:
            log("Learning integrates with agent_actions", "pass")
            PASSED += 1
        if "statistics" in content or "stats" in content:
            log("Learning has statistical analysis", "pass")
            PASSED += 1
        if "pattern" in content.lower():
            log("Learning has pattern detection", "pass")
            PASSED += 1
    return True
def main():
    """Run every Phase 7 test function and print a summary.

    The test functions run in a fixed order. If one of them lets an
    exception escape, the traceback is printed, ``FAILED`` is incremented
    and the remaining tests are skipped.

    Returns:
        True when no failure was recorded, False otherwise.
    """
    global PASSED, FAILED
    banner = "=" * 60
    print("\n" + banner)
    print("PHASE 7: MONITORING & LEARNING TESTS")
    print(banner)

    try:
        suite = (
            test_learning_system,
            test_learning_patterns,
            test_memory_layer,
            test_memory_storage,
            test_monitors,
            test_analytics,
            test_team_coordination,
        )
        for run_test in suite:
            run_test()
    except Exception as exc:
        print(f"\n\033[91mTest execution error: {exc}\033[0m")
        import traceback

        traceback.print_exc()
        FAILED += 1

    print("\n" + banner)
    print(f"RESULTS: {PASSED} passed, {FAILED} failed")
    print(banner + "\n")
    return FAILED == 0
if __name__ == "__main__":
    # Exit status 0 when every check passed, 1 otherwise
    exit_code = 0 if main() else 1
    sys.exit(exit_code)