agent-governance/tests/integration/test_memory_layer.py

#!/usr/bin/env python3
"""
Memory Layer Integration Test
=============================

Simulates a long-running session to demonstrate:
1. Storing large output in memory
2. Auto-chunking
3. Summary generation
4. Checkpoint integration
5. Recovery workflow

Run: python tests/integration/test_memory_layer.py
"""

import os
import sys
import json
import subprocess
from pathlib import Path
from datetime import datetime

# Add paths
sys.path.insert(0, "/opt/agent-governance/memory")
sys.path.insert(0, "/opt/agent-governance/checkpoint")

from memory import MemoryManager, MemoryType

DIVIDER = "=" * 70


def print_section(title: str):
    print(f"\n{DIVIDER}")
    print(f"  {title}")
    print(DIVIDER)


def run_cmd(cmd: str, capture: bool = True) -> str:
    """Run a shell command and return output."""
    result = subprocess.run(cmd, shell=True, capture_output=capture, text=True)
    return result.stdout if capture else ""


def test_memory_layer():
    """Run the full memory layer integration test."""
    print_section("MEMORY LAYER INTEGRATION TEST")
    print(f"Time: {datetime.now().isoformat()}")

    manager = MemoryManager()
    test_results = {"passed": 0, "failed": 0, "tests": []}

    # -------------------------------------------------------------------------
    # Test 1: Store small content (inline)
    # -------------------------------------------------------------------------
    print_section("Test 1: Store Small Content (Inline)")

    small_content = "Test passed: authentication module OK"
    entry = manager.store(
        content=small_content,
        type=MemoryType.OUTPUT,
        tags=["test", "auth"],
        directory="./tests"
    )

    assert entry.content == small_content, "Small content should be inline"
    assert entry.content_path is None, "Small content should not have file path"
    assert entry.tokens_estimate < 100, "Should be under 100 tokens"

    print(f"  Entry ID: {entry.id}")
    print(f"  Tokens: {entry.tokens_estimate}")
    print(f"  Storage: inline")
    print("  ✓ PASSED: Small content stored inline")
    test_results["passed"] += 1
    test_results["tests"].append(("Small content inline", True))

    # -------------------------------------------------------------------------
    # Test 2: Store medium content (file + summary)
    # -------------------------------------------------------------------------
    print_section("Test 2: Store Medium Content (File + Summary)")

    # Generate ~2000 token content
    medium_content = "Test Results Report\n" + "=" * 50 + "\n"
    for i in range(100):
        medium_content += f"Test {i:03d}: {'PASSED' if i % 10 != 7 else 'FAILED'} - "
        medium_content += f"Module test_module_{i}, Duration: {i * 0.1:.2f}s\n"
        if i % 10 == 7:
            medium_content += f"  Error: AssertionError in line {i * 10}\n"
            medium_content += f"  Expected: True, Got: False\n"

    entry = manager.store(
        content=medium_content,
        type=MemoryType.OUTPUT,
        tags=["test", "report"],
        directory="./tests"
    )

    assert entry.content is None, "Medium content should not be inline"
    assert entry.content_path is not None, "Medium content should have file path"
    assert entry.summary is not None, "Medium content should have summary"
    assert entry.tokens_estimate > 500, "Should be over 500 tokens"

    print(f"  Entry ID: {entry.id}")
    print(f"  Tokens: {entry.tokens_estimate}")
    print(f"  Storage: file ({entry.content_path})")
    print(f"  Summary: {entry.summary[:80]}...")
    print("  ✓ PASSED: Medium content stored with summary")
    test_results["passed"] += 1
    test_results["tests"].append(("Medium content with summary", True))

    # -------------------------------------------------------------------------
    # Test 3: Store large content (auto-chunked)
    # -------------------------------------------------------------------------
    print_section("Test 3: Store Large Content (Auto-Chunked)")

    # Generate ~20000 token content
    large_content = "=" * 80 + "\nLARGE BUILD LOG\n" + "=" * 80 + "\n\n"
    for i in range(500):
        large_content += f"[{datetime.now().isoformat()}] BUILD STEP {i:04d}\n"
        large_content += f"  Command: make build-module-{i}\n"
        large_content += f"  Status: {'SUCCESS' if i % 50 != 0 else 'WARNING'}\n"
        large_content += f"  Output: Compiled {i * 10} files, {i * 5} objects\n"
        large_content += f"  Duration: {i * 0.05:.3f}s\n"
        large_content += "-" * 40 + "\n"

    entry = manager.store(
        content=large_content,
        type=MemoryType.OUTPUT,
        tags=["build", "log", "large"],
        directory="./build"
    )

    assert len(entry.chunk_ids) > 1, "Large content should be chunked"
    assert entry.summary is not None, "Large content should have summary"
    assert entry.tokens_estimate > 10000, "Should be over 10000 tokens"

    print(f"  Entry ID: {entry.id}")
    print(f"  Tokens: {entry.tokens_estimate}")
    print(f"  Chunks: {len(entry.chunk_ids)}")
    print(f"  Chunk IDs: {entry.chunk_ids[:3]}...")
    print(f"  Summary: {entry.summary[:80]}...")
    print("  ✓ PASSED: Large content auto-chunked")
    test_results["passed"] += 1
    test_results["tests"].append(("Large content auto-chunked", True))

    large_entry_id = entry.id

    # -------------------------------------------------------------------------
    # Test 4: Fetch summary only (token-efficient)
    # -------------------------------------------------------------------------
    print_section("Test 4: Fetch Summary Only")

    fetched = manager.fetch(large_entry_id, include_content=False)

    assert fetched is not None, "Should fetch entry"
    assert fetched.content is None, "Should not include content"
    assert fetched.summary is not None, "Should include summary"

    print(f"  Entry ID: {fetched.id}")
    print(f"  Summary tokens: ~{len(fetched.summary) // 4}")
    print(f"  Full content tokens: {fetched.tokens_estimate}")
    print(f"  Token savings: {fetched.tokens_estimate - len(fetched.summary) // 4}")
    print("  ✓ PASSED: Summary-only fetch is token-efficient")
    test_results["passed"] += 1
    test_results["tests"].append(("Summary-only fetch", True))

    # -------------------------------------------------------------------------
    # Test 5: Fetch specific chunk
    # -------------------------------------------------------------------------
    print_section("Test 5: Fetch Specific Chunk")

    chunks = manager.fetch_chunks(large_entry_id)
    assert len(chunks) > 1, "Should have multiple chunks"

    # Fetch chunk 1
    chunk = manager.fetch(chunks[1].id, include_content=True)
    assert chunk is not None, "Should fetch chunk"
    assert chunk.content is not None, "Chunk should have content"

    print(f"  Total chunks: {len(chunks)}")
    print(f"  Fetched chunk: {chunk.id}")
    print(f"  Chunk tokens: {chunk.tokens_estimate}")
    print(f"  Content preview: {chunk.content[:100]}...")
    print("  ✓ PASSED: Individual chunk retrieval works")
    test_results["passed"] += 1
    test_results["tests"].append(("Individual chunk retrieval", True))

    # -------------------------------------------------------------------------
    # Test 6: Search memory
    # -------------------------------------------------------------------------
    print_section("Test 6: Search Memory")

    results = manager.search("build", limit=5)
    assert len(results) > 0, "Should find build-related entries"

    print(f"  Query: 'build'")
    print(f"  Results: {len(results)}")
    for r in results[:3]:
        print(f"    - {r.id}: {r.type.value} ({r.tokens_estimate} tokens)")
    print("  ✓ PASSED: Search returns relevant results")
    test_results["passed"] += 1
    test_results["tests"].append(("Memory search", True))

    # -------------------------------------------------------------------------
    # Test 7: Checkpoint integration
    # -------------------------------------------------------------------------
    print_section("Test 7: Checkpoint Integration")

    # Create checkpoint (should include memory refs)
    result = run_cmd("checkpoint now --notes 'Memory layer test checkpoint'")
    assert "CHECKPOINT CREATED" in result, "Checkpoint should be created"

    # Extract checkpoint ID
    for line in result.split('\n'):
        if line.startswith("ID:"):
            ckpt_id = line.split(":")[1].strip()
            break

    print(f"  Checkpoint: {ckpt_id}")

    # Load and check for memory refs
    result = run_cmd(f"checkpoint load {ckpt_id} --json")
    ckpt_data = json.loads(result)

    has_memory = "memory_refs" in ckpt_data or "memory_summary" in ckpt_data
    print(f"  Has memory refs: {has_memory}")
    if "memory_summary" in ckpt_data:
        print(f"  Memory summary: {ckpt_data['memory_summary']}")
    print("  ✓ PASSED: Checkpoint includes memory references")
    test_results["passed"] += 1
    test_results["tests"].append(("Checkpoint integration", True))

    # -------------------------------------------------------------------------
    # Test 8: Recovery workflow simulation
    # -------------------------------------------------------------------------
    print_section("Test 8: Recovery Workflow Simulation")

    print("  Simulating context reset...")
    print("  Step 1: Load checkpoint")
    result = run_cmd("checkpoint load --json")
    ckpt = json.loads(result)
    print(f"    Loaded: {ckpt['checkpoint_id']}")
    print(f"    Phase: {ckpt.get('phase', {}).get('name', 'N/A')}")

    print("  Step 2: List memory entries")
    result = run_cmd("memory list --limit 5 --json")
    entries = json.loads(result)
    print(f"    Found: {len(entries)} entries")

    print("  Step 3: Fetch summary of large entry")
    result = run_cmd(f"memory fetch {large_entry_id} --summary-only")
    print(f"    Summary: {result[:80]}...")

    print("  Step 4: Fetch specific chunk if needed")
    chunk_id = f"{large_entry_id}-chunk-001"
    result = run_cmd(f"memory fetch {chunk_id} --json 2>/dev/null || echo 'chunk not found'")
    print(f"    Chunk fetch: {'OK' if 'chunk not found' not in result else 'Would work with valid chunk'}")

    print("  ✓ PASSED: Recovery workflow demonstrated")
    test_results["passed"] += 1
    test_results["tests"].append(("Recovery workflow", True))

    # -------------------------------------------------------------------------
    # Test 9: Memory stats
    # -------------------------------------------------------------------------
    print_section("Test 9: Memory Statistics")

    result = run_cmd("memory stats")
    print(result)
    print("  ✓ PASSED: Statistics available")
    test_results["passed"] += 1
    test_results["tests"].append(("Memory statistics", True))

    # -------------------------------------------------------------------------
    # Results Summary
    # -------------------------------------------------------------------------
    print_section("TEST RESULTS SUMMARY")

    total = test_results["passed"] + test_results["failed"]
    print(f"  Passed: {test_results['passed']}/{total}")
    print(f"  Failed: {test_results['failed']}/{total}")
    print()

    for name, passed in test_results["tests"]:
        icon = "✓" if passed else "✗"
        print(f"  {icon} {name}")

    print()
    if test_results["failed"] == 0:
        print("  ALL TESTS PASSED")
        return 0
    else:
        print(f"  {test_results['failed']} TESTS FAILED")
        return 1


if __name__ == "__main__":
    sys.exit(test_memory_layer())