agent-governance/memory/memory.py

#!/usr/bin/env python3
"""
External Memory Layer
=====================

Persistent context memory system that integrates with checkpoint + STATUS.
Provides token-efficient storage and retrieval of large outputs, transcripts,
and summaries.

Features:
- Chunked output storage with unique IDs
- Automatic summarization of large content
- Links between checkpoints, STATUS files, and memory entries
- CLI for storing, fetching, and summarizing content

Architecture:
- SQLite database for metadata and indexes
- Filesystem for large content chunks
- Redis for hot/recent entries (optional)
"""

import os
import sys
import json
import hashlib
import sqlite3
import gzip
from dataclasses import dataclass, field, asdict
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional, List, Dict, Any, Tuple
from enum import Enum
import textwrap

# =============================================================================
# Configuration
# =============================================================================

MEMORY_DIR = Path("/opt/agent-governance/memory")
MEMORY_DB = MEMORY_DIR / "memory.db"
CHUNKS_DIR = MEMORY_DIR / "chunks"
SUMMARIES_DIR = MEMORY_DIR / "summaries"

# Thresholds
DEFAULT_CHUNK_SIZE = 4000  # tokens (~16000 chars)
SUMMARY_THRESHOLD = 1000   # tokens - content over this gets summarized
MAX_INLINE_SIZE = 500      # tokens - content under this stays inline
CHARS_PER_TOKEN = 4        # rough estimate

# Retention
MAX_MEMORY_ENTRIES = 1000
MAX_CHUNK_AGE_DAYS = 30


class MemoryType(str, Enum):
    """Types of memory entries."""
    TRANSCRIPT = "transcript"      # Full conversation logs
    OUTPUT = "output"              # Command/tool outputs
    SUMMARY = "summary"            # Generated summaries
    CHUNK = "chunk"                # Large output chunk
    REFERENCE = "reference"        # Pointer to external content
    CONTEXT = "context"            # Saved context state


class MemoryStatus(str, Enum):
    """Status of memory entries."""
    ACTIVE = "active"
    ARCHIVED = "archived"
    EXPIRED = "expired"


# =============================================================================
# Data Classes
# =============================================================================

@dataclass
class MemoryEntry:
    """A single memory entry."""
    id: str
    type: MemoryType
    created_at: str

    # Content (either inline or chunked)
    content: Optional[str] = None
    content_path: Optional[str] = None  # Path to file if chunked

    # Metadata
    summary: Optional[str] = None
    tokens_estimate: int = 0
    chunk_ids: List[str] = field(default_factory=list)  # For multi-chunk entries

    # Links
    checkpoint_id: Optional[str] = None
    directory: Optional[str] = None
    parent_id: Optional[str] = None  # For chunks pointing to parent entry

    # Tags and status
    tags: List[str] = field(default_factory=list)
    status: MemoryStatus = MemoryStatus.ACTIVE

    # Context
    context: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict:
        return {
            "id": self.id,
            "type": self.type.value if isinstance(self.type, MemoryType) else self.type,
            "created_at": self.created_at,
            "content": self.content,
            "content_path": self.content_path,
            "summary": self.summary,
            "tokens_estimate": self.tokens_estimate,
            "chunk_ids": self.chunk_ids,
            "checkpoint_id": self.checkpoint_id,
            "directory": self.directory,
            "parent_id": self.parent_id,
            "tags": self.tags,
            "status": self.status.value if isinstance(self.status, MemoryStatus) else self.status,
            "context": self.context
        }

    @classmethod
    def from_dict(cls, data: dict) -> 'MemoryEntry':
        return cls(
            id=data["id"],
            type=MemoryType(data["type"]) if data.get("type") else MemoryType.OUTPUT,
            created_at=data["created_at"],
            content=data.get("content"),
            content_path=data.get("content_path"),
            summary=data.get("summary"),
            tokens_estimate=data.get("tokens_estimate", 0),
            chunk_ids=data.get("chunk_ids", []),
            checkpoint_id=data.get("checkpoint_id"),
            directory=data.get("directory"),
            parent_id=data.get("parent_id"),
            tags=data.get("tags", []),
            status=MemoryStatus(data["status"]) if data.get("status") else MemoryStatus.ACTIVE,
            context=data.get("context", {})
        )


@dataclass
class MemoryReference:
    """A lightweight reference to a memory entry (for embedding in prompts)."""
    id: str
    type: str
    summary: str
    tokens: int
    created_at: str

    def to_inline(self) -> str:
        """Format for inline prompt inclusion."""
        return f"[Memory:{self.id}] {self.summary} ({self.tokens} tokens)"


# =============================================================================
# Memory Manager
# =============================================================================

class MemoryManager:
    """
    Manages the external memory layer.
    """

    def __init__(self, db_path: Path = MEMORY_DB):
        self.db_path = db_path
        self.chunks_dir = CHUNKS_DIR
        self.summaries_dir = SUMMARIES_DIR

        # Ensure directories exist
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self.chunks_dir.mkdir(parents=True, exist_ok=True)
        self.summaries_dir.mkdir(parents=True, exist_ok=True)

        # Initialize database
        self._init_db()

        # Optional Redis connection
        self.redis = self._get_redis()

    def _init_db(self):
        """Initialize SQLite database schema."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        cursor.execute("""
            CREATE TABLE IF NOT EXISTS memory_entries (
                id TEXT PRIMARY KEY,
                type TEXT NOT NULL,
                created_at TEXT NOT NULL,
                content TEXT,
                content_path TEXT,
                summary TEXT,
                tokens_estimate INTEGER DEFAULT 0,
                chunk_ids TEXT,
                checkpoint_id TEXT,
                directory TEXT,
                parent_id TEXT,
                tags TEXT,
                status TEXT DEFAULT 'active',
                context TEXT
            )
        """)

        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_memory_type ON memory_entries(type)
        """)
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_memory_checkpoint ON memory_entries(checkpoint_id)
        """)
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_memory_directory ON memory_entries(directory)
        """)
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_memory_created ON memory_entries(created_at)
        """)
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_memory_status ON memory_entries(status)
        """)

        conn.commit()
        conn.close()

    def _get_redis(self):
        """Get optional Redis connection for hot entries."""
        try:
            import redis
            with open("/opt/vault/init-keys.json") as f:
                token = json.load(f)["root_token"]

            import subprocess
            result = subprocess.run([
                "curl", "-sk",
                "-H", f"X-Vault-Token: {token}",
                "https://127.0.0.1:8200/v1/secret/data/services/dragonfly"
            ], capture_output=True, text=True)

            creds = json.loads(result.stdout)["data"]["data"]
            return redis.Redis(
                host=creds["host"],
                port=int(creds["port"]),
                password=creds["password"],
                decode_responses=True
            )
        except:
            return None

    def _now(self) -> str:
        return datetime.now(timezone.utc).isoformat()

    def _generate_id(self, prefix: str = "mem") -> str:
        timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
        suffix = hashlib.sha256(f"{timestamp}-{os.getpid()}-{os.urandom(4).hex()}".encode()).hexdigest()[:8]
        return f"{prefix}-{timestamp}-{suffix}"

    def _estimate_tokens(self, text: str) -> int:
        """Estimate token count from text."""
        return len(text) // CHARS_PER_TOKEN

    def _generate_summary(self, content: str, max_tokens: int = 200) -> str:
        """
        Generate a summary of content.
        For now, uses extractive summarization (first/last lines + key info).
        Could be enhanced with LLM summarization.
        """
        lines = content.strip().split('\n')
        tokens_used = 0
        summary_lines = []

        # Get first few lines
        for line in lines[:5]:
            line_tokens = self._estimate_tokens(line)
            if tokens_used + line_tokens < max_tokens // 2:
                summary_lines.append(line)
                tokens_used += line_tokens
            else:
                break

        # Add ellipsis if there's more
        if len(lines) > 10:
            summary_lines.append(f"... ({len(lines) - 10} more lines) ...")

        # Get last few lines
        for line in lines[-3:]:
            line_tokens = self._estimate_tokens(line)
            if tokens_used + line_tokens < max_tokens:
                summary_lines.append(line)
                tokens_used += line_tokens

        return '\n'.join(summary_lines)

    # -------------------------------------------------------------------------
    # Core Operations
    # -------------------------------------------------------------------------

    def store(
        self,
        content: str,
        type: MemoryType = MemoryType.OUTPUT,
        tags: List[str] = None,
        checkpoint_id: str = None,
        directory: str = None,
        context: Dict[str, Any] = None,
        auto_chunk: bool = True,
        auto_summarize: bool = True
    ) -> MemoryEntry:
        """
        Store content in memory.

        Automatically chunks large content and generates summaries.
        Returns the memory entry (or parent entry if chunked).
        """
        tokens = self._estimate_tokens(content)
        entry_id = self._generate_id()

        # Decide storage strategy
        if tokens <= MAX_INLINE_SIZE:
            # Small content - store inline
            entry = MemoryEntry(
                id=entry_id,
                type=type,
                created_at=self._now(),
                content=content,
                tokens_estimate=tokens,
                checkpoint_id=checkpoint_id,
                directory=directory,
                tags=tags or [],
                context=context or {}
            )
        elif auto_chunk and tokens > DEFAULT_CHUNK_SIZE:
            # Large content - chunk it
            entry = self._store_chunked(
                content=content,
                entry_id=entry_id,
                type=type,
                tags=tags,
                checkpoint_id=checkpoint_id,
                directory=directory,
                context=context
            )
        else:
            # Medium content - store to file
            content_path = self._write_chunk_file(entry_id, content)
            summary = self._generate_summary(content) if auto_summarize else None

            entry = MemoryEntry(
                id=entry_id,
                type=type,
                created_at=self._now(),
                content_path=str(content_path),
                summary=summary,
                tokens_estimate=tokens,
                checkpoint_id=checkpoint_id,
                directory=directory,
                tags=tags or [],
                context=context or {}
            )

        # Save to database
        self._save_entry(entry)

        # Cache in Redis if available
        if self.redis:
            self.redis.setex(
                f"memory:{entry.id}",
                3600,  # 1 hour TTL
                json.dumps(entry.to_dict())
            )

        return entry

    def _store_chunked(
        self,
        content: str,
        entry_id: str,
        type: MemoryType,
        tags: List[str],
        checkpoint_id: str,
        directory: str,
        context: Dict[str, Any]
    ) -> MemoryEntry:
        """Store large content as multiple chunks."""
        chunk_size_chars = DEFAULT_CHUNK_SIZE * CHARS_PER_TOKEN
        chunks = []
        chunk_ids = []

        # Split into chunks
        for i in range(0, len(content), chunk_size_chars):
            chunk_content = content[i:i + chunk_size_chars]
            chunk_id = f"{entry_id}-chunk-{len(chunks):03d}"

            # Write chunk to file
            chunk_path = self._write_chunk_file(chunk_id, chunk_content)

            chunk_entry = MemoryEntry(
                id=chunk_id,
                type=MemoryType.CHUNK,
                created_at=self._now(),
                content_path=str(chunk_path),
                tokens_estimate=self._estimate_tokens(chunk_content),
                parent_id=entry_id,
                tags=tags or [],
                context={"chunk_index": len(chunks), "total_chunks": -1}
            )
            chunks.append(chunk_entry)
            chunk_ids.append(chunk_id)

        # Update total_chunks in context
        for chunk in chunks:
            chunk.context["total_chunks"] = len(chunks)
            self._save_entry(chunk)

        # Generate summary of full content
        summary = self._generate_summary(content)

        # Create parent entry
        parent_entry = MemoryEntry(
            id=entry_id,
            type=type,
            created_at=self._now(),
            summary=summary,
            tokens_estimate=self._estimate_tokens(content),
            chunk_ids=chunk_ids,
            checkpoint_id=checkpoint_id,
            directory=directory,
            tags=tags or [],
            context=context or {}
        )

        return parent_entry

    def _write_chunk_file(self, chunk_id: str, content: str, compress: bool = True) -> Path:
        """Write content to a chunk file."""
        if compress:
            filepath = self.chunks_dir / f"{chunk_id}.gz"
            with gzip.open(filepath, 'wt', encoding='utf-8') as f:
                f.write(content)
        else:
            filepath = self.chunks_dir / f"{chunk_id}.txt"
            filepath.write_text(content)
        return filepath

    def _read_chunk_file(self, path: str) -> str:
        """Read content from a chunk file."""
        filepath = Path(path)
        if filepath.suffix == '.gz':
            with gzip.open(filepath, 'rt', encoding='utf-8') as f:
                return f.read()
        else:
            return filepath.read_text()

    def _save_entry(self, entry: MemoryEntry):
        """Save entry to database."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        cursor.execute("""
            INSERT OR REPLACE INTO memory_entries
            (id, type, created_at, content, content_path, summary, tokens_estimate,
             chunk_ids, checkpoint_id, directory, parent_id, tags, status, context)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            entry.id,
            entry.type.value if isinstance(entry.type, MemoryType) else entry.type,
            entry.created_at,
            entry.content,
            entry.content_path,
            entry.summary,
            entry.tokens_estimate,
            json.dumps(entry.chunk_ids),
            entry.checkpoint_id,
            entry.directory,
            entry.parent_id,
            json.dumps(entry.tags),
            entry.status.value if isinstance(entry.status, MemoryStatus) else entry.status,
            json.dumps(entry.context)
        ))

        conn.commit()
        conn.close()

    def fetch(self, entry_id: str, include_content: bool = True) -> Optional[MemoryEntry]:
        """
        Fetch a memory entry by ID.

        If include_content is True and content is in a file, reads the file.
        """
        # Try Redis cache first
        if self.redis:
            cached = self.redis.get(f"memory:{entry_id}")
            if cached:
                entry = MemoryEntry.from_dict(json.loads(cached))
                if include_content and entry.content_path and not entry.content:
                    entry.content = self._read_chunk_file(entry.content_path)
                return entry

        # Fall back to database
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        cursor.execute("SELECT * FROM memory_entries WHERE id = ?", (entry_id,))
        row = cursor.fetchone()
        conn.close()

        if not row:
            return None

        entry = self._row_to_entry(row)

        if include_content and entry.content_path and not entry.content:
            try:
                entry.content = self._read_chunk_file(entry.content_path)
            except FileNotFoundError:
                pass

        return entry

    def _row_to_entry(self, row: sqlite3.Row) -> MemoryEntry:
        """Convert database row to MemoryEntry."""
        return MemoryEntry(
            id=row['id'],
            type=MemoryType(row['type']),
            created_at=row['created_at'],
            content=row['content'],
            content_path=row['content_path'],
            summary=row['summary'],
            tokens_estimate=row['tokens_estimate'],
            chunk_ids=json.loads(row['chunk_ids']) if row['chunk_ids'] else [],
            checkpoint_id=row['checkpoint_id'],
            directory=row['directory'],
            parent_id=row['parent_id'],
            tags=json.loads(row['tags']) if row['tags'] else [],
            status=MemoryStatus(row['status']),
            context=json.loads(row['context']) if row['context'] else {}
        )

    def fetch_chunks(self, parent_id: str) -> List[MemoryEntry]:
        """Fetch all chunks for a parent entry."""
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        cursor.execute(
            "SELECT * FROM memory_entries WHERE parent_id = ? ORDER BY id",
            (parent_id,)
        )
        rows = cursor.fetchall()
        conn.close()

        return [self._row_to_entry(row) for row in rows]

    def list_entries(
        self,
        type: MemoryType = None,
        directory: str = None,
        checkpoint_id: str = None,
        status: MemoryStatus = MemoryStatus.ACTIVE,
        limit: int = 50,
        include_chunks: bool = False
    ) -> List[MemoryEntry]:
        """List memory entries with optional filters."""
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        query = "SELECT * FROM memory_entries WHERE 1=1"
        params = []

        if type:
            query += " AND type = ?"
            params.append(type.value)
        elif not include_chunks:
            query += " AND type != ?"
            params.append(MemoryType.CHUNK.value)

        if directory:
            query += " AND directory = ?"
            params.append(directory)

        if checkpoint_id:
            query += " AND checkpoint_id = ?"
            params.append(checkpoint_id)

        if status:
            query += " AND status = ?"
            params.append(status.value)

        query += " ORDER BY created_at DESC LIMIT ?"
        params.append(limit)

        cursor.execute(query, params)
        rows = cursor.fetchall()
        conn.close()

        return [self._row_to_entry(row) for row in rows]

    def get_reference(self, entry_id: str) -> Optional[MemoryReference]:
        """Get a lightweight reference to an entry."""
        entry = self.fetch(entry_id, include_content=False)
        if not entry:
            return None

        return MemoryReference(
            id=entry.id,
            type=entry.type.value,
            summary=entry.summary or "(no summary)",
            tokens=entry.tokens_estimate,
            created_at=entry.created_at
        )

    def get_references_for_checkpoint(self, checkpoint_id: str) -> List[MemoryReference]:
        """Get all memory references linked to a checkpoint."""
        entries = self.list_entries(checkpoint_id=checkpoint_id)
        return [
            MemoryReference(
                id=e.id,
                type=e.type.value,
                summary=e.summary or "(no summary)",
                tokens=e.tokens_estimate,
                created_at=e.created_at
            )
            for e in entries
        ]

    def search(self, query: str, limit: int = 20) -> List[MemoryEntry]:
        """Search memory entries by content or summary."""
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        # Search in summary and tags
        cursor.execute("""
            SELECT * FROM memory_entries
            WHERE (summary LIKE ? OR tags LIKE ? OR content LIKE ?)
            AND type != ?
            AND status = ?
            ORDER BY created_at DESC
            LIMIT ?
        """, (f"%{query}%", f"%{query}%", f"%{query}%",
              MemoryType.CHUNK.value, MemoryStatus.ACTIVE.value, limit))

        rows = cursor.fetchall()
        conn.close()

        return [self._row_to_entry(row) for row in rows]

    def summarize(self, entry_id: str, force: bool = False) -> str:
        """
        Generate or retrieve summary for an entry.
        """
        entry = self.fetch(entry_id, include_content=True)
        if not entry:
            return f"Entry not found: {entry_id}"

        if entry.summary and not force:
            return entry.summary

        # Generate summary from content
        content = entry.content
        if not content and entry.chunk_ids:
            # Reconstruct from chunks for summary
            chunks = self.fetch_chunks(entry_id)
            content = ""
            for chunk in chunks:
                chunk_content = self.fetch(chunk.id, include_content=True)
                if chunk_content and chunk_content.content:
                    content += chunk_content.content

        if not content:
            return "(no content to summarize)"

        summary = self._generate_summary(content)

        # Update entry with summary
        entry.summary = summary
        self._save_entry(entry)

        return summary

    def archive(self, entry_id: str):
        """Archive an entry (mark as archived but keep data)."""
        entry = self.fetch(entry_id, include_content=False)
        if entry:
            entry.status = MemoryStatus.ARCHIVED
            self._save_entry(entry)

    def prune(self, keep_days: int = MAX_CHUNK_AGE_DAYS, keep_entries: int = MAX_MEMORY_ENTRIES):
        """Prune old entries to manage storage."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        # Get count
        cursor.execute("SELECT COUNT(*) FROM memory_entries WHERE status = ?",
                      (MemoryStatus.ACTIVE.value,))
        count = cursor.fetchone()[0]

        deleted = 0

        # Delete oldest entries if over limit
        if count > keep_entries:
            cursor.execute("""
                SELECT id, content_path FROM memory_entries
                WHERE status = ?
                ORDER BY created_at ASC
                LIMIT ?
            """, (MemoryStatus.ACTIVE.value, count - keep_entries))

            for row in cursor.fetchall():
                entry_id, content_path = row
                # Delete file if exists
                if content_path:
                    try:
                        Path(content_path).unlink()
                    except:
                        pass
                cursor.execute("DELETE FROM memory_entries WHERE id = ?", (entry_id,))
                deleted += 1

        conn.commit()
        conn.close()

        return deleted


# =============================================================================
# CLI Interface
# =============================================================================

def cli():
    import argparse

    parser = argparse.ArgumentParser(
        description="External Memory Layer",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  memory log "Large output content here"
  memory log --file output.txt --tag "test-results"
  memory fetch mem-20260123-123456-abcd1234
  memory fetch mem-20260123-123456-abcd1234 --summary-only
  memory list --type output --limit 10
  memory search "error"
  memory summarize mem-20260123-123456-abcd1234
  memory prune --keep-days 7
        """
    )

    subparsers = parser.add_subparsers(dest="command", required=True)

    # log
    log_parser = subparsers.add_parser("log", help="Store content in memory")
    log_parser.add_argument("content", nargs="?", help="Content to store (or use --file)")
    log_parser.add_argument("--file", "-f", help="Read content from file")
    log_parser.add_argument("--stdin", action="store_true", help="Read from stdin")
    log_parser.add_argument("--type", "-t", choices=["transcript", "output", "context"],
                           default="output", help="Entry type")
    log_parser.add_argument("--tag", action="append", help="Add tag (can repeat)")
    log_parser.add_argument("--checkpoint", help="Link to checkpoint ID")
    log_parser.add_argument("--directory", "-d", help="Link to directory")
    log_parser.add_argument("--no-chunk", action="store_true", help="Don't auto-chunk")
    log_parser.add_argument("--json", action="store_true", help="Output JSON")

    # fetch
    fetch_parser = subparsers.add_parser("fetch", help="Retrieve memory entry")
    fetch_parser.add_argument("entry_id", help="Memory entry ID")
    fetch_parser.add_argument("--summary-only", "-s", action="store_true",
                             help="Only show summary")
    fetch_parser.add_argument("--chunk", "-c", type=int, help="Fetch specific chunk index")
    fetch_parser.add_argument("--json", action="store_true", help="Output JSON")

    # list
    list_parser = subparsers.add_parser("list", help="List memory entries")
    list_parser.add_argument("--type", "-t", choices=["transcript", "output", "summary", "context"])
    list_parser.add_argument("--directory", "-d", help="Filter by directory")
    list_parser.add_argument("--checkpoint", help="Filter by checkpoint ID")
    list_parser.add_argument("--limit", "-n", type=int, default=20)
    list_parser.add_argument("--json", action="store_true", help="Output JSON")

    # search
    search_parser = subparsers.add_parser("search", help="Search memory entries")
    search_parser.add_argument("query", help="Search query")
    search_parser.add_argument("--limit", "-n", type=int, default=10)
    search_parser.add_argument("--json", action="store_true", help="Output JSON")

    # summarize
    summarize_parser = subparsers.add_parser("summarize", help="Generate/show summary")
    summarize_parser.add_argument("entry_id", help="Memory entry ID")
    summarize_parser.add_argument("--force", action="store_true", help="Regenerate summary")

    # refs
    refs_parser = subparsers.add_parser("refs", help="Get memory references")
    refs_parser.add_argument("--checkpoint", help="Get refs for checkpoint")
    refs_parser.add_argument("--directory", "-d", help="Get refs for directory")
    refs_parser.add_argument("--json", action="store_true", help="Output JSON")

    # prune
    prune_parser = subparsers.add_parser("prune", help="Prune old entries")
    prune_parser.add_argument("--keep-days", type=int, default=MAX_CHUNK_AGE_DAYS)
    prune_parser.add_argument("--keep-entries", type=int, default=MAX_MEMORY_ENTRIES)

    # stats
    subparsers.add_parser("stats", help="Show memory statistics")

    args = parser.parse_args()
    manager = MemoryManager()

    # -------------------------------------------------------------------------

    if args.command == "log":
        # Get content
        if args.stdin:
            content = sys.stdin.read()
        elif args.file:
            content = Path(args.file).read_text()
        elif args.content:
            content = args.content
        else:
            print("Error: Provide content, --file, or --stdin")
            sys.exit(1)

        entry = manager.store(
            content=content,
            type=MemoryType(args.type),
            tags=args.tag or [],
            checkpoint_id=args.checkpoint,
            directory=args.directory,
            auto_chunk=not args.no_chunk
        )

        if args.json:
            print(json.dumps(entry.to_dict(), indent=2))
        else:
            print(f"\n{'='*60}")
            print("MEMORY STORED")
            print(f"{'='*60}")
            print(f"ID: {entry.id}")
            print(f"Type: {entry.type.value}")
            print(f"Tokens: {entry.tokens_estimate}")
            if entry.chunk_ids:
                print(f"Chunks: {len(entry.chunk_ids)}")
            if entry.summary:
                print(f"\nSummary:\n{entry.summary[:200]}...")
            print(f"{'='*60}")

    elif args.command == "fetch":
        if args.chunk is not None:
            # Fetch specific chunk
            chunk_id = f"{args.entry_id}-chunk-{args.chunk:03d}"
            entry = manager.fetch(chunk_id, include_content=True)
        else:
            entry = manager.fetch(args.entry_id, include_content=not args.summary_only)

        if not entry:
            print(f"Entry not found: {args.entry_id}")
            sys.exit(1)

        if args.json:
            print(json.dumps(entry.to_dict(), indent=2))
        elif args.summary_only:
            print(entry.summary or "(no summary)")
        else:
            print(f"\n{'='*60}")
            print(f"MEMORY: {entry.id}")
            print(f"{'='*60}")
            print(f"Type: {entry.type.value}")
            print(f"Created: {entry.created_at}")
            print(f"Tokens: {entry.tokens_estimate}")

            if entry.chunk_ids:
                print(f"Chunks: {len(entry.chunk_ids)}")
                print(f"  IDs: {', '.join(entry.chunk_ids[:3])}...")

            if entry.summary:
                print(f"\nSummary:\n{entry.summary}")

            if entry.content:
                print(f"\nContent:\n{'-'*40}")
                print(entry.content)
            print(f"{'='*60}")

    elif args.command == "list":
        entry_type = MemoryType(args.type) if args.type else None
        entries = manager.list_entries(
            type=entry_type,
            directory=args.directory,
            checkpoint_id=args.checkpoint,
            limit=args.limit
        )

        if args.json:
            print(json.dumps([e.to_dict() for e in entries], indent=2))
        else:
            print(f"\n{'='*60}")
            print(f"MEMORY ENTRIES ({len(entries)})")
            print(f"{'='*60}")

            for entry in entries:
                chunks_str = f" [{len(entry.chunk_ids)} chunks]" if entry.chunk_ids else ""
                summary = (entry.summary or "")[:50]
                if len(entry.summary or "") > 50:
                    summary += "..."
                print(f"\n  {entry.id}")
                print(f"    Type: {entry.type.value}{chunks_str}")
                print(f"    Tokens: {entry.tokens_estimate}")
                if summary:
                    print(f"    Summary: {summary}")

            print(f"\n{'='*60}")

    elif args.command == "search":
        entries = manager.search(args.query, limit=args.limit)

        if args.json:
            print(json.dumps([e.to_dict() for e in entries], indent=2))
        else:
            print(f"\n{'='*60}")
            print(f"SEARCH RESULTS: '{args.query}' ({len(entries)} found)")
            print(f"{'='*60}")

            for entry in entries:
                summary = (entry.summary or "")[:60]
                print(f"\n  {entry.id}")
                print(f"    Type: {entry.type.value}, Tokens: {entry.tokens_estimate}")
                if summary:
                    print(f"    {summary}")

            print(f"\n{'='*60}")

    elif args.command == "summarize":
        summary = manager.summarize(args.entry_id, force=args.force)
        print(summary)

    elif args.command == "refs":
        refs = []
        if args.checkpoint:
            refs = manager.get_references_for_checkpoint(args.checkpoint)
        elif args.directory:
            entries = manager.list_entries(directory=args.directory)
            refs = [
                MemoryReference(
                    id=e.id, type=e.type.value,
                    summary=e.summary or "", tokens=e.tokens_estimate,
                    created_at=e.created_at
                )
                for e in entries
            ]

        if args.json:
            print(json.dumps([asdict(r) for r in refs], indent=2))
        else:
            print(f"\n{'='*60}")
            print(f"MEMORY REFERENCES ({len(refs)})")
            print(f"{'='*60}")

            for ref in refs:
                print(f"\n  {ref.to_inline()}")

            print(f"\n{'='*60}")

    elif args.command == "prune":
        deleted = manager.prune(
            keep_days=args.keep_days,
            keep_entries=args.keep_entries
        )
        print(f"Pruned {deleted} entries")

    elif args.command == "stats":
        conn = sqlite3.connect(MEMORY_DB)
        cursor = conn.cursor()

        cursor.execute("SELECT COUNT(*) FROM memory_entries WHERE status = 'active'")
        total = cursor.fetchone()[0]

        cursor.execute("""
            SELECT type, COUNT(*), SUM(tokens_estimate)
            FROM memory_entries WHERE status = 'active'
            GROUP BY type
        """)
        by_type = cursor.fetchall()

        cursor.execute("SELECT SUM(tokens_estimate) FROM memory_entries WHERE status = 'active'")
        total_tokens = cursor.fetchone()[0] or 0

        conn.close()

        # Get storage size
        chunks_size = sum(f.stat().st_size for f in CHUNKS_DIR.glob("*") if f.is_file())

        print(f"\n{'='*60}")
        print("MEMORY STATISTICS")
        print(f"{'='*60}")
        print(f"Total Entries: {total}")
        print(f"Total Tokens: {total_tokens:,}")
        print(f"Storage Size: {chunks_size / 1024:.1f} KB")
        print(f"\nBy Type:")
        for type_name, count, tokens in by_type:
            print(f"  {type_name}: {count} entries, {tokens or 0:,} tokens")
        print(f"{'='*60}")


if __name__ == "__main__":
    cli()