#!/usr/bin/env python3
"""
External Memory Layer
=====================

Persistent context memory system that integrates with checkpoint + STATUS.
Provides token-efficient storage and retrieval of large outputs, transcripts,
and summaries.

Features:
- Chunked output storage with unique IDs
- Automatic summarization of large content
- Links between checkpoints, STATUS files, and memory entries
- CLI for storing, fetching, and summarizing content

Architecture:
- SQLite database for metadata and indexes
- Filesystem for large content chunks
- Redis for hot/recent entries (optional)
"""

import gzip
import hashlib
import json
import os
import sqlite3
import sys
import textwrap
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

# =============================================================================
# Configuration
# =============================================================================

MEMORY_DIR = Path("/opt/agent-governance/memory")
MEMORY_DB = MEMORY_DIR / "memory.db"
CHUNKS_DIR = MEMORY_DIR / "chunks"
SUMMARIES_DIR = MEMORY_DIR / "summaries"

# Size thresholds, expressed in estimated tokens (see CHARS_PER_TOKEN).
DEFAULT_CHUNK_SIZE = 4000   # tokens (~16000 chars) per stored chunk
SUMMARY_THRESHOLD = 1000    # content over this gets summarized
MAX_INLINE_SIZE = 500       # content under this stays inline in the DB row
CHARS_PER_TOKEN = 4         # rough chars-per-token estimate

# Retention limits (enforced by prune()).
MAX_MEMORY_ENTRIES = 1000
MAX_CHUNK_AGE_DAYS = 30


class MemoryType(str, Enum):
    """Types of memory entries."""
    TRANSCRIPT = "transcript"   # full conversation logs
    OUTPUT = "output"           # command/tool outputs
    SUMMARY = "summary"         # generated summaries
    CHUNK = "chunk"             # one piece of a large, split entry
    REFERENCE = "reference"     # pointer to external content
    CONTEXT = "context"         # saved context state


class MemoryStatus(str, Enum):
    """Lifecycle status of memory entries."""
    ACTIVE = "active"
    ARCHIVED = "archived"
    EXPIRED = "expired"
# =============================================================================
# Data Classes
# =============================================================================

@dataclass
class MemoryEntry:
    """A single memory entry.

    Content lives either inline (``content``) or on disk (``content_path``);
    very large entries are split into CHUNK entries listed in ``chunk_ids``.
    """
    id: str
    type: MemoryType
    created_at: str

    # Content (either inline or chunked)
    content: Optional[str] = None
    content_path: Optional[str] = None  # path to file if chunked

    # Metadata
    summary: Optional[str] = None
    tokens_estimate: int = 0
    chunk_ids: List[str] = field(default_factory=list)  # for multi-chunk entries

    # Links
    checkpoint_id: Optional[str] = None
    directory: Optional[str] = None
    parent_id: Optional[str] = None  # for chunks pointing to parent entry

    # Tags and status
    tags: List[str] = field(default_factory=list)
    status: MemoryStatus = MemoryStatus.ACTIVE

    # Context
    context: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialize to a JSON-compatible dict; enum fields become strings."""
        def plain(value):
            # Accept both enum instances and already-plain strings.
            return value.value if isinstance(value, Enum) else value

        return {
            "id": self.id,
            "type": plain(self.type),
            "created_at": self.created_at,
            "content": self.content,
            "content_path": self.content_path,
            "summary": self.summary,
            "tokens_estimate": self.tokens_estimate,
            "chunk_ids": self.chunk_ids,
            "checkpoint_id": self.checkpoint_id,
            "directory": self.directory,
            "parent_id": self.parent_id,
            "tags": self.tags,
            "status": plain(self.status),
            "context": self.context
        }

    @classmethod
    def from_dict(cls, data: dict) -> 'MemoryEntry':
        """Rebuild an entry from a dict produced by ``to_dict()``.

        Missing optional keys fall back to sensible defaults; a missing
        type/status falls back to OUTPUT/ACTIVE respectively.
        """
        raw_type = data.get("type")
        raw_status = data.get("status")
        return cls(
            id=data["id"],
            type=MemoryType(raw_type) if raw_type else MemoryType.OUTPUT,
            created_at=data["created_at"],
            content=data.get("content"),
            content_path=data.get("content_path"),
            summary=data.get("summary"),
            tokens_estimate=data.get("tokens_estimate", 0),
            chunk_ids=data.get("chunk_ids", []),
            checkpoint_id=data.get("checkpoint_id"),
            directory=data.get("directory"),
            parent_id=data.get("parent_id"),
            tags=data.get("tags", []),
            status=MemoryStatus(raw_status) if raw_status else MemoryStatus.ACTIVE,
            context=data.get("context", {})
        )


@dataclass
class MemoryReference:
    """A lightweight reference to a memory entry (for embedding in prompts)."""
    id: str
    type: str
    summary: str
    tokens: int
    created_at: str

    def to_inline(self) -> str:
        """Format for inline prompt inclusion."""
        return f"[Memory:{self.id}] {self.summary} ({self.tokens} tokens)"


# =============================================================================
# Memory Manager
# =============================================================================

class MemoryManager:
    """
    Manages the external memory layer.
    """

    def __init__(self, db_path: Path = MEMORY_DB):
        self.db_path = db_path
        self.chunks_dir = CHUNKS_DIR
        self.summaries_dir = SUMMARIES_DIR

        # Create every storage location up front so later writes cannot
        # fail on a missing directory.
        for location in (self.db_path.parent, self.chunks_dir, self.summaries_dir):
            location.mkdir(parents=True, exist_ok=True)

        self._init_db()

        # Optional hot cache; None when Redis is not reachable.
        self.redis = self._get_redis()

    def _init_db(self):
        """Create the SQLite schema (table + secondary indexes) if absent."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS memory_entries (
                id TEXT PRIMARY KEY,
                type TEXT NOT NULL,
                created_at TEXT NOT NULL,
                content TEXT,
                content_path TEXT,
                summary TEXT,
                tokens_estimate INTEGER DEFAULT 0,
                chunk_ids TEXT,
                checkpoint_id TEXT,
                directory TEXT,
                parent_id TEXT,
                tags TEXT,
                status TEXT DEFAULT 'active',
                context TEXT
            )
        """)
        # One index per commonly-filtered column.
        for index_name, column in (
            ("idx_memory_type", "type"),
            ("idx_memory_checkpoint", "checkpoint_id"),
            ("idx_memory_directory", "directory"),
            ("idx_memory_created", "created_at"),
            ("idx_memory_status", "status"),
        ):
            cursor.execute(
                f"CREATE INDEX IF NOT EXISTS {index_name} ON memory_entries({column})"
            )
        conn.commit()
        conn.close()
_get_redis(self): """Get optional Redis connection for hot entries.""" try: import redis with open("/opt/vault/init-keys.json") as f: token = json.load(f)["root_token"] import subprocess result = subprocess.run([ "curl", "-sk", "-H", f"X-Vault-Token: {token}", "https://127.0.0.1:8200/v1/secret/data/services/dragonfly" ], capture_output=True, text=True) creds = json.loads(result.stdout)["data"]["data"] return redis.Redis( host=creds["host"], port=int(creds["port"]), password=creds["password"], decode_responses=True ) except: return None def _now(self) -> str: return datetime.now(timezone.utc).isoformat() def _generate_id(self, prefix: str = "mem") -> str: timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S") suffix = hashlib.sha256(f"{timestamp}-{os.getpid()}-{os.urandom(4).hex()}".encode()).hexdigest()[:8] return f"{prefix}-{timestamp}-{suffix}" def _estimate_tokens(self, text: str) -> int: """Estimate token count from text.""" return len(text) // CHARS_PER_TOKEN def _generate_summary(self, content: str, max_tokens: int = 200) -> str: """ Generate a summary of content. For now, uses extractive summarization (first/last lines + key info). Could be enhanced with LLM summarization. """ lines = content.strip().split('\n') tokens_used = 0 summary_lines = [] # Get first few lines for line in lines[:5]: line_tokens = self._estimate_tokens(line) if tokens_used + line_tokens < max_tokens // 2: summary_lines.append(line) tokens_used += line_tokens else: break # Add ellipsis if there's more if len(lines) > 10: summary_lines.append(f"... 
({len(lines) - 10} more lines) ...") # Get last few lines for line in lines[-3:]: line_tokens = self._estimate_tokens(line) if tokens_used + line_tokens < max_tokens: summary_lines.append(line) tokens_used += line_tokens return '\n'.join(summary_lines) # ------------------------------------------------------------------------- # Core Operations # ------------------------------------------------------------------------- def store( self, content: str, type: MemoryType = MemoryType.OUTPUT, tags: List[str] = None, checkpoint_id: str = None, directory: str = None, context: Dict[str, Any] = None, auto_chunk: bool = True, auto_summarize: bool = True ) -> MemoryEntry: """ Store content in memory. Automatically chunks large content and generates summaries. Returns the memory entry (or parent entry if chunked). """ tokens = self._estimate_tokens(content) entry_id = self._generate_id() # Decide storage strategy if tokens <= MAX_INLINE_SIZE: # Small content - store inline entry = MemoryEntry( id=entry_id, type=type, created_at=self._now(), content=content, tokens_estimate=tokens, checkpoint_id=checkpoint_id, directory=directory, tags=tags or [], context=context or {} ) elif auto_chunk and tokens > DEFAULT_CHUNK_SIZE: # Large content - chunk it entry = self._store_chunked( content=content, entry_id=entry_id, type=type, tags=tags, checkpoint_id=checkpoint_id, directory=directory, context=context ) else: # Medium content - store to file content_path = self._write_chunk_file(entry_id, content) summary = self._generate_summary(content) if auto_summarize else None entry = MemoryEntry( id=entry_id, type=type, created_at=self._now(), content_path=str(content_path), summary=summary, tokens_estimate=tokens, checkpoint_id=checkpoint_id, directory=directory, tags=tags or [], context=context or {} ) # Save to database self._save_entry(entry) # Cache in Redis if available if self.redis: self.redis.setex( f"memory:{entry.id}", 3600, # 1 hour TTL json.dumps(entry.to_dict()) ) return entry def 
_store_chunked( self, content: str, entry_id: str, type: MemoryType, tags: List[str], checkpoint_id: str, directory: str, context: Dict[str, Any] ) -> MemoryEntry: """Store large content as multiple chunks.""" chunk_size_chars = DEFAULT_CHUNK_SIZE * CHARS_PER_TOKEN chunks = [] chunk_ids = [] # Split into chunks for i in range(0, len(content), chunk_size_chars): chunk_content = content[i:i + chunk_size_chars] chunk_id = f"{entry_id}-chunk-{len(chunks):03d}" # Write chunk to file chunk_path = self._write_chunk_file(chunk_id, chunk_content) chunk_entry = MemoryEntry( id=chunk_id, type=MemoryType.CHUNK, created_at=self._now(), content_path=str(chunk_path), tokens_estimate=self._estimate_tokens(chunk_content), parent_id=entry_id, tags=tags or [], context={"chunk_index": len(chunks), "total_chunks": -1} ) chunks.append(chunk_entry) chunk_ids.append(chunk_id) # Update total_chunks in context for chunk in chunks: chunk.context["total_chunks"] = len(chunks) self._save_entry(chunk) # Generate summary of full content summary = self._generate_summary(content) # Create parent entry parent_entry = MemoryEntry( id=entry_id, type=type, created_at=self._now(), summary=summary, tokens_estimate=self._estimate_tokens(content), chunk_ids=chunk_ids, checkpoint_id=checkpoint_id, directory=directory, tags=tags or [], context=context or {} ) return parent_entry def _write_chunk_file(self, chunk_id: str, content: str, compress: bool = True) -> Path: """Write content to a chunk file.""" if compress: filepath = self.chunks_dir / f"{chunk_id}.gz" with gzip.open(filepath, 'wt', encoding='utf-8') as f: f.write(content) else: filepath = self.chunks_dir / f"{chunk_id}.txt" filepath.write_text(content) return filepath def _read_chunk_file(self, path: str) -> str: """Read content from a chunk file.""" filepath = Path(path) if filepath.suffix == '.gz': with gzip.open(filepath, 'rt', encoding='utf-8') as f: return f.read() else: return filepath.read_text() def _save_entry(self, entry: MemoryEntry): 
"""Save entry to database.""" conn = sqlite3.connect(self.db_path) cursor = conn.cursor() cursor.execute(""" INSERT OR REPLACE INTO memory_entries (id, type, created_at, content, content_path, summary, tokens_estimate, chunk_ids, checkpoint_id, directory, parent_id, tags, status, context) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( entry.id, entry.type.value if isinstance(entry.type, MemoryType) else entry.type, entry.created_at, entry.content, entry.content_path, entry.summary, entry.tokens_estimate, json.dumps(entry.chunk_ids), entry.checkpoint_id, entry.directory, entry.parent_id, json.dumps(entry.tags), entry.status.value if isinstance(entry.status, MemoryStatus) else entry.status, json.dumps(entry.context) )) conn.commit() conn.close() def fetch(self, entry_id: str, include_content: bool = True) -> Optional[MemoryEntry]: """ Fetch a memory entry by ID. If include_content is True and content is in a file, reads the file. """ # Try Redis cache first if self.redis: cached = self.redis.get(f"memory:{entry_id}") if cached: entry = MemoryEntry.from_dict(json.loads(cached)) if include_content and entry.content_path and not entry.content: entry.content = self._read_chunk_file(entry.content_path) return entry # Fall back to database conn = sqlite3.connect(self.db_path) conn.row_factory = sqlite3.Row cursor = conn.cursor() cursor.execute("SELECT * FROM memory_entries WHERE id = ?", (entry_id,)) row = cursor.fetchone() conn.close() if not row: return None entry = self._row_to_entry(row) if include_content and entry.content_path and not entry.content: try: entry.content = self._read_chunk_file(entry.content_path) except FileNotFoundError: pass return entry def _row_to_entry(self, row: sqlite3.Row) -> MemoryEntry: """Convert database row to MemoryEntry.""" return MemoryEntry( id=row['id'], type=MemoryType(row['type']), created_at=row['created_at'], content=row['content'], content_path=row['content_path'], summary=row['summary'], 
tokens_estimate=row['tokens_estimate'], chunk_ids=json.loads(row['chunk_ids']) if row['chunk_ids'] else [], checkpoint_id=row['checkpoint_id'], directory=row['directory'], parent_id=row['parent_id'], tags=json.loads(row['tags']) if row['tags'] else [], status=MemoryStatus(row['status']), context=json.loads(row['context']) if row['context'] else {} ) def fetch_chunks(self, parent_id: str) -> List[MemoryEntry]: """Fetch all chunks for a parent entry.""" conn = sqlite3.connect(self.db_path) conn.row_factory = sqlite3.Row cursor = conn.cursor() cursor.execute( "SELECT * FROM memory_entries WHERE parent_id = ? ORDER BY id", (parent_id,) ) rows = cursor.fetchall() conn.close() return [self._row_to_entry(row) for row in rows] def list_entries( self, type: MemoryType = None, directory: str = None, checkpoint_id: str = None, status: MemoryStatus = MemoryStatus.ACTIVE, limit: int = 50, include_chunks: bool = False ) -> List[MemoryEntry]: """List memory entries with optional filters.""" conn = sqlite3.connect(self.db_path) conn.row_factory = sqlite3.Row cursor = conn.cursor() query = "SELECT * FROM memory_entries WHERE 1=1" params = [] if type: query += " AND type = ?" params.append(type.value) elif not include_chunks: query += " AND type != ?" params.append(MemoryType.CHUNK.value) if directory: query += " AND directory = ?" params.append(directory) if checkpoint_id: query += " AND checkpoint_id = ?" params.append(checkpoint_id) if status: query += " AND status = ?" params.append(status.value) query += " ORDER BY created_at DESC LIMIT ?" 
params.append(limit) cursor.execute(query, params) rows = cursor.fetchall() conn.close() return [self._row_to_entry(row) for row in rows] def get_reference(self, entry_id: str) -> Optional[MemoryReference]: """Get a lightweight reference to an entry.""" entry = self.fetch(entry_id, include_content=False) if not entry: return None return MemoryReference( id=entry.id, type=entry.type.value, summary=entry.summary or "(no summary)", tokens=entry.tokens_estimate, created_at=entry.created_at ) def get_references_for_checkpoint(self, checkpoint_id: str) -> List[MemoryReference]: """Get all memory references linked to a checkpoint.""" entries = self.list_entries(checkpoint_id=checkpoint_id) return [ MemoryReference( id=e.id, type=e.type.value, summary=e.summary or "(no summary)", tokens=e.tokens_estimate, created_at=e.created_at ) for e in entries ] def search(self, query: str, limit: int = 20) -> List[MemoryEntry]: """Search memory entries by content or summary.""" conn = sqlite3.connect(self.db_path) conn.row_factory = sqlite3.Row cursor = conn.cursor() # Search in summary and tags cursor.execute(""" SELECT * FROM memory_entries WHERE (summary LIKE ? OR tags LIKE ? OR content LIKE ?) AND type != ? AND status = ? ORDER BY created_at DESC LIMIT ? """, (f"%{query}%", f"%{query}%", f"%{query}%", MemoryType.CHUNK.value, MemoryStatus.ACTIVE.value, limit)) rows = cursor.fetchall() conn.close() return [self._row_to_entry(row) for row in rows] def summarize(self, entry_id: str, force: bool = False) -> str: """ Generate or retrieve summary for an entry. 
""" entry = self.fetch(entry_id, include_content=True) if not entry: return f"Entry not found: {entry_id}" if entry.summary and not force: return entry.summary # Generate summary from content content = entry.content if not content and entry.chunk_ids: # Reconstruct from chunks for summary chunks = self.fetch_chunks(entry_id) content = "" for chunk in chunks: chunk_content = self.fetch(chunk.id, include_content=True) if chunk_content and chunk_content.content: content += chunk_content.content if not content: return "(no content to summarize)" summary = self._generate_summary(content) # Update entry with summary entry.summary = summary self._save_entry(entry) return summary def archive(self, entry_id: str): """Archive an entry (mark as archived but keep data).""" entry = self.fetch(entry_id, include_content=False) if entry: entry.status = MemoryStatus.ARCHIVED self._save_entry(entry) def prune(self, keep_days: int = MAX_CHUNK_AGE_DAYS, keep_entries: int = MAX_MEMORY_ENTRIES): """Prune old entries to manage storage.""" conn = sqlite3.connect(self.db_path) cursor = conn.cursor() # Get count cursor.execute("SELECT COUNT(*) FROM memory_entries WHERE status = ?", (MemoryStatus.ACTIVE.value,)) count = cursor.fetchone()[0] deleted = 0 # Delete oldest entries if over limit if count > keep_entries: cursor.execute(""" SELECT id, content_path FROM memory_entries WHERE status = ? ORDER BY created_at ASC LIMIT ? 
# =============================================================================
# CLI Interface
# =============================================================================

def cli():
    """Command-line entry point: parse args and dispatch to the manager.

    Subcommands: log, fetch, list, search, summarize, refs, prune, stats.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="External Memory Layer",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  memory log "Large output content here"
  memory log --file output.txt --tag "test-results"
  memory fetch mem-20260123-123456-abcd1234
  memory fetch mem-20260123-123456-abcd1234 --summary-only
  memory list --type output --limit 10
  memory search "error"
  memory summarize mem-20260123-123456-abcd1234
  memory prune --keep-days 7
"""
    )

    subparsers = parser.add_subparsers(dest="command", required=True)

    # log: store new content (positional arg, --file, or --stdin)
    log_parser = subparsers.add_parser("log", help="Store content in memory")
    log_parser.add_argument("content", nargs="?", help="Content to store (or use --file)")
    log_parser.add_argument("--file", "-f", help="Read content from file")
    log_parser.add_argument("--stdin", action="store_true", help="Read from stdin")
    log_parser.add_argument("--type", "-t", choices=["transcript", "output", "context"],
                            default="output", help="Entry type")
    log_parser.add_argument("--tag", action="append", help="Add tag (can repeat)")
    log_parser.add_argument("--checkpoint", help="Link to checkpoint ID")
    log_parser.add_argument("--directory", "-d", help="Link to directory")
    log_parser.add_argument("--no-chunk", action="store_true", help="Don't auto-chunk")
    log_parser.add_argument("--json", action="store_true", help="Output JSON")

    # fetch: retrieve one entry (optionally a single chunk by index)
    fetch_parser = subparsers.add_parser("fetch", help="Retrieve memory entry")
    fetch_parser.add_argument("entry_id", help="Memory entry ID")
    fetch_parser.add_argument("--summary-only", "-s", action="store_true",
                              help="Only show summary")
    fetch_parser.add_argument("--chunk", "-c", type=int, help="Fetch specific chunk index")
    fetch_parser.add_argument("--json", action="store_true", help="Output JSON")

    # list: browse entries with filters
    list_parser = subparsers.add_parser("list", help="List memory entries")
    list_parser.add_argument("--type", "-t",
                             choices=["transcript", "output", "summary", "context"])
    list_parser.add_argument("--directory", "-d", help="Filter by directory")
    list_parser.add_argument("--checkpoint", help="Filter by checkpoint ID")
    list_parser.add_argument("--limit", "-n", type=int, default=20)
    list_parser.add_argument("--json", action="store_true", help="Output JSON")

    # search: substring search over summary/tags/content
    search_parser = subparsers.add_parser("search", help="Search memory entries")
    search_parser.add_argument("query", help="Search query")
    search_parser.add_argument("--limit", "-n", type=int, default=10)
    search_parser.add_argument("--json", action="store_true", help="Output JSON")

    # summarize: show (or force-regenerate) an entry's summary
    summarize_parser = subparsers.add_parser("summarize", help="Generate/show summary")
    summarize_parser.add_argument("entry_id", help="Memory entry ID")
    summarize_parser.add_argument("--force", action="store_true", help="Regenerate summary")

    # refs: lightweight references for prompt embedding
    refs_parser = subparsers.add_parser("refs", help="Get memory references")
    refs_parser.add_argument("--checkpoint", help="Get refs for checkpoint")
    refs_parser.add_argument("--directory", "-d", help="Get refs for directory")
    refs_parser.add_argument("--json", action="store_true", help="Output JSON")

    # prune: retention enforcement
    prune_parser = subparsers.add_parser("prune", help="Prune old entries")
    prune_parser.add_argument("--keep-days", type=int, default=MAX_CHUNK_AGE_DAYS)
    prune_parser.add_argument("--keep-entries", type=int, default=MAX_MEMORY_ENTRIES)

    # stats: aggregate counts / sizes
    subparsers.add_parser("stats", help="Show memory statistics")

    args = parser.parse_args()
    manager = MemoryManager()

    # -------------------------------------------------------------------------
    if args.command == "log":
        # Get content (priority: --stdin, then --file, then positional)
        if args.stdin:
            content = sys.stdin.read()
        elif args.file:
            content = Path(args.file).read_text()
        elif args.content:
            content = args.content
        else:
            print("Error: Provide content, --file, or --stdin")
            sys.exit(1)

        entry = manager.store(
            content=content,
            type=MemoryType(args.type),
            tags=args.tag or [],
            checkpoint_id=args.checkpoint,
            directory=args.directory,
            auto_chunk=not args.no_chunk
        )

        if args.json:
            print(json.dumps(entry.to_dict(), indent=2))
        else:
            print(f"\n{'='*60}")
            print("MEMORY STORED")
            print(f"{'='*60}")
            print(f"ID: {entry.id}")
            print(f"Type: {entry.type.value}")
            print(f"Tokens: {entry.tokens_estimate}")
            if entry.chunk_ids:
                print(f"Chunks: {len(entry.chunk_ids)}")
            if entry.summary:
                print(f"\nSummary:\n{entry.summary[:200]}...")
            print(f"{'='*60}")

    elif args.command == "fetch":
        if args.chunk is not None:
            # Fetch specific chunk: chunk ids are <parent>-chunk-NNN
            chunk_id = f"{args.entry_id}-chunk-{args.chunk:03d}"
            entry = manager.fetch(chunk_id, include_content=True)
        else:
            # Skip loading file content when only the summary is wanted
            entry = manager.fetch(args.entry_id, include_content=not args.summary_only)

        if not entry:
            print(f"Entry not found: {args.entry_id}")
            sys.exit(1)

        if args.json:
            print(json.dumps(entry.to_dict(), indent=2))
        elif args.summary_only:
            print(entry.summary or "(no summary)")
        else:
            print(f"\n{'='*60}")
            print(f"MEMORY: {entry.id}")
            print(f"{'='*60}")
            print(f"Type: {entry.type.value}")
            print(f"Created: {entry.created_at}")
            print(f"Tokens: {entry.tokens_estimate}")
            if entry.chunk_ids:
                print(f"Chunks: {len(entry.chunk_ids)}")
                print(f"  IDs: {', '.join(entry.chunk_ids[:3])}...")
            if entry.summary:
                print(f"\nSummary:\n{entry.summary}")
            if entry.content:
                print(f"\nContent:\n{'-'*40}")
                print(entry.content)
            print(f"{'='*60}")

    elif args.command == "list":
        entry_type = MemoryType(args.type) if args.type else None
        entries = manager.list_entries(
            type=entry_type,
            directory=args.directory,
            checkpoint_id=args.checkpoint,
            limit=args.limit
        )

        if args.json:
            print(json.dumps([e.to_dict() for e in entries], indent=2))
        else:
            print(f"\n{'='*60}")
            print(f"MEMORY ENTRIES ({len(entries)})")
            print(f"{'='*60}")
            for entry in entries:
                chunks_str = f" [{len(entry.chunk_ids)} chunks]" if entry.chunk_ids else ""
                # Truncate long summaries for the one-line listing
                summary = (entry.summary or "")[:50]
                if len(entry.summary or "") > 50:
                    summary += "..."
                print(f"\n  {entry.id}")
                print(f"    Type: {entry.type.value}{chunks_str}")
                print(f"    Tokens: {entry.tokens_estimate}")
                if summary:
                    print(f"    Summary: {summary}")
            print(f"\n{'='*60}")

    elif args.command == "search":
        entries = manager.search(args.query, limit=args.limit)

        if args.json:
            print(json.dumps([e.to_dict() for e in entries], indent=2))
        else:
            print(f"\n{'='*60}")
            print(f"SEARCH RESULTS: '{args.query}' ({len(entries)} found)")
            print(f"{'='*60}")
            for entry in entries:
                summary = (entry.summary or "")[:60]
                print(f"\n  {entry.id}")
                print(f"    Type: {entry.type.value}, Tokens: {entry.tokens_estimate}")
                if summary:
                    print(f"    {summary}")
            print(f"\n{'='*60}")

    elif args.command == "summarize":
        summary = manager.summarize(args.entry_id, force=args.force)
        print(summary)

    elif args.command == "refs":
        refs = []
        if args.checkpoint:
            refs = manager.get_references_for_checkpoint(args.checkpoint)
        elif args.directory:
            # Build references manually from the directory listing
            entries = manager.list_entries(directory=args.directory)
            refs = [
                MemoryReference(
                    id=e.id,
                    type=e.type.value,
                    summary=e.summary or "",
                    tokens=e.tokens_estimate,
                    created_at=e.created_at
                )
                for e in entries
            ]

        if args.json:
            print(json.dumps([asdict(r) for r in refs], indent=2))
        else:
            print(f"\n{'='*60}")
            print(f"MEMORY REFERENCES ({len(refs)})")
            print(f"{'='*60}")
            for ref in refs:
                print(f"\n  {ref.to_inline()}")
            print(f"\n{'='*60}")

    elif args.command == "prune":
        deleted = manager.prune(
            keep_days=args.keep_days,
            keep_entries=args.keep_entries
        )
        print(f"Pruned {deleted} entries")

    elif args.command == "stats":
        # Queries the DB directly (read-only aggregates), bypassing the manager
        conn = sqlite3.connect(MEMORY_DB)
        cursor = conn.cursor()
        cursor.execute("SELECT COUNT(*) FROM memory_entries WHERE status = 'active'")
        total = cursor.fetchone()[0]
        cursor.execute("""
            SELECT type, COUNT(*), SUM(tokens_estimate)
            FROM memory_entries WHERE status = 'active'
            GROUP BY type
        """)
        by_type = cursor.fetchall()
        cursor.execute("SELECT SUM(tokens_estimate) FROM memory_entries WHERE status = 'active'")
        total_tokens = cursor.fetchone()[0] or 0
        conn.close()

        # Get storage size (on-disk chunk files only)
        chunks_size = sum(f.stat().st_size for f in CHUNKS_DIR.glob("*") if f.is_file())

        print(f"\n{'='*60}")
        print("MEMORY STATISTICS")
        print(f"{'='*60}")
        print(f"Total Entries: {total}")
        print(f"Total Tokens: {total_tokens:,}")
        print(f"Storage Size: {chunks_size / 1024:.1f} KB")
        print(f"\nBy Type:")
        for type_name, count, tokens in by_type:
            print(f"  {type_name}: {count} entries, {tokens or 0:,} tokens")
        print(f"{'='*60}")


if __name__ == "__main__":
    cli()