Phase 8 Production Hardening with complete governance infrastructure: - Vault integration with tiered policies (T0-T4) - DragonflyDB state management - SQLite audit ledger - Pipeline DSL and templates - Promotion/revocation engine - Checkpoint system for session persistence - Health manager and circuit breaker for fault tolerance - GitHub/Slack integrations - Architectural test pipeline with bug watcher, suggestion engine, council review - Multi-agent chaos testing framework Test Results: - Governance tests: 68/68 passing - E2E workflow: 16/16 passing - Phase 2 Vault: 14/14 passing - Integration tests: 27/27 passing Coverage: 57.6% average across 12 phases Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1004 lines
34 KiB
Python
Executable File
1004 lines
34 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
External Memory Layer
|
|
=====================
|
|
|
|
Persistent context memory system that integrates with checkpoint + STATUS.
|
|
Provides token-efficient storage and retrieval of large outputs, transcripts,
|
|
and summaries.
|
|
|
|
Features:
|
|
- Chunked output storage with unique IDs
|
|
- Automatic summarization of large content
|
|
- Links between checkpoints, STATUS files, and memory entries
|
|
- CLI for storing, fetching, and summarizing content
|
|
|
|
Architecture:
|
|
- SQLite database for metadata and indexes
|
|
- Filesystem for large content chunks
|
|
- Redis for hot/recent entries (optional)
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import json
|
|
import hashlib
|
|
import sqlite3
|
|
import gzip
|
|
from dataclasses import dataclass, field, asdict
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Optional, List, Dict, Any, Tuple
|
|
from enum import Enum
|
|
import textwrap
|
|
|
|
# =============================================================================
|
|
# Configuration
|
|
# =============================================================================
|
|
|
|
MEMORY_DIR = Path("/opt/agent-governance/memory")
|
|
MEMORY_DB = MEMORY_DIR / "memory.db"
|
|
CHUNKS_DIR = MEMORY_DIR / "chunks"
|
|
SUMMARIES_DIR = MEMORY_DIR / "summaries"
|
|
|
|
# Thresholds
|
|
DEFAULT_CHUNK_SIZE = 4000 # tokens (~16000 chars)
|
|
SUMMARY_THRESHOLD = 1000 # tokens - content over this gets summarized
|
|
MAX_INLINE_SIZE = 500 # tokens - content under this stays inline
|
|
CHARS_PER_TOKEN = 4 # rough estimate
|
|
|
|
# Retention
|
|
MAX_MEMORY_ENTRIES = 1000
|
|
MAX_CHUNK_AGE_DAYS = 30
|
|
|
|
|
|
class MemoryType(str, Enum):
|
|
"""Types of memory entries."""
|
|
TRANSCRIPT = "transcript" # Full conversation logs
|
|
OUTPUT = "output" # Command/tool outputs
|
|
SUMMARY = "summary" # Generated summaries
|
|
CHUNK = "chunk" # Large output chunk
|
|
REFERENCE = "reference" # Pointer to external content
|
|
CONTEXT = "context" # Saved context state
|
|
|
|
|
|
class MemoryStatus(str, Enum):
|
|
"""Status of memory entries."""
|
|
ACTIVE = "active"
|
|
ARCHIVED = "archived"
|
|
EXPIRED = "expired"
|
|
|
|
|
|
# =============================================================================
|
|
# Data Classes
|
|
# =============================================================================
|
|
|
|
@dataclass
|
|
class MemoryEntry:
|
|
"""A single memory entry."""
|
|
id: str
|
|
type: MemoryType
|
|
created_at: str
|
|
|
|
# Content (either inline or chunked)
|
|
content: Optional[str] = None
|
|
content_path: Optional[str] = None # Path to file if chunked
|
|
|
|
# Metadata
|
|
summary: Optional[str] = None
|
|
tokens_estimate: int = 0
|
|
chunk_ids: List[str] = field(default_factory=list) # For multi-chunk entries
|
|
|
|
# Links
|
|
checkpoint_id: Optional[str] = None
|
|
directory: Optional[str] = None
|
|
parent_id: Optional[str] = None # For chunks pointing to parent entry
|
|
|
|
# Tags and status
|
|
tags: List[str] = field(default_factory=list)
|
|
status: MemoryStatus = MemoryStatus.ACTIVE
|
|
|
|
# Context
|
|
context: Dict[str, Any] = field(default_factory=dict)
|
|
|
|
def to_dict(self) -> dict:
|
|
return {
|
|
"id": self.id,
|
|
"type": self.type.value if isinstance(self.type, MemoryType) else self.type,
|
|
"created_at": self.created_at,
|
|
"content": self.content,
|
|
"content_path": self.content_path,
|
|
"summary": self.summary,
|
|
"tokens_estimate": self.tokens_estimate,
|
|
"chunk_ids": self.chunk_ids,
|
|
"checkpoint_id": self.checkpoint_id,
|
|
"directory": self.directory,
|
|
"parent_id": self.parent_id,
|
|
"tags": self.tags,
|
|
"status": self.status.value if isinstance(self.status, MemoryStatus) else self.status,
|
|
"context": self.context
|
|
}
|
|
|
|
@classmethod
|
|
def from_dict(cls, data: dict) -> 'MemoryEntry':
|
|
return cls(
|
|
id=data["id"],
|
|
type=MemoryType(data["type"]) if data.get("type") else MemoryType.OUTPUT,
|
|
created_at=data["created_at"],
|
|
content=data.get("content"),
|
|
content_path=data.get("content_path"),
|
|
summary=data.get("summary"),
|
|
tokens_estimate=data.get("tokens_estimate", 0),
|
|
chunk_ids=data.get("chunk_ids", []),
|
|
checkpoint_id=data.get("checkpoint_id"),
|
|
directory=data.get("directory"),
|
|
parent_id=data.get("parent_id"),
|
|
tags=data.get("tags", []),
|
|
status=MemoryStatus(data["status"]) if data.get("status") else MemoryStatus.ACTIVE,
|
|
context=data.get("context", {})
|
|
)
|
|
|
|
|
|
@dataclass
|
|
class MemoryReference:
|
|
"""A lightweight reference to a memory entry (for embedding in prompts)."""
|
|
id: str
|
|
type: str
|
|
summary: str
|
|
tokens: int
|
|
created_at: str
|
|
|
|
def to_inline(self) -> str:
|
|
"""Format for inline prompt inclusion."""
|
|
return f"[Memory:{self.id}] {self.summary} ({self.tokens} tokens)"
|
|
|
|
|
|
# =============================================================================
|
|
# Memory Manager
|
|
# =============================================================================
|
|
|
|
class MemoryManager:
|
|
"""
|
|
Manages the external memory layer.
|
|
"""
|
|
|
|
def __init__(self, db_path: Path = MEMORY_DB):
|
|
self.db_path = db_path
|
|
self.chunks_dir = CHUNKS_DIR
|
|
self.summaries_dir = SUMMARIES_DIR
|
|
|
|
# Ensure directories exist
|
|
self.db_path.parent.mkdir(parents=True, exist_ok=True)
|
|
self.chunks_dir.mkdir(parents=True, exist_ok=True)
|
|
self.summaries_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
# Initialize database
|
|
self._init_db()
|
|
|
|
# Optional Redis connection
|
|
self.redis = self._get_redis()
|
|
|
|
def _init_db(self):
|
|
"""Initialize SQLite database schema."""
|
|
conn = sqlite3.connect(self.db_path)
|
|
cursor = conn.cursor()
|
|
|
|
cursor.execute("""
|
|
CREATE TABLE IF NOT EXISTS memory_entries (
|
|
id TEXT PRIMARY KEY,
|
|
type TEXT NOT NULL,
|
|
created_at TEXT NOT NULL,
|
|
content TEXT,
|
|
content_path TEXT,
|
|
summary TEXT,
|
|
tokens_estimate INTEGER DEFAULT 0,
|
|
chunk_ids TEXT,
|
|
checkpoint_id TEXT,
|
|
directory TEXT,
|
|
parent_id TEXT,
|
|
tags TEXT,
|
|
status TEXT DEFAULT 'active',
|
|
context TEXT
|
|
)
|
|
""")
|
|
|
|
cursor.execute("""
|
|
CREATE INDEX IF NOT EXISTS idx_memory_type ON memory_entries(type)
|
|
""")
|
|
cursor.execute("""
|
|
CREATE INDEX IF NOT EXISTS idx_memory_checkpoint ON memory_entries(checkpoint_id)
|
|
""")
|
|
cursor.execute("""
|
|
CREATE INDEX IF NOT EXISTS idx_memory_directory ON memory_entries(directory)
|
|
""")
|
|
cursor.execute("""
|
|
CREATE INDEX IF NOT EXISTS idx_memory_created ON memory_entries(created_at)
|
|
""")
|
|
cursor.execute("""
|
|
CREATE INDEX IF NOT EXISTS idx_memory_status ON memory_entries(status)
|
|
""")
|
|
|
|
conn.commit()
|
|
conn.close()
|
|
|
|
def _get_redis(self):
|
|
"""Get optional Redis connection for hot entries."""
|
|
try:
|
|
import redis
|
|
with open("/opt/vault/init-keys.json") as f:
|
|
token = json.load(f)["root_token"]
|
|
|
|
import subprocess
|
|
result = subprocess.run([
|
|
"curl", "-sk",
|
|
"-H", f"X-Vault-Token: {token}",
|
|
"https://127.0.0.1:8200/v1/secret/data/services/dragonfly"
|
|
], capture_output=True, text=True)
|
|
|
|
creds = json.loads(result.stdout)["data"]["data"]
|
|
return redis.Redis(
|
|
host=creds["host"],
|
|
port=int(creds["port"]),
|
|
password=creds["password"],
|
|
decode_responses=True
|
|
)
|
|
except:
|
|
return None
|
|
|
|
def _now(self) -> str:
|
|
return datetime.now(timezone.utc).isoformat()
|
|
|
|
def _generate_id(self, prefix: str = "mem") -> str:
|
|
timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
|
|
suffix = hashlib.sha256(f"{timestamp}-{os.getpid()}-{os.urandom(4).hex()}".encode()).hexdigest()[:8]
|
|
return f"{prefix}-{timestamp}-{suffix}"
|
|
|
|
def _estimate_tokens(self, text: str) -> int:
|
|
"""Estimate token count from text."""
|
|
return len(text) // CHARS_PER_TOKEN
|
|
|
|
def _generate_summary(self, content: str, max_tokens: int = 200) -> str:
|
|
"""
|
|
Generate a summary of content.
|
|
For now, uses extractive summarization (first/last lines + key info).
|
|
Could be enhanced with LLM summarization.
|
|
"""
|
|
lines = content.strip().split('\n')
|
|
tokens_used = 0
|
|
summary_lines = []
|
|
|
|
# Get first few lines
|
|
for line in lines[:5]:
|
|
line_tokens = self._estimate_tokens(line)
|
|
if tokens_used + line_tokens < max_tokens // 2:
|
|
summary_lines.append(line)
|
|
tokens_used += line_tokens
|
|
else:
|
|
break
|
|
|
|
# Add ellipsis if there's more
|
|
if len(lines) > 10:
|
|
summary_lines.append(f"... ({len(lines) - 10} more lines) ...")
|
|
|
|
# Get last few lines
|
|
for line in lines[-3:]:
|
|
line_tokens = self._estimate_tokens(line)
|
|
if tokens_used + line_tokens < max_tokens:
|
|
summary_lines.append(line)
|
|
tokens_used += line_tokens
|
|
|
|
return '\n'.join(summary_lines)
|
|
|
|
# -------------------------------------------------------------------------
|
|
# Core Operations
|
|
# -------------------------------------------------------------------------
|
|
|
|
def store(
|
|
self,
|
|
content: str,
|
|
type: MemoryType = MemoryType.OUTPUT,
|
|
tags: List[str] = None,
|
|
checkpoint_id: str = None,
|
|
directory: str = None,
|
|
context: Dict[str, Any] = None,
|
|
auto_chunk: bool = True,
|
|
auto_summarize: bool = True
|
|
) -> MemoryEntry:
|
|
"""
|
|
Store content in memory.
|
|
|
|
Automatically chunks large content and generates summaries.
|
|
Returns the memory entry (or parent entry if chunked).
|
|
"""
|
|
tokens = self._estimate_tokens(content)
|
|
entry_id = self._generate_id()
|
|
|
|
# Decide storage strategy
|
|
if tokens <= MAX_INLINE_SIZE:
|
|
# Small content - store inline
|
|
entry = MemoryEntry(
|
|
id=entry_id,
|
|
type=type,
|
|
created_at=self._now(),
|
|
content=content,
|
|
tokens_estimate=tokens,
|
|
checkpoint_id=checkpoint_id,
|
|
directory=directory,
|
|
tags=tags or [],
|
|
context=context or {}
|
|
)
|
|
elif auto_chunk and tokens > DEFAULT_CHUNK_SIZE:
|
|
# Large content - chunk it
|
|
entry = self._store_chunked(
|
|
content=content,
|
|
entry_id=entry_id,
|
|
type=type,
|
|
tags=tags,
|
|
checkpoint_id=checkpoint_id,
|
|
directory=directory,
|
|
context=context
|
|
)
|
|
else:
|
|
# Medium content - store to file
|
|
content_path = self._write_chunk_file(entry_id, content)
|
|
summary = self._generate_summary(content) if auto_summarize else None
|
|
|
|
entry = MemoryEntry(
|
|
id=entry_id,
|
|
type=type,
|
|
created_at=self._now(),
|
|
content_path=str(content_path),
|
|
summary=summary,
|
|
tokens_estimate=tokens,
|
|
checkpoint_id=checkpoint_id,
|
|
directory=directory,
|
|
tags=tags or [],
|
|
context=context or {}
|
|
)
|
|
|
|
# Save to database
|
|
self._save_entry(entry)
|
|
|
|
# Cache in Redis if available
|
|
if self.redis:
|
|
self.redis.setex(
|
|
f"memory:{entry.id}",
|
|
3600, # 1 hour TTL
|
|
json.dumps(entry.to_dict())
|
|
)
|
|
|
|
return entry
|
|
|
|
def _store_chunked(
|
|
self,
|
|
content: str,
|
|
entry_id: str,
|
|
type: MemoryType,
|
|
tags: List[str],
|
|
checkpoint_id: str,
|
|
directory: str,
|
|
context: Dict[str, Any]
|
|
) -> MemoryEntry:
|
|
"""Store large content as multiple chunks."""
|
|
chunk_size_chars = DEFAULT_CHUNK_SIZE * CHARS_PER_TOKEN
|
|
chunks = []
|
|
chunk_ids = []
|
|
|
|
# Split into chunks
|
|
for i in range(0, len(content), chunk_size_chars):
|
|
chunk_content = content[i:i + chunk_size_chars]
|
|
chunk_id = f"{entry_id}-chunk-{len(chunks):03d}"
|
|
|
|
# Write chunk to file
|
|
chunk_path = self._write_chunk_file(chunk_id, chunk_content)
|
|
|
|
chunk_entry = MemoryEntry(
|
|
id=chunk_id,
|
|
type=MemoryType.CHUNK,
|
|
created_at=self._now(),
|
|
content_path=str(chunk_path),
|
|
tokens_estimate=self._estimate_tokens(chunk_content),
|
|
parent_id=entry_id,
|
|
tags=tags or [],
|
|
context={"chunk_index": len(chunks), "total_chunks": -1}
|
|
)
|
|
chunks.append(chunk_entry)
|
|
chunk_ids.append(chunk_id)
|
|
|
|
# Update total_chunks in context
|
|
for chunk in chunks:
|
|
chunk.context["total_chunks"] = len(chunks)
|
|
self._save_entry(chunk)
|
|
|
|
# Generate summary of full content
|
|
summary = self._generate_summary(content)
|
|
|
|
# Create parent entry
|
|
parent_entry = MemoryEntry(
|
|
id=entry_id,
|
|
type=type,
|
|
created_at=self._now(),
|
|
summary=summary,
|
|
tokens_estimate=self._estimate_tokens(content),
|
|
chunk_ids=chunk_ids,
|
|
checkpoint_id=checkpoint_id,
|
|
directory=directory,
|
|
tags=tags or [],
|
|
context=context or {}
|
|
)
|
|
|
|
return parent_entry
|
|
|
|
def _write_chunk_file(self, chunk_id: str, content: str, compress: bool = True) -> Path:
|
|
"""Write content to a chunk file."""
|
|
if compress:
|
|
filepath = self.chunks_dir / f"{chunk_id}.gz"
|
|
with gzip.open(filepath, 'wt', encoding='utf-8') as f:
|
|
f.write(content)
|
|
else:
|
|
filepath = self.chunks_dir / f"{chunk_id}.txt"
|
|
filepath.write_text(content)
|
|
return filepath
|
|
|
|
def _read_chunk_file(self, path: str) -> str:
|
|
"""Read content from a chunk file."""
|
|
filepath = Path(path)
|
|
if filepath.suffix == '.gz':
|
|
with gzip.open(filepath, 'rt', encoding='utf-8') as f:
|
|
return f.read()
|
|
else:
|
|
return filepath.read_text()
|
|
|
|
def _save_entry(self, entry: MemoryEntry):
|
|
"""Save entry to database."""
|
|
conn = sqlite3.connect(self.db_path)
|
|
cursor = conn.cursor()
|
|
|
|
cursor.execute("""
|
|
INSERT OR REPLACE INTO memory_entries
|
|
(id, type, created_at, content, content_path, summary, tokens_estimate,
|
|
chunk_ids, checkpoint_id, directory, parent_id, tags, status, context)
|
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
""", (
|
|
entry.id,
|
|
entry.type.value if isinstance(entry.type, MemoryType) else entry.type,
|
|
entry.created_at,
|
|
entry.content,
|
|
entry.content_path,
|
|
entry.summary,
|
|
entry.tokens_estimate,
|
|
json.dumps(entry.chunk_ids),
|
|
entry.checkpoint_id,
|
|
entry.directory,
|
|
entry.parent_id,
|
|
json.dumps(entry.tags),
|
|
entry.status.value if isinstance(entry.status, MemoryStatus) else entry.status,
|
|
json.dumps(entry.context)
|
|
))
|
|
|
|
conn.commit()
|
|
conn.close()
|
|
|
|
def fetch(self, entry_id: str, include_content: bool = True) -> Optional[MemoryEntry]:
|
|
"""
|
|
Fetch a memory entry by ID.
|
|
|
|
If include_content is True and content is in a file, reads the file.
|
|
"""
|
|
# Try Redis cache first
|
|
if self.redis:
|
|
cached = self.redis.get(f"memory:{entry_id}")
|
|
if cached:
|
|
entry = MemoryEntry.from_dict(json.loads(cached))
|
|
if include_content and entry.content_path and not entry.content:
|
|
entry.content = self._read_chunk_file(entry.content_path)
|
|
return entry
|
|
|
|
# Fall back to database
|
|
conn = sqlite3.connect(self.db_path)
|
|
conn.row_factory = sqlite3.Row
|
|
cursor = conn.cursor()
|
|
|
|
cursor.execute("SELECT * FROM memory_entries WHERE id = ?", (entry_id,))
|
|
row = cursor.fetchone()
|
|
conn.close()
|
|
|
|
if not row:
|
|
return None
|
|
|
|
entry = self._row_to_entry(row)
|
|
|
|
if include_content and entry.content_path and not entry.content:
|
|
try:
|
|
entry.content = self._read_chunk_file(entry.content_path)
|
|
except FileNotFoundError:
|
|
pass
|
|
|
|
return entry
|
|
|
|
def _row_to_entry(self, row: sqlite3.Row) -> MemoryEntry:
|
|
"""Convert database row to MemoryEntry."""
|
|
return MemoryEntry(
|
|
id=row['id'],
|
|
type=MemoryType(row['type']),
|
|
created_at=row['created_at'],
|
|
content=row['content'],
|
|
content_path=row['content_path'],
|
|
summary=row['summary'],
|
|
tokens_estimate=row['tokens_estimate'],
|
|
chunk_ids=json.loads(row['chunk_ids']) if row['chunk_ids'] else [],
|
|
checkpoint_id=row['checkpoint_id'],
|
|
directory=row['directory'],
|
|
parent_id=row['parent_id'],
|
|
tags=json.loads(row['tags']) if row['tags'] else [],
|
|
status=MemoryStatus(row['status']),
|
|
context=json.loads(row['context']) if row['context'] else {}
|
|
)
|
|
|
|
def fetch_chunks(self, parent_id: str) -> List[MemoryEntry]:
|
|
"""Fetch all chunks for a parent entry."""
|
|
conn = sqlite3.connect(self.db_path)
|
|
conn.row_factory = sqlite3.Row
|
|
cursor = conn.cursor()
|
|
|
|
cursor.execute(
|
|
"SELECT * FROM memory_entries WHERE parent_id = ? ORDER BY id",
|
|
(parent_id,)
|
|
)
|
|
rows = cursor.fetchall()
|
|
conn.close()
|
|
|
|
return [self._row_to_entry(row) for row in rows]
|
|
|
|
def list_entries(
|
|
self,
|
|
type: MemoryType = None,
|
|
directory: str = None,
|
|
checkpoint_id: str = None,
|
|
status: MemoryStatus = MemoryStatus.ACTIVE,
|
|
limit: int = 50,
|
|
include_chunks: bool = False
|
|
) -> List[MemoryEntry]:
|
|
"""List memory entries with optional filters."""
|
|
conn = sqlite3.connect(self.db_path)
|
|
conn.row_factory = sqlite3.Row
|
|
cursor = conn.cursor()
|
|
|
|
query = "SELECT * FROM memory_entries WHERE 1=1"
|
|
params = []
|
|
|
|
if type:
|
|
query += " AND type = ?"
|
|
params.append(type.value)
|
|
elif not include_chunks:
|
|
query += " AND type != ?"
|
|
params.append(MemoryType.CHUNK.value)
|
|
|
|
if directory:
|
|
query += " AND directory = ?"
|
|
params.append(directory)
|
|
|
|
if checkpoint_id:
|
|
query += " AND checkpoint_id = ?"
|
|
params.append(checkpoint_id)
|
|
|
|
if status:
|
|
query += " AND status = ?"
|
|
params.append(status.value)
|
|
|
|
query += " ORDER BY created_at DESC LIMIT ?"
|
|
params.append(limit)
|
|
|
|
cursor.execute(query, params)
|
|
rows = cursor.fetchall()
|
|
conn.close()
|
|
|
|
return [self._row_to_entry(row) for row in rows]
|
|
|
|
def get_reference(self, entry_id: str) -> Optional[MemoryReference]:
|
|
"""Get a lightweight reference to an entry."""
|
|
entry = self.fetch(entry_id, include_content=False)
|
|
if not entry:
|
|
return None
|
|
|
|
return MemoryReference(
|
|
id=entry.id,
|
|
type=entry.type.value,
|
|
summary=entry.summary or "(no summary)",
|
|
tokens=entry.tokens_estimate,
|
|
created_at=entry.created_at
|
|
)
|
|
|
|
def get_references_for_checkpoint(self, checkpoint_id: str) -> List[MemoryReference]:
|
|
"""Get all memory references linked to a checkpoint."""
|
|
entries = self.list_entries(checkpoint_id=checkpoint_id)
|
|
return [
|
|
MemoryReference(
|
|
id=e.id,
|
|
type=e.type.value,
|
|
summary=e.summary or "(no summary)",
|
|
tokens=e.tokens_estimate,
|
|
created_at=e.created_at
|
|
)
|
|
for e in entries
|
|
]
|
|
|
|
def search(self, query: str, limit: int = 20) -> List[MemoryEntry]:
|
|
"""Search memory entries by content or summary."""
|
|
conn = sqlite3.connect(self.db_path)
|
|
conn.row_factory = sqlite3.Row
|
|
cursor = conn.cursor()
|
|
|
|
# Search in summary and tags
|
|
cursor.execute("""
|
|
SELECT * FROM memory_entries
|
|
WHERE (summary LIKE ? OR tags LIKE ? OR content LIKE ?)
|
|
AND type != ?
|
|
AND status = ?
|
|
ORDER BY created_at DESC
|
|
LIMIT ?
|
|
""", (f"%{query}%", f"%{query}%", f"%{query}%",
|
|
MemoryType.CHUNK.value, MemoryStatus.ACTIVE.value, limit))
|
|
|
|
rows = cursor.fetchall()
|
|
conn.close()
|
|
|
|
return [self._row_to_entry(row) for row in rows]
|
|
|
|
def summarize(self, entry_id: str, force: bool = False) -> str:
|
|
"""
|
|
Generate or retrieve summary for an entry.
|
|
"""
|
|
entry = self.fetch(entry_id, include_content=True)
|
|
if not entry:
|
|
return f"Entry not found: {entry_id}"
|
|
|
|
if entry.summary and not force:
|
|
return entry.summary
|
|
|
|
# Generate summary from content
|
|
content = entry.content
|
|
if not content and entry.chunk_ids:
|
|
# Reconstruct from chunks for summary
|
|
chunks = self.fetch_chunks(entry_id)
|
|
content = ""
|
|
for chunk in chunks:
|
|
chunk_content = self.fetch(chunk.id, include_content=True)
|
|
if chunk_content and chunk_content.content:
|
|
content += chunk_content.content
|
|
|
|
if not content:
|
|
return "(no content to summarize)"
|
|
|
|
summary = self._generate_summary(content)
|
|
|
|
# Update entry with summary
|
|
entry.summary = summary
|
|
self._save_entry(entry)
|
|
|
|
return summary
|
|
|
|
def archive(self, entry_id: str):
|
|
"""Archive an entry (mark as archived but keep data)."""
|
|
entry = self.fetch(entry_id, include_content=False)
|
|
if entry:
|
|
entry.status = MemoryStatus.ARCHIVED
|
|
self._save_entry(entry)
|
|
|
|
def prune(self, keep_days: int = MAX_CHUNK_AGE_DAYS, keep_entries: int = MAX_MEMORY_ENTRIES):
|
|
"""Prune old entries to manage storage."""
|
|
conn = sqlite3.connect(self.db_path)
|
|
cursor = conn.cursor()
|
|
|
|
# Get count
|
|
cursor.execute("SELECT COUNT(*) FROM memory_entries WHERE status = ?",
|
|
(MemoryStatus.ACTIVE.value,))
|
|
count = cursor.fetchone()[0]
|
|
|
|
deleted = 0
|
|
|
|
# Delete oldest entries if over limit
|
|
if count > keep_entries:
|
|
cursor.execute("""
|
|
SELECT id, content_path FROM memory_entries
|
|
WHERE status = ?
|
|
ORDER BY created_at ASC
|
|
LIMIT ?
|
|
""", (MemoryStatus.ACTIVE.value, count - keep_entries))
|
|
|
|
for row in cursor.fetchall():
|
|
entry_id, content_path = row
|
|
# Delete file if exists
|
|
if content_path:
|
|
try:
|
|
Path(content_path).unlink()
|
|
except:
|
|
pass
|
|
cursor.execute("DELETE FROM memory_entries WHERE id = ?", (entry_id,))
|
|
deleted += 1
|
|
|
|
conn.commit()
|
|
conn.close()
|
|
|
|
return deleted
|
|
|
|
|
|
# =============================================================================
|
|
# CLI Interface
|
|
# =============================================================================
|
|
|
|
def cli():
|
|
import argparse
|
|
|
|
parser = argparse.ArgumentParser(
|
|
description="External Memory Layer",
|
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
epilog="""
|
|
Examples:
|
|
memory log "Large output content here"
|
|
memory log --file output.txt --tag "test-results"
|
|
memory fetch mem-20260123-123456-abcd1234
|
|
memory fetch mem-20260123-123456-abcd1234 --summary-only
|
|
memory list --type output --limit 10
|
|
memory search "error"
|
|
memory summarize mem-20260123-123456-abcd1234
|
|
memory prune --keep-days 7
|
|
"""
|
|
)
|
|
|
|
subparsers = parser.add_subparsers(dest="command", required=True)
|
|
|
|
# log
|
|
log_parser = subparsers.add_parser("log", help="Store content in memory")
|
|
log_parser.add_argument("content", nargs="?", help="Content to store (or use --file)")
|
|
log_parser.add_argument("--file", "-f", help="Read content from file")
|
|
log_parser.add_argument("--stdin", action="store_true", help="Read from stdin")
|
|
log_parser.add_argument("--type", "-t", choices=["transcript", "output", "context"],
|
|
default="output", help="Entry type")
|
|
log_parser.add_argument("--tag", action="append", help="Add tag (can repeat)")
|
|
log_parser.add_argument("--checkpoint", help="Link to checkpoint ID")
|
|
log_parser.add_argument("--directory", "-d", help="Link to directory")
|
|
log_parser.add_argument("--no-chunk", action="store_true", help="Don't auto-chunk")
|
|
log_parser.add_argument("--json", action="store_true", help="Output JSON")
|
|
|
|
# fetch
|
|
fetch_parser = subparsers.add_parser("fetch", help="Retrieve memory entry")
|
|
fetch_parser.add_argument("entry_id", help="Memory entry ID")
|
|
fetch_parser.add_argument("--summary-only", "-s", action="store_true",
|
|
help="Only show summary")
|
|
fetch_parser.add_argument("--chunk", "-c", type=int, help="Fetch specific chunk index")
|
|
fetch_parser.add_argument("--json", action="store_true", help="Output JSON")
|
|
|
|
# list
|
|
list_parser = subparsers.add_parser("list", help="List memory entries")
|
|
list_parser.add_argument("--type", "-t", choices=["transcript", "output", "summary", "context"])
|
|
list_parser.add_argument("--directory", "-d", help="Filter by directory")
|
|
list_parser.add_argument("--checkpoint", help="Filter by checkpoint ID")
|
|
list_parser.add_argument("--limit", "-n", type=int, default=20)
|
|
list_parser.add_argument("--json", action="store_true", help="Output JSON")
|
|
|
|
# search
|
|
search_parser = subparsers.add_parser("search", help="Search memory entries")
|
|
search_parser.add_argument("query", help="Search query")
|
|
search_parser.add_argument("--limit", "-n", type=int, default=10)
|
|
search_parser.add_argument("--json", action="store_true", help="Output JSON")
|
|
|
|
# summarize
|
|
summarize_parser = subparsers.add_parser("summarize", help="Generate/show summary")
|
|
summarize_parser.add_argument("entry_id", help="Memory entry ID")
|
|
summarize_parser.add_argument("--force", action="store_true", help="Regenerate summary")
|
|
|
|
# refs
|
|
refs_parser = subparsers.add_parser("refs", help="Get memory references")
|
|
refs_parser.add_argument("--checkpoint", help="Get refs for checkpoint")
|
|
refs_parser.add_argument("--directory", "-d", help="Get refs for directory")
|
|
refs_parser.add_argument("--json", action="store_true", help="Output JSON")
|
|
|
|
# prune
|
|
prune_parser = subparsers.add_parser("prune", help="Prune old entries")
|
|
prune_parser.add_argument("--keep-days", type=int, default=MAX_CHUNK_AGE_DAYS)
|
|
prune_parser.add_argument("--keep-entries", type=int, default=MAX_MEMORY_ENTRIES)
|
|
|
|
# stats
|
|
subparsers.add_parser("stats", help="Show memory statistics")
|
|
|
|
args = parser.parse_args()
|
|
manager = MemoryManager()
|
|
|
|
# -------------------------------------------------------------------------
|
|
|
|
if args.command == "log":
|
|
# Get content
|
|
if args.stdin:
|
|
content = sys.stdin.read()
|
|
elif args.file:
|
|
content = Path(args.file).read_text()
|
|
elif args.content:
|
|
content = args.content
|
|
else:
|
|
print("Error: Provide content, --file, or --stdin")
|
|
sys.exit(1)
|
|
|
|
entry = manager.store(
|
|
content=content,
|
|
type=MemoryType(args.type),
|
|
tags=args.tag or [],
|
|
checkpoint_id=args.checkpoint,
|
|
directory=args.directory,
|
|
auto_chunk=not args.no_chunk
|
|
)
|
|
|
|
if args.json:
|
|
print(json.dumps(entry.to_dict(), indent=2))
|
|
else:
|
|
print(f"\n{'='*60}")
|
|
print("MEMORY STORED")
|
|
print(f"{'='*60}")
|
|
print(f"ID: {entry.id}")
|
|
print(f"Type: {entry.type.value}")
|
|
print(f"Tokens: {entry.tokens_estimate}")
|
|
if entry.chunk_ids:
|
|
print(f"Chunks: {len(entry.chunk_ids)}")
|
|
if entry.summary:
|
|
print(f"\nSummary:\n{entry.summary[:200]}...")
|
|
print(f"{'='*60}")
|
|
|
|
elif args.command == "fetch":
|
|
if args.chunk is not None:
|
|
# Fetch specific chunk
|
|
chunk_id = f"{args.entry_id}-chunk-{args.chunk:03d}"
|
|
entry = manager.fetch(chunk_id, include_content=True)
|
|
else:
|
|
entry = manager.fetch(args.entry_id, include_content=not args.summary_only)
|
|
|
|
if not entry:
|
|
print(f"Entry not found: {args.entry_id}")
|
|
sys.exit(1)
|
|
|
|
if args.json:
|
|
print(json.dumps(entry.to_dict(), indent=2))
|
|
elif args.summary_only:
|
|
print(entry.summary or "(no summary)")
|
|
else:
|
|
print(f"\n{'='*60}")
|
|
print(f"MEMORY: {entry.id}")
|
|
print(f"{'='*60}")
|
|
print(f"Type: {entry.type.value}")
|
|
print(f"Created: {entry.created_at}")
|
|
print(f"Tokens: {entry.tokens_estimate}")
|
|
|
|
if entry.chunk_ids:
|
|
print(f"Chunks: {len(entry.chunk_ids)}")
|
|
print(f" IDs: {', '.join(entry.chunk_ids[:3])}...")
|
|
|
|
if entry.summary:
|
|
print(f"\nSummary:\n{entry.summary}")
|
|
|
|
if entry.content:
|
|
print(f"\nContent:\n{'-'*40}")
|
|
print(entry.content)
|
|
print(f"{'='*60}")
|
|
|
|
elif args.command == "list":
|
|
entry_type = MemoryType(args.type) if args.type else None
|
|
entries = manager.list_entries(
|
|
type=entry_type,
|
|
directory=args.directory,
|
|
checkpoint_id=args.checkpoint,
|
|
limit=args.limit
|
|
)
|
|
|
|
if args.json:
|
|
print(json.dumps([e.to_dict() for e in entries], indent=2))
|
|
else:
|
|
print(f"\n{'='*60}")
|
|
print(f"MEMORY ENTRIES ({len(entries)})")
|
|
print(f"{'='*60}")
|
|
|
|
for entry in entries:
|
|
chunks_str = f" [{len(entry.chunk_ids)} chunks]" if entry.chunk_ids else ""
|
|
summary = (entry.summary or "")[:50]
|
|
if len(entry.summary or "") > 50:
|
|
summary += "..."
|
|
print(f"\n {entry.id}")
|
|
print(f" Type: {entry.type.value}{chunks_str}")
|
|
print(f" Tokens: {entry.tokens_estimate}")
|
|
if summary:
|
|
print(f" Summary: {summary}")
|
|
|
|
print(f"\n{'='*60}")
|
|
|
|
elif args.command == "search":
|
|
entries = manager.search(args.query, limit=args.limit)
|
|
|
|
if args.json:
|
|
print(json.dumps([e.to_dict() for e in entries], indent=2))
|
|
else:
|
|
print(f"\n{'='*60}")
|
|
print(f"SEARCH RESULTS: '{args.query}' ({len(entries)} found)")
|
|
print(f"{'='*60}")
|
|
|
|
for entry in entries:
|
|
summary = (entry.summary or "")[:60]
|
|
print(f"\n {entry.id}")
|
|
print(f" Type: {entry.type.value}, Tokens: {entry.tokens_estimate}")
|
|
if summary:
|
|
print(f" {summary}")
|
|
|
|
print(f"\n{'='*60}")
|
|
|
|
elif args.command == "summarize":
|
|
summary = manager.summarize(args.entry_id, force=args.force)
|
|
print(summary)
|
|
|
|
elif args.command == "refs":
|
|
refs = []
|
|
if args.checkpoint:
|
|
refs = manager.get_references_for_checkpoint(args.checkpoint)
|
|
elif args.directory:
|
|
entries = manager.list_entries(directory=args.directory)
|
|
refs = [
|
|
MemoryReference(
|
|
id=e.id, type=e.type.value,
|
|
summary=e.summary or "", tokens=e.tokens_estimate,
|
|
created_at=e.created_at
|
|
)
|
|
for e in entries
|
|
]
|
|
|
|
if args.json:
|
|
print(json.dumps([asdict(r) for r in refs], indent=2))
|
|
else:
|
|
print(f"\n{'='*60}")
|
|
print(f"MEMORY REFERENCES ({len(refs)})")
|
|
print(f"{'='*60}")
|
|
|
|
for ref in refs:
|
|
print(f"\n {ref.to_inline()}")
|
|
|
|
print(f"\n{'='*60}")
|
|
|
|
elif args.command == "prune":
|
|
deleted = manager.prune(
|
|
keep_days=args.keep_days,
|
|
keep_entries=args.keep_entries
|
|
)
|
|
print(f"Pruned {deleted} entries")
|
|
|
|
elif args.command == "stats":
|
|
conn = sqlite3.connect(MEMORY_DB)
|
|
cursor = conn.cursor()
|
|
|
|
cursor.execute("SELECT COUNT(*) FROM memory_entries WHERE status = 'active'")
|
|
total = cursor.fetchone()[0]
|
|
|
|
cursor.execute("""
|
|
SELECT type, COUNT(*), SUM(tokens_estimate)
|
|
FROM memory_entries WHERE status = 'active'
|
|
GROUP BY type
|
|
""")
|
|
by_type = cursor.fetchall()
|
|
|
|
cursor.execute("SELECT SUM(tokens_estimate) FROM memory_entries WHERE status = 'active'")
|
|
total_tokens = cursor.fetchone()[0] or 0
|
|
|
|
conn.close()
|
|
|
|
# Get storage size
|
|
chunks_size = sum(f.stat().st_size for f in CHUNKS_DIR.glob("*") if f.is_file())
|
|
|
|
print(f"\n{'='*60}")
|
|
print("MEMORY STATISTICS")
|
|
print(f"{'='*60}")
|
|
print(f"Total Entries: {total}")
|
|
print(f"Total Tokens: {total_tokens:,}")
|
|
print(f"Storage Size: {chunks_size / 1024:.1f} KB")
|
|
print(f"\nBy Type:")
|
|
for type_name, count, tokens in by_type:
|
|
print(f" {type_name}: {count} entries, {tokens or 0:,} tokens")
|
|
print(f"{'='*60}")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
cli()
|