#!/usr/bin/env python3
"""
Structured Logging Module
=========================

Provides JSON-structured logging with trace correlation.

Usage:
    from observability.logging import get_logger
    logger = get_logger("my_module")
    logger.info("Operation completed", user_id="123", duration_ms=45.2)
"""

import json
import logging
import sqlite3
import sys
import threading
import traceback
from contextlib import closing
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Optional

from .tracing import get_current_trace_id, get_current_span

# =============================================================================
# Configuration
# =============================================================================

DB_PATH = Path("/opt/agent-governance/ledger/governance.db")
LOG_LEVEL = logging.INFO
LOG_TO_DB = True
LOG_RETENTION_DAYS = 7


# =============================================================================
# Structured Log Entry
# =============================================================================

@dataclass
class LogEntry:
    """A single structured log record, serializable to flat JSON."""

    timestamp: str
    level: str
    logger: str
    message: str
    trace_id: Optional[str] = None
    span_id: Optional[str] = None
    tenant_id: str = "default"
    # Arbitrary extra key/value pairs, merged flat into the JSON output.
    # (Was mis-annotated as a bare ``Dict`` while defaulting to None.)
    attributes: Optional[Dict[str, Any]] = None

    def to_dict(self) -> dict:
        """Return a flat dict representation; falsy optional fields are omitted."""
        result = {
            "timestamp": self.timestamp,
            "level": self.level,
            "logger": self.logger,
            "message": self.message,
        }
        if self.trace_id:
            result["trace_id"] = self.trace_id
        if self.span_id:
            result["span_id"] = self.span_id
        if self.tenant_id:
            result["tenant_id"] = self.tenant_id
        if self.attributes:
            # NOTE: attribute keys can shadow the fixed fields above; callers
            # should avoid keys like "level" or "message".
            result.update(self.attributes)
        return result

    def to_json(self) -> str:
        """Serialize the entry as a single-line JSON string."""
        return json.dumps(self.to_dict())


# =============================================================================
# Log Storage
# =============================================================================

class LogStorage:
    """Singleton that persists log entries to SQLite for later querying.

    Uses double-checked locking in ``__new__`` so concurrent first-time
    construction is safe; ``__init__`` is idempotent via ``_initialized``.
    """

    _instance = None
    _lock = threading.Lock()

    def __new__(cls):
        # Fast unlocked check, then re-check under the lock (double-checked
        # locking) so the common path never contends on the mutex.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
                    cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        if self._initialized:
            return
        self._ensure_table()
        self._initialized = True

    def _ensure_table(self):
        """Create the logs table and its indexes if they do not exist.

        Errors are reported to stderr rather than raised: logging must
        never take down the host process.
        """
        try:
            # closing() guarantees the handle is released even if a statement
            # raises (the original leaked the connection on error).
            with closing(sqlite3.connect(DB_PATH)) as conn:
                conn.execute("""
                    CREATE TABLE IF NOT EXISTS logs (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        timestamp TEXT NOT NULL,
                        level TEXT NOT NULL,
                        logger TEXT NOT NULL,
                        message TEXT NOT NULL,
                        trace_id TEXT,
                        span_id TEXT,
                        tenant_id TEXT DEFAULT 'default',
                        attributes TEXT, -- JSON
                        created_at TEXT DEFAULT CURRENT_TIMESTAMP
                    )
                """)
                conn.execute("CREATE INDEX IF NOT EXISTS idx_logs_timestamp ON logs(timestamp)")
                conn.execute("CREATE INDEX IF NOT EXISTS idx_logs_level ON logs(level)")
                conn.execute("CREATE INDEX IF NOT EXISTS idx_logs_trace ON logs(trace_id)")
                conn.execute("CREATE INDEX IF NOT EXISTS idx_logs_tenant ON logs(tenant_id)")
                conn.execute("CREATE INDEX IF NOT EXISTS idx_logs_logger ON logs(logger)")
                conn.commit()
        except Exception as e:
            print(f"Error creating logs table: {e}", file=sys.stderr)

    def save(self, entry: LogEntry):
        """Persist one log entry; no-op when LOG_TO_DB is disabled.

        Failures are swallowed (reported to stderr) by design — a broken
        log sink must not crash the caller.
        """
        if not LOG_TO_DB:
            return
        try:
            with closing(sqlite3.connect(DB_PATH)) as conn:
                conn.execute("""
                    INSERT INTO logs (timestamp, level, logger, message, trace_id, span_id, tenant_id, attributes)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """, (
                    entry.timestamp,
                    entry.level,
                    entry.logger,
                    entry.message,
                    entry.trace_id,
                    entry.span_id,
                    entry.tenant_id,
                    json.dumps(entry.attributes) if entry.attributes else None,
                ))
                conn.commit()
        except Exception as e:
            print(f"Error saving log: {e}", file=sys.stderr)

    def search(
        self,
        level: Optional[str] = None,
        logger: Optional[str] = None,
        trace_id: Optional[str] = None,
        tenant_id: Optional[str] = None,
        message_contains: Optional[str] = None,
        start_time: Optional[str] = None,
        end_time: Optional[str] = None,
        limit: int = 100,
    ) -> list:
        """Search logs with optional filters.

        All filters are ANDed together; results are newest-first dicts with
        ``attributes`` decoded back from JSON. Returns [] on any error.
        """
        try:
            with closing(sqlite3.connect(DB_PATH)) as conn:
                conn.row_factory = sqlite3.Row
                query = "SELECT * FROM logs WHERE 1=1"
                params = []
                if level:
                    query += " AND level = ?"
                    params.append(level)
                if logger:
                    query += " AND logger = ?"
                    params.append(logger)
                if trace_id:
                    query += " AND trace_id = ?"
                    params.append(trace_id)
                if tenant_id:
                    query += " AND tenant_id = ?"
                    params.append(tenant_id)
                if message_contains:
                    query += " AND message LIKE ?"
                    params.append(f"%{message_contains}%")
                if start_time:
                    query += " AND timestamp >= ?"
                    params.append(start_time)
                if end_time:
                    query += " AND timestamp <= ?"
                    params.append(end_time)
                query += " ORDER BY timestamp DESC LIMIT ?"
                params.append(limit)

                cursor = conn.execute(query, params)
                results = []
                for row in cursor.fetchall():
                    entry = dict(row)
                    entry["attributes"] = json.loads(entry["attributes"]) if entry.get("attributes") else {}
                    results.append(entry)
                return results
        except Exception as e:
            print(f"Error searching logs: {e}", file=sys.stderr)
            return []

    def cleanup(self, days: int = LOG_RETENTION_DAYS):
        """Delete logs older than *days* days; return the number removed."""
        try:
            with closing(sqlite3.connect(DB_PATH)) as conn:
                # Parameterized modifier (the original f-string interpolated
                # `days` directly into the SQL text).
                cursor = conn.execute(
                    "DELETE FROM logs WHERE timestamp < datetime('now', ?)",
                    (f"-{int(days)} days",),
                )
                conn.commit()
                return cursor.rowcount
        except Exception as e:
            print(f"Error cleaning up logs: {e}", file=sys.stderr)
            return 0


# =============================================================================
# JSON Formatter
# =============================================================================

class JSONFormatter(logging.Formatter):
    """Format log records as JSON lines, enriched with trace context.

    Side effect: each formatted record is also persisted via LogStorage,
    so attaching this formatter wires stdout logging and DB storage together.
    """

    def format(self, record: logging.LogRecord) -> str:
        # Correlate with the active trace/span, if any.
        trace_id = get_current_trace_id()
        span = get_current_span()
        span_id = span.span_id if span else None

        # sep=" " makes the stored timestamp lexicographically comparable
        # with SQLite's datetime('now', ...) output (space-separated); the
        # original 'T'-separated isoformat() skewed intra-day comparisons
        # used by the stats and retention queries.
        entry = LogEntry(
            timestamp=datetime.now(timezone.utc).isoformat(sep=" "),
            level=record.levelname,
            logger=record.name,
            message=record.getMessage(),
            trace_id=trace_id,
            span_id=span_id,
            tenant_id=getattr(record, 'tenant_id', 'default'),
            attributes=getattr(record, 'extra_attrs', None),
        )

        # Persist as a side effect of formatting.
        LogStorage().save(entry)

        return entry.to_json()


# =============================================================================
# Custom Logger
# =============================================================================

class StructuredLogger:
    """Logger facade producing structured JSON output with trace correlation.

    Keyword arguments passed to the level methods become structured
    attributes in the emitted JSON (e.g. ``logger.info("done", n=3)``).
    """

    def __init__(self, name: str, tenant_id: str = "default"):
        self.name = name
        self.tenant_id = tenant_id
        self._logger = logging.getLogger(name)
        self._logger.setLevel(LOG_LEVEL)

        # Attach exactly one JSON stdout handler per underlying logger.
        if not any(isinstance(h, logging.StreamHandler) and isinstance(h.formatter, JSONFormatter)
                   for h in self._logger.handlers):
            handler = logging.StreamHandler(sys.stdout)
            handler.setFormatter(JSONFormatter())
            self._logger.addHandler(handler)

    def _log(self, level: int, message: str, **kwargs):
        """Route a message through the stdlib logger, smuggling the tenant
        id and structured attributes in via ``extra`` (read back by
        JSONFormatter as record attributes)."""
        extra = {
            'tenant_id': kwargs.pop('tenant_id', self.tenant_id),
            'extra_attrs': kwargs if kwargs else None,
        }
        self._logger.log(level, message, extra=extra)

    def debug(self, message: str, **kwargs):
        """Log a debug message."""
        self._log(logging.DEBUG, message, **kwargs)

    def info(self, message: str, **kwargs):
        """Log an info message."""
        self._log(logging.INFO, message, **kwargs)

    def warning(self, message: str, **kwargs):
        """Log a warning message."""
        self._log(logging.WARNING, message, **kwargs)

    def error(self, message: str, **kwargs):
        """Log an error message."""
        self._log(logging.ERROR, message, **kwargs)

    def critical(self, message: str, **kwargs):
        """Log a critical message."""
        self._log(logging.CRITICAL, message, **kwargs)

    def exception(self, message: str, **kwargs):
        """Log at ERROR level with the current traceback attached as an
        attribute. Intended to be called from inside an ``except`` block."""
        kwargs['traceback'] = traceback.format_exc()
        self._log(logging.ERROR, message, **kwargs)


# =============================================================================
# Logger Factory
# =============================================================================

_loggers: Dict[str, StructuredLogger] = {}


def get_logger(name: str, tenant_id: str = "default") -> StructuredLogger:
    """Get or create a structured logger, cached per (name, tenant) pair."""
    key = f"{name}:{tenant_id}"
    if key not in _loggers:
        _loggers[key] = StructuredLogger(name, tenant_id)
    return _loggers[key]


# =============================================================================
# FastAPI Router
# =============================================================================

from fastapi import APIRouter, Query, HTTPException

router = APIRouter(prefix="/logs", tags=["Logging"])


@router.get("")
async def search_logs(
    level: Optional[str] = None,
    logger: Optional[str] = None,
    trace_id: Optional[str] = None,
    message: Optional[str] = None,
    start_time: Optional[str] = None,
    end_time: Optional[str] = None,
    limit: int = Query(100, le=500),
):
    """Search logs with optional filters."""
    storage = LogStorage()
    logs = storage.search(
        level=level,
        logger=logger,
        trace_id=trace_id,
        message_contains=message,
        start_time=start_time,
        end_time=end_time,
        limit=limit,
    )
    return {"logs": logs, "count": len(logs)}


@router.get("/trace/{trace_id}")
async def get_logs_by_trace(trace_id: str, limit: int = Query(100, le=500)):
    """Get all logs for a trace."""
    storage = LogStorage()
    logs = storage.search(trace_id=trace_id, limit=limit)
    return {"logs": logs, "count": len(logs), "trace_id": trace_id}


@router.get("/stats")
async def get_log_stats():
    """Get log statistics: per-level/per-logger counts for the last hour,
    total row count, and error count over the last 24 hours."""
    try:
        with closing(sqlite3.connect(DB_PATH)) as conn:
            conn.row_factory = sqlite3.Row
            stats = {}

            # Count by level (last hour).
            cursor = conn.execute("""
                SELECT level, COUNT(*) as count FROM logs
                WHERE timestamp > datetime('now', '-1 hour')
                GROUP BY level
            """)
            stats["last_hour_by_level"] = {row["level"]: row["count"] for row in cursor.fetchall()}

            # Top 10 loggers by volume (last hour).
            cursor = conn.execute("""
                SELECT logger, COUNT(*) as count FROM logs
                WHERE timestamp > datetime('now', '-1 hour')
                GROUP BY logger ORDER BY count DESC LIMIT 10
            """)
            stats["last_hour_by_logger"] = {row["logger"]: row["count"] for row in cursor.fetchall()}

            # Total count.
            cursor = conn.execute("SELECT COUNT(*) as total FROM logs")
            stats["total_logs"] = cursor.fetchone()["total"]

            # Error count, last 24h.
            cursor = conn.execute("""
                SELECT COUNT(*) as count FROM logs
                WHERE level IN ('ERROR', 'CRITICAL')
                AND timestamp > datetime('now', '-1 day')
            """)
            stats["errors_last_24h"] = cursor.fetchone()["count"]

            return stats
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@router.post("/cleanup")
async def cleanup_logs(days: int = Query(7, ge=1, le=365)):
    """Clean up logs older than the given retention window."""
    storage = LogStorage()
    deleted = storage.cleanup(days)
    return {"deleted": deleted, "retention_days": days}