root bc9ab93afe H: observerd — autonomous-iteration witness loop (SPEC §2 port)
Port of the load-bearing pieces of mcp-server/observer.ts (Rust
system, 852 lines TS) per SPEC §2's named target. Implements PRD
loop 3 ("Observer loop — watches each run, refines configs").

Routes (all under /v1/observer/* via gateway):
  GET  /observer/health   — liveness
  GET  /observer/stats    — total / successes / failures /
                             by_source / recent_scenario_ops
                             (matches Rust JSON shape exactly)
  POST /observer/event    — record one ObservedOp; auto-defaults
                             timestamp + source, validates required
                             fields (endpoint), persists to JSONL,
                             appends to ring buffer

Architecture:
  - internal/observer/types.go — ObservedOp model + Source taxonomy
    (mcp / scenario / langfuse / overseer_correction). Mirrors the
    Rust shape so JSON round-trips during cutover.
  - internal/observer/store.go — Store + Persistor. Ring buffer cap
    matches Rust's 2000; recent_scenarios cap matches Rust's 10.
    Same persist-then-apply order as pathwayd; same corruption-
    tolerant replay (skip malformed lines + warn).
  - cmd/observerd — :3219 HTTP service, fronted by gateway as
    /v1/observer/*.
  - lakehouse.toml + DefaultConfig — [observerd] block matches the
    pathwayd pattern (Bind + PersistPath; empty path = ephemeral).

Tests + smoke (all PASS):
  - 7 unit tests in store_test.go: validation, default fields,
    stats aggregation, recent-scenarios cap + ordering, ring-buffer
    rollover at cap, JSONL round-trip persistence, corruption-
    tolerant replay (1 valid + 1 corrupt + 1 valid → 2 applied)
  - scripts/observer_smoke.sh: 4 assertions through gateway —
    record 5 events (3 ok / 2 fail across 2 sources), stats
    aggregates correctly, empty-endpoint→400, kill+restart preserves
    state via JSONL replay (5 ops, 3 ok, 2 err survive)

Deferred (named in package + cmd doc, not in this commit):
  - POST /observer/review (cloud-LLM hand-review fall-back). The
    heuristic-only path could land cheaply but the productized
    cloud path (qwen3-coder fall-back) is a multi-day port.
  - Background loops: analyzeErrors, consolidatePlaybooks,
    tailOverseerCorrections (read overseer_corrections.jsonl into
    the ring buffer once per cycle).
  - escalateFailureClusterToLLMTeam (failure clustering trigger
    that posts to LLM Team's /api/run with code_review mode).

/relevance is NOT duplicated — already ported in 9588bd8 to
internal/matrix/relevance.go (component 3 of SPEC §3.4).

16-smoke regression all green (D1-D6, G1, G1P, G2, storaged_cap,
pathway, matrix, relevance, downgrade, playbook, observer).
13 binaries now: gateway, storaged, catalogd, ingestd, queryd,
vectord, embedd, pathwayd, matrixd, observerd, mcpd, fake_ollama
(plus catalogd-only test build).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 20:18:02 -05:00

132 lines
3.6 KiB
Go

// observerd is the autonomous-iteration witness service. Port of
// the load-bearing pieces of mcp-server/observer.ts (Rust system).
//
// Routes (all under /observer):
// GET /observer/health — service liveness + ring size
// GET /observer/stats — aggregate counters + recent scenarios
// POST /observer/event — record one observed op
//
// Deferred to follow-up commits (see internal/observer doc):
// - POST /observer/review (cloud-LLM hand review fall-back)
// - background loops (analyzeErrors, consolidatePlaybooks,
// tailOverseerCorrections)
// - failure-cluster escalation to LLM Team
//
// /relevance was already ported to internal/matrix in 9588bd8 and is
// not duplicated here.
package main
import (
"encoding/json"
"errors"
"flag"
"log/slog"
"net/http"
"os"
"strings"
"github.com/go-chi/chi/v5"
"git.agentview.dev/profit/golangLAKEHOUSE/internal/observer"
"git.agentview.dev/profit/golangLAKEHOUSE/internal/shared"
)
// maxRequestBytes is the upper bound, in bytes, on any request body
// this service will read (4 MiB).
const maxRequestBytes = 4 * 1024 * 1024
// main wires observerd together: it loads the TOML config named by
// -config, optionally attaches a JSONL persistor and replays it into
// the store, then hands the route table to shared.Run. Any fatal
// setup or server error is logged and the process exits with status 1.
func main() {
	cfgFile := flag.String("config", "lakehouse.toml", "path to TOML config")
	flag.Parse()

	cfg, err := shared.LoadConfig(*cfgFile)
	if err != nil {
		slog.Error("config", "err", err)
		os.Exit(1)
	}

	// An empty persist path means the store is ephemeral (same
	// convention as pathwayd). Production points this at a stable
	// file under /var/lib/lakehouse/observer/ops.jsonl.
	var persist *observer.Persistor
	if path := cfg.Observerd.PersistPath; path != "" {
		if persist, err = observer.NewPersistor(path); err != nil {
			slog.Error("observer persistor", "err", err)
			os.Exit(1)
		}
	}

	store := observer.NewStore(persist)
	if persist != nil {
		// A failed load is only a warning: the service still starts
		// with whatever was loaded before the error.
		switch loaded, loadErr := store.Load(); {
		case loadErr != nil:
			slog.Warn("observer load", "err", loadErr, "loaded", loaded)
		default:
			slog.Info("observer loaded", "ops", loaded, "path", cfg.Observerd.PersistPath)
		}
	}

	api := &handlers{store: store}
	if runErr := shared.Run("observerd", cfg.Observerd.Bind, api.register, cfg.Auth); runErr != nil {
		slog.Error("server", "err", runErr)
		os.Exit(1)
	}
}
// handlers bundles the dependencies shared by the HTTP handlers of
// this service.
type handlers struct {
// store is the observer store that handlers record into and read
// stats from.
store *observer.Store
}
// register mounts this service's routes on r:
//
//	POST /observer/event — record one observed op
//	GET  /observer/stats — aggregate counters
//
// NOTE(review): the file header also lists GET /observer/health, which
// is not mounted here — presumably shared.Run supplies it; confirm.
func (h *handlers) register(r chi.Router) {
	r.MethodFunc(http.MethodPost, "/observer/event", h.handleEvent)
	r.MethodFunc(http.MethodGet, "/observer/stats", h.handleStats)
}
// handleStats responds 200 with the store's current stats encoded as
// JSON. The request itself is not inspected.
func (h *handlers) handleStats(w http.ResponseWriter, _ *http.Request) {
	snapshot := h.store.Stats()
	writeJSON(w, http.StatusOK, snapshot)
}
// handleEvent decodes one ObservedOp from the request body and records
// it in the store.
//
// Responses:
//   - 200 {"accepted": true, "ring_size": N} on success, where N is the
//     Total field of the store's stats read after the record.
//   - 400 with the error text when the store reports ErrInvalidOp.
//   - 500 "internal" for any other record error (also logged).
//
// Decode failures are answered by decodeJSON before the store is touched.
func (h *handlers) handleEvent(w http.ResponseWriter, r *http.Request) {
	var incoming observer.ObservedOp
	if ok := decodeJSON(w, r, &incoming); !ok {
		return
	}

	switch recErr := h.store.Record(incoming); {
	case recErr == nil:
		reply := map[string]any{
			"accepted":  true,
			"ring_size": h.store.Stats().Total,
		}
		writeJSON(w, http.StatusOK, reply)
	case errors.Is(recErr, observer.ErrInvalidOp):
		// Validation failure: the caller sent a bad op.
		http.Error(w, recErr.Error(), http.StatusBadRequest)
	default:
		slog.Error("observer record", "err", recErr)
		http.Error(w, "internal", http.StatusInternalServerError)
	}
}
// decodeJSON reads a JSON value from the request body into v, reading
// at most maxRequestBytes. It returns true on success. On failure it
// writes the error response itself and returns false:
//
//   - 413 "body too large" when the size cap was exceeded
//   - 400 "decode body: <err>" for any other decode error
//
// The original request body is closed when the function returns.
func decodeJSON(w http.ResponseWriter, r *http.Request, v any) bool {
	rawBody := r.Body
	defer rawBody.Close()

	// Swap in a size-capped reader so an oversized payload fails the
	// decode instead of being read in full.
	r.Body = http.MaxBytesReader(w, rawBody, maxRequestBytes)

	decodeErr := json.NewDecoder(r.Body).Decode(v)
	if decodeErr == nil {
		return true
	}

	var tooLarge *http.MaxBytesError
	switch {
	case errors.As(decodeErr, &tooLarge),
		strings.Contains(decodeErr.Error(), "http: request body too large"):
		http.Error(w, "body too large", http.StatusRequestEntityTooLarge)
	default:
		http.Error(w, "decode body: "+decodeErr.Error(), http.StatusBadRequest)
	}
	return false
}
// writeJSON sends v as a JSON response with the given status code.
//
// The value is marshalled before any header is written. If marshalling
// fails, the failure is logged and the client receives a 500 "internal"
// response instead of the requested status with an empty body. On
// success the response carries Content-Type application/json and the
// encoded value followed by a newline.
//
// A write error (for example, the client went away) is logged and
// otherwise ignored, since the status line has already been sent.
func writeJSON(w http.ResponseWriter, code int, v any) {
	body, err := json.Marshal(v)
	if err != nil {
		slog.Warn("observer write json", "err", err)
		http.Error(w, "internal", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(code)
	// json.Encoder terminates every value with a newline; keep it so
	// the bytes on the wire are unchanged for existing clients.
	if _, err := w.Write(append(body, '\n')); err != nil {
		slog.Warn("observer write json", "err", err)
	}
}