root 6c93a38093 scrum multi_coord_phase3: 4 fixes from cross-lineage review
Cross-lineage scrum on bundle 87cbd10..f971e64 (3,652 lines)
produced 4 actionable findings, all defensive hardening.

1. (Opus WARN) internal/langfuse/client.go:queue
   Synchronous Flush at maxBatch threshold blocked the calling
   goroutine for the full 5s HTTP timeout when Langfuse hiccupped,
   defeating the "best-effort, never blocks calling path" contract
   in the package doc. Now fire-and-forget via goroutine.

2. (Opus + Kimi convergent) cmd/observerd/main.go:handleInbox
   - Free-form priority string was accepted; "nonsense" passed
     through unchecked. Now closed enum: urgent|high|medium|low (+
     empty defaults to medium). Tested: TestInbox_RejectsBadPriority.
   - No size cap on body, only emptiness check; multi-MB payloads
     would bloat observer's ring + JSONL. Now 8 KiB cap returns 413.
     Tested: TestInbox_RejectsOversizedBody.
   - Subject/sender/tag concatenated into InputSummary without
     newline stripping; embedded \n could corrupt JSONL line-based
     parsers. New sanitizeInboxField strips \r\n + caps at 256 chars
     before interpolation.

3. (Opus INFO) scripts/multi_coord_stress/main.go
   Removed dead `must[T]` generic — tracedSearch took over the
   fail-fast role for matrix searches, so the helper became unused.

4. (Opus INFO) scripts/multi_coord_stress/main.go:Event
   `JudgeRating int` collapsed "judge errored" and "judge said
   unrated" both to 0. Changed to *int — nil = errored, 1-5 =
   verdict. judgeInboxResult still returns 0 on error; caller
   gates on > 0 before assigning.

Dismissed (with rationale):
- Opus WARN ExcludeIDs ordering: verified by code read — filter
  applies after sort + before top-K truncation as documented;
  no slot waste possible.
- Opus INFO 10 prior-run reports contradict #011: those are
  point-in-time snapshots; intentional history.
- Kimi INFO Langfuse error suppression: design intent (best-effort
  per package doc).
- Kimi INFO contract schema validation: defer until contract count
  grows enough to make hand-edit drift a real risk.
- Kimi INFO paraphrase prompt duplicated across lift + multi_coord:
  defer (lift to internal/paraphrase/ when a third consumer appears).
- Qwen HOLD: single-line, no actionable finding.

go test ./cmd/observerd ./internal/langfuse all green; multi_coord
driver builds clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 17:42:07 -05:00

365 lines
12 KiB
Go

// observerd is the autonomous-iteration witness service. Port of
// the load-bearing pieces of mcp-server/observer.ts (Rust system).
//
// Routes (all under /observer):
// GET /observer/health — service liveness + ring size
// GET /observer/stats — aggregate counters + recent scenarios
// POST /observer/event — record one observed op
// POST /observer/inbox — record an incoming email/SMS signal
// POST /observer/workflow/run — execute a workflow definition
// GET /observer/workflow/modes — list registered workflow modes
//
// Deferred to follow-up commits (see internal/observer doc):
// - POST /observer/review (cloud-LLM hand review fall-back)
// - background loops (analyzeErrors, consolidatePlaybooks,
// tailOverseerCorrections)
// - failure-cluster escalation to LLM Team
//
// /relevance was already ported to internal/matrix in 9588bd8 and is
// not duplicated here.
package main
import (
	"encoding/json"
	"errors"
	"flag"
	"fmt"
	"log/slog"
	"net/http"
	"os"
	"strings"
	"time"
	"unicode/utf8"

	"github.com/go-chi/chi/v5"

	"git.agentview.dev/profit/golangLAKEHOUSE/internal/observer"
	"git.agentview.dev/profit/golangLAKEHOUSE/internal/shared"
	"git.agentview.dev/profit/golangLAKEHOUSE/internal/workflow"
)
const maxRequestBytes = 4 << 20 // 4 MiB cap on request bodies
// main wires config → optional persistence → op store → workflow
// runner → HTTP server. Config and persistor failures are fatal
// (exit 1); a failed ring reload is only a warning so the service
// can still start ephemeral.
func main() {
	configPath := flag.String("config", "lakehouse.toml", "path to TOML config")
	flag.Parse()
	cfg, err := shared.LoadConfig(*configPath)
	if err != nil {
		slog.Error("config", "err", err)
		os.Exit(1)
	}
	// Persistence is optional — empty path = ephemeral (matches the
	// pathwayd pattern). Production sets a stable path under
	// /var/lib/lakehouse/observer/ops.jsonl.
	var persistor *observer.Persistor
	if cfg.Observerd.PersistPath != "" {
		persistor, err = observer.NewPersistor(cfg.Observerd.PersistPath)
		if err != nil {
			slog.Error("observer persistor", "err", err)
			os.Exit(1)
		}
	}
	store := observer.NewStore(persistor)
	if persistor != nil {
		// Best-effort reload of the prior ring: a partial load is
		// logged with how many ops made it in, but never fatal.
		n, err := store.Load()
		if err != nil {
			slog.Warn("observer load", "err", err, "loaded", n)
		} else {
			slog.Info("observer loaded", "ops", n, "path", cfg.Observerd.PersistPath)
		}
	}
	runner := workflow.NewRunner()
	// matrixd URL comes from the gateway section so a single-toml
	// deploy works without duplicating the address.
	// NOTE(review): an earlier comment claimed an observerd-level
	// field is preferred, but only cfg.Gateway.MatrixdURL is read
	// here — confirm whether an observerd override was ever added.
	matrixdURL := cfg.Gateway.MatrixdURL
	registerBuiltinModes(runner, matrixdURL, cfg.Models.WeakModels)
	h := &handlers{store: store, runner: runner}
	if err := shared.Run("observerd", cfg.Observerd.Bind, h.register, cfg.Auth); err != nil {
		slog.Error("server", "err", err)
		os.Exit(1)
	}
}
// handlers bundles the shared dependencies of every observerd HTTP
// handler: the observed-op ring store and the workflow runner.
type handlers struct {
	store  *observer.Store
	runner *workflow.Runner
}
// register mounts every observer route on the supplied router. All
// paths live under /observer; authentication is applied around this
// router by shared.Run, not here.
func (h *handlers) register(r chi.Router) {
	routes := []struct {
		method  string
		pattern string
		fn      http.HandlerFunc
	}{
		{http.MethodGet, "/observer/stats", h.handleStats},
		{http.MethodPost, "/observer/event", h.handleEvent},
		{http.MethodPost, "/observer/workflow/run", h.handleWorkflowRun},
		{http.MethodGet, "/observer/workflow/modes", h.handleWorkflowModes},
		{http.MethodPost, "/observer/inbox", h.handleInbox},
	}
	for _, rt := range routes {
		r.Method(rt.method, rt.pattern, rt.fn)
	}
}
// inboxMessage is the POST /observer/inbox body — an incoming
// real-world signal (email or SMS) that a coordinator would receive
// and act on. The handler only RECORDS it as an ObservedOp; whether
// to trigger a downstream matrix.search or workflow is the caller's
// concern. Keeps observer's witness role pure.
type inboxMessage struct {
	Type     string `json:"type"` // "email" | "sms"; anything else is rejected with 400
	Sender   string `json:"sender"` // free-form; sanitized before it reaches the log
	Subject  string `json:"subject,omitempty"` // free-form; sanitized before it reaches the log
	Body     string `json:"body"` // required; capped at inboxMaxBodyChars
	Priority string `json:"priority"` // "urgent" | "high" | "medium" | "low"; "" defaults to "medium"
	Tag      string `json:"tag,omitempty"` // free-form; sanitized before it reaches the log
}
// validInboxPriorities is the closed set of priority values. Per
// scrum (Kimi WARN): a free-form priority string was accepted, so
// downstream consumers parsing on exact enum could see "nonsense"
// values. Now rejected at the boundary. The empty string is allowed
// here so handleInbox can default it to "medium" after validation.
var validInboxPriorities = map[string]bool{
	"":       true, // empty defaults to medium below
	"urgent": true,
	"high":   true,
	"medium": true,
	"low":    true,
}
// inboxMaxBodyChars caps the inbox body at a sane size so a large
// payload doesn't bloat observer's ring buffer or JSONL log. Per
// scrum (Opus WARN): only emptiness was checked, so a multi-MB body
// would be accepted unconditionally. Note: the check in handleInbox
// is len(Body), i.e. bytes rather than characters — still a valid
// cap, despite this constant's name.
const inboxMaxBodyChars = 8 * 1024
// inboxMaxFieldChars caps subject/sender/tag at a much tighter size.
// These appear inside the InputSummary string + JSONL log entry, so
// long values bloat every record and embedded newlines corrupt
// downstream line-based parsers.
const inboxMaxFieldChars = 256
func sanitizeInboxField(s string) string {
// Strip newlines so multi-line attacker-controlled strings don't
// corrupt the JSONL log (one-line-per-op invariant).
s = strings.ReplaceAll(s, "\n", " ")
s = strings.ReplaceAll(s, "\r", " ")
if len(s) > inboxMaxFieldChars {
s = s[:inboxMaxFieldChars]
}
return s
}
// handleInbox records an incoming email/SMS signal as an ObservedOp.
// Validation runs in order — type, non-empty body, body size cap,
// priority enum — before any state is touched. The handler is a
// pure witness: it never triggers downstream work itself.
func (h *handlers) handleInbox(w http.ResponseWriter, r *http.Request) {
	var msg inboxMessage
	if !decodeJSON(w, r, &msg) {
		return
	}
	switch msg.Type {
	case "email", "sms":
		// accepted transport kinds
	default:
		http.Error(w, "type must be 'email' or 'sms'", http.StatusBadRequest)
		return
	}
	switch {
	case strings.TrimSpace(msg.Body) == "":
		http.Error(w, "body required", http.StatusBadRequest)
		return
	case len(msg.Body) > inboxMaxBodyChars:
		http.Error(w, fmt.Sprintf("body exceeds %d chars", inboxMaxBodyChars), http.StatusRequestEntityTooLarge)
		return
	case !validInboxPriorities[msg.Priority]:
		http.Error(w, "priority must be urgent|high|medium|low", http.StatusBadRequest)
		return
	}
	priority := msg.Priority
	if priority == "" {
		priority = "medium"
	}
	// The caller-controlled fields are newline-stripped and capped
	// before they land in the one-line InputSummary.
	summary := fmt.Sprintf("from=%s priority=%s tag=%s subject=%s",
		sanitizeInboxField(msg.Sender), priority,
		sanitizeInboxField(msg.Tag), sanitizeInboxField(msg.Subject))
	op := observer.ObservedOp{
		Endpoint:      "/observer/inbox/" + msg.Type,
		InputSummary:  summary,
		OutputSummary: msg.Body,
		Source:        observer.SourceInbox,
		Success:       true,
	}
	if err := h.store.Record(op); err != nil {
		if errors.Is(err, observer.ErrInvalidOp) {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		slog.Error("observer record inbox", "err", err)
		http.Error(w, "internal", http.StatusInternalServerError)
		return
	}
	writeJSON(w, http.StatusOK, map[string]any{
		"accepted":  true,
		"type":      msg.Type,
		"priority":  priority,
		"ring_size": h.store.Stats().Total,
	})
}
// handleStats returns the store's aggregate counters as JSON.
func (h *handlers) handleStats(w http.ResponseWriter, _ *http.Request) {
	stats := h.store.Stats()
	writeJSON(w, http.StatusOK, stats)
}
// handleEvent records one caller-supplied ObservedOp into the ring.
// Validation lives in store.Record; ErrInvalidOp maps to 400, any
// other failure to 500.
func (h *handlers) handleEvent(w http.ResponseWriter, r *http.Request) {
	var op observer.ObservedOp
	if !decodeJSON(w, r, &op) {
		return
	}
	err := h.store.Record(op)
	switch {
	case err == nil:
		// recorded — fall through to the success response
	case errors.Is(err, observer.ErrInvalidOp):
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	default:
		slog.Error("observer record", "err", err)
		http.Error(w, "internal", http.StatusInternalServerError)
		return
	}
	writeJSON(w, http.StatusOK, map[string]any{
		"accepted":  true,
		"ring_size": h.store.Stats().Total,
	})
}
// workflowRunRequest is the POST /observer/workflow/run body — a
// Workflow definition in JSON form (matches Archon's YAML shape but
// JSON-serialized for the HTTP path).
type workflowRunRequest struct {
	// Workflow is the full definition to execute; validated by the
	// runner itself (cycles, missing deps, unknown modes → error).
	Workflow workflow.Workflow `json:"workflow"`
}
// handleWorkflowRun executes a JSON-posted workflow definition,
// records one ObservedOp per executed node into the ring, and
// returns the runner result. Definition-level failures (cycle,
// missing dep, unknown mode) surface as 400 with the partial
// result attached.
//
// Consistency fix: the parameters were named r (ResponseWriter) and
// req (Request) — inverted relative to every other handler in this
// file, a latent source of confusion. Renamed to the file's w/r
// convention; no behavior change.
func (h *handlers) handleWorkflowRun(w http.ResponseWriter, r *http.Request) {
	var body workflowRunRequest
	if !decodeJSON(w, r, &body) {
		return
	}
	res, err := h.runner.Run(r.Context(), body.Workflow)
	// Record per-node provenance into the observer ring AS the
	// workflow runs — same shape as any other ObservedOp so the
	// existing /observer/stats aggregation surfaces workflow ops
	// alongside scenario ops without a schema change.
	// NOTE(review): this assumes Run returns a usable res even when
	// err != nil (res.Nodes is ranged and res is echoed in the 400
	// body below) — confirm Run never pairs an error with a nil or
	// invalid result.
	for _, n := range res.Nodes {
		op := observer.ObservedOp{
			Endpoint:      "/observer/workflow/run/" + body.Workflow.Name + "/" + n.NodeID,
			InputSummary:  fmt.Sprintf("workflow=%s node=%s mode=%s", body.Workflow.Name, n.NodeID, n.Mode),
			Success:       n.Error == "",
			DurationMs:    n.DurationMs,
			OutputSummary: summarizeOutput(n.Output),
			Source:        observer.SourceWorkflow,
			Error:         n.Error,
			Timestamp:     n.StartedAt.UTC().Format(time.RFC3339Nano),
		}
		if recErr := h.store.Record(op); recErr != nil {
			// Provenance is best-effort; a failed record must not
			// abort the response for a workflow that already ran.
			slog.Warn("workflow run: provenance record failed", "err", recErr)
		}
	}
	if err != nil {
		// Aborting errors (cycle, missing dep, unknown mode) — surface
		// as 4xx because the workflow definition itself is wrong.
		slog.Warn("workflow run aborted", "err", err)
		writeJSON(w, http.StatusBadRequest, map[string]any{
			"error":  err.Error(),
			"result": res,
		})
		return
	}
	writeJSON(w, http.StatusOK, res)
}
// handleWorkflowModes lists the mode names currently registered on
// the runner, plus a count for quick smoke checks.
func (h *handlers) handleWorkflowModes(w http.ResponseWriter, _ *http.Request) {
	modes := h.runner.Modes()
	payload := map[string]any{
		"modes": modes,
		"count": len(modes),
	}
	writeJSON(w, http.StatusOK, payload)
}
// summarizeOutput renders a workflow node's output map for the
// ObservedOp's OutputSummary string. Best-effort — long values get
// truncated rather than ballooning the ring buffer's memory.
func summarizeOutput(output map[string]any) string {
if output == nil {
return "(nil)"
}
bs, err := json.Marshal(output)
if err != nil {
return fmt.Sprintf("(marshal err: %v)", err)
}
if len(bs) > 256 {
return string(bs[:256]) + "...(truncated)"
}
return string(bs)
}
// registerBuiltinModes wires every mode the runner knows about. The
// pure-function wrappers (matrix.relevance, matrix.downgrade,
// distillation.score, drift.scorer) are direct Go calls; matrix.search
// is HTTP-backed against the configured matrixd_url so workflows can
// compose retrieval into multi-pass measurement chains.
//
// Fixture modes (fixture.echo, fixture.upper) stay registered for
// the workflow_smoke that proves the runner mechanics independently
// of the real modes' availability.
//
// Real-mode follow-ups still pending:
// - playbook.record (HTTP to matrixd)
// - playbook.lookup (HTTP to matrixd)
// - llm.chat (HTTP to gateway /v1/chat)
func registerBuiltinModes(r *workflow.Runner, matrixdURL string, weakModels []string) {
	// Fixture modes for runner mechanics smokes. echo returns a
	// shallow copy of its input; upper upcases input["prompt"].
	echo := func(_ workflow.Context, input map[string]any) (map[string]any, error) {
		copied := make(map[string]any, len(input))
		for key, val := range input {
			copied[key] = val
		}
		return copied, nil
	}
	upper := func(_ workflow.Context, input map[string]any) (map[string]any, error) {
		prompt, _ := input["prompt"].(string)
		return map[string]any{"upper": strings.ToUpper(prompt)}, nil
	}
	r.RegisterMode("fixture.echo", echo)
	r.RegisterMode("fixture.upper", upper)

	// Real modes — pure-function wrappers, no I/O.
	r.RegisterMode("matrix.relevance", workflow.MatrixRelevance)
	// matrix.downgrade reads weakModels from config — Phase 2. A
	// nil/empty list falls back to matrix.DefaultWeakModels per the
	// MatrixDowngradeWithWeakList factory contract.
	r.RegisterMode("matrix.downgrade", workflow.MatrixDowngradeWithWeakList(weakModels))
	r.RegisterMode("distillation.score", workflow.DistillationScore)
	r.RegisterMode("drift.scorer", workflow.DriftScorer)

	// HTTP-backed modes register only when their backend URL is set;
	// tests/dev may run without matrixd.
	if matrixdURL == "" {
		return
	}
	hc := &http.Client{Timeout: 30 * time.Second}
	r.RegisterMode("matrix.search", workflow.MatrixSearch(matrixdURL, hc))
}
// decodeJSON parses the request body — capped at maxRequestBytes —
// into v. On failure it writes the error response itself (413 for
// an oversized body, 400 for anything else) and returns false so
// callers can simply return.
func decodeJSON(w http.ResponseWriter, r *http.Request, v any) bool {
	defer r.Body.Close()
	r.Body = http.MaxBytesReader(w, r.Body, maxRequestBytes)
	err := json.NewDecoder(r.Body).Decode(v)
	if err == nil {
		return true
	}
	// The string match is a belt-and-braces fallback for wrapped
	// errors that predate *http.MaxBytesError.
	var maxErr *http.MaxBytesError
	tooLarge := errors.As(err, &maxErr) ||
		strings.Contains(err.Error(), "http: request body too large")
	if tooLarge {
		http.Error(w, "body too large", http.StatusRequestEntityTooLarge)
	} else {
		http.Error(w, "decode body: "+err.Error(), http.StatusBadRequest)
	}
	return false
}
func writeJSON(w http.ResponseWriter, code int, v any) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(code)
if err := json.NewEncoder(w).Encode(v); err != nil {
slog.Warn("observer write json", "err", err)
}
}