Closes J's 2026-05-02 multi-call observability gap: a single
/v1/iterate session with N retries used to surface in Langfuse as
N+1 disconnected traces (one per /v1/chat hop + one for the iterate
request itself), with no parent/child linkage. Operators couldn't
scroll the retry chain in one trace tree to spot where grounding
failed.
## Wire-level change
- New header constant `shared.TraceIDHeader = "X-Lakehouse-Trace-Id"`
- `langfuseMiddleware` honors the header on inbound requests: if
set, reuses that trace id instead of minting a new one. Stashes
the trace id on the request context so handlers can attach
application-level child spans.
- `validatord.chatCaller` forwards the header to chatd. Every chat
hop in an iterate session lands as a child of the parent trace.
## Application-level spans
- `validator.IterateConfig` gains `Tracer` (optional callback).
When wired, each iteration attempt emits one Langfuse span
via `validator.AttemptSpan`:
Name: iterate.attempt[N]
Input: { iteration, model, provider, prompt }
Output: { verdict, raw, error }
Level: WARNING when verdict != accepted
- `validatord.iterTracer` is the production hook — bridges
`validator.Tracer` → `langfuse.Client.Span`.
- `IterateRequest`/`IterateResponse`/`IterateFailure` gain
`TraceID`; each `IterateAttempt` gains `SpanID`. The /v1/iterate
caller can pivot from the JSON response straight into the
Langfuse trace tree.
## What an operator sees post-cutover
GET /v1/iterate {kind=fill, prompt=...} → Trace TR-1
├─ http.request span (from middleware)
├─ iterate.attempt[0] span (validator.Iterate emit)
│ input: prompt+model
│ output: { verdict: validation_failed, error: ..., raw }
├─ chatd /v1/chat call (X-Lakehouse-Trace-Id: TR-1)
│ ├─ http.request span (chatd middleware)
│ └─ chatd-internal spans (existing)
├─ iterate.attempt[1] span
└─ ...
All in one Langfuse trace tree, not N+1 separate traces.
## Hallucinated-worker safety net is unchanged
The /v1/iterate flow's hard correctness gate is still
FillValidator + WorkerLookup. Phantom candidate IDs raise
ValidationError::Consistency which 422s and forces the iteration
loop to retry. The trace-id propagation is the OBSERVABILITY layer
on top — it makes the existing safety net's outcomes visible per-call,
not a replacement for it.
## Verification
- internal/validator: 4 new tests
- TestIterate_TracerEmitsSpanPerAttempt — span/attempt count + SpanID
- TestIterate_NoTraceIDSkipsTracer — no orphan spans without trace_id
- TestIterate_ChatCallerReceivesTraceID — propagation contract
- (existing iterate tests updated for new ChatCaller signature)
- internal/shared: 1 new test
- TestLangfuseMiddleware_HonorsTraceIDHeader — cross-service linkage
- cmd/validatord: existing HTTP tests still PASS via the dual-shape
UnmarshalJSON contract.
- validatord_smoke.sh: 5/5 PASS through gateway :3110 (unchanged).
- Full go test ./... green across 33 packages.
## Architecture invariant added
STATE_OF_PLAY "DO NOT RELITIGATE" gains a paragraph documenting
the X-Lakehouse-Trace-Id header contract + the iterate.attempt[N]
span emission. Future-Claude won't re-propose "wire trace-id
propagation" — the header IS the wiring.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
191 lines · 6.0 KiB · Go
package shared
|
|
|
|
import (
|
|
"io"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"sync"
|
|
"testing"
|
|
|
|
"git.agentview.dev/profit/golangLAKEHOUSE/internal/langfuse"
|
|
)
|
|
|
|
// TestLangfuseMiddleware_NilClientPassthrough locks the
|
|
// "no client → no-op" contract. Every daemon calls shared.Run;
|
|
// operators who don't set LANGFUSE_URL must not see middleware
|
|
// failures, latency, or behavior change of any kind.
|
|
func TestLangfuseMiddleware_NilClientPassthrough(t *testing.T) {
|
|
mw := langfuseMiddleware("test-service", nil)
|
|
called := false
|
|
h := mw(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
|
called = true
|
|
w.WriteHeader(http.StatusTeapot) // distinctive code
|
|
}))
|
|
srv := httptest.NewServer(h)
|
|
defer srv.Close()
|
|
resp, err := http.Get(srv.URL + "/anything")
|
|
if err != nil {
|
|
t.Fatalf("GET: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusTeapot {
|
|
t.Errorf("expected 418 (handler ran), got %d", resp.StatusCode)
|
|
}
|
|
if !called {
|
|
t.Error("handler should have run via passthrough")
|
|
}
|
|
}
|
|
|
|
// TestLangfuseMiddleware_HealthBypassed locks the /health-exempt
|
|
// rule (per langfuseMiddleware's doc comment): LB probes must not
|
|
// emit traces or the trace volume drowns out real signal.
|
|
func TestLangfuseMiddleware_HealthBypassed(t *testing.T) {
|
|
var (
|
|
mu sync.Mutex
|
|
captured []string // ingestion endpoint payloads
|
|
)
|
|
// Mock Langfuse ingestion endpoint that records every batch.
|
|
lfMock := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
body, _ := io.ReadAll(r.Body)
|
|
mu.Lock()
|
|
captured = append(captured, string(body))
|
|
mu.Unlock()
|
|
w.WriteHeader(http.StatusOK)
|
|
}))
|
|
defer lfMock.Close()
|
|
|
|
lf := langfuse.New(lfMock.URL, "test-pk", "test-sk", nil)
|
|
mw := langfuseMiddleware("test-service", lf)
|
|
h := mw(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
|
w.WriteHeader(http.StatusOK)
|
|
}))
|
|
srv := httptest.NewServer(h)
|
|
defer srv.Close()
|
|
|
|
resp, err := http.Get(srv.URL + "/health")
|
|
if err != nil {
|
|
t.Fatalf("GET /health: %v", err)
|
|
}
|
|
resp.Body.Close()
|
|
|
|
_ = lf.Close() // force flush
|
|
|
|
mu.Lock()
|
|
defer mu.Unlock()
|
|
if len(captured) > 0 {
|
|
t.Errorf("expected zero ingestion calls for /health, got %d (%v)", len(captured), captured)
|
|
}
|
|
}
|
|
|
|
// TestLangfuseMiddleware_RealRequestEmitted locks the happy path:
|
|
// a real request through an authed route produces ingestion events
|
|
// (trace + span). We don't decode the payload here — the
|
|
// internal/langfuse client tests already verify the wire format.
|
|
// What this test asserts is the wiring: middleware → client → POST.
|
|
func TestLangfuseMiddleware_RealRequestEmitted(t *testing.T) {
|
|
var (
|
|
mu sync.Mutex
|
|
captured int
|
|
)
|
|
lfMock := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
_, _ = io.ReadAll(r.Body)
|
|
mu.Lock()
|
|
captured++
|
|
mu.Unlock()
|
|
w.WriteHeader(http.StatusOK)
|
|
}))
|
|
defer lfMock.Close()
|
|
|
|
lf := langfuse.New(lfMock.URL, "test-pk", "test-sk", nil)
|
|
mw := langfuseMiddleware("test-service", lf)
|
|
h := mw(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
|
w.WriteHeader(http.StatusOK)
|
|
}))
|
|
srv := httptest.NewServer(h)
|
|
defer srv.Close()
|
|
|
|
resp, err := http.Get(srv.URL + "/api/data")
|
|
if err != nil {
|
|
t.Fatalf("GET /api/data: %v", err)
|
|
}
|
|
resp.Body.Close()
|
|
|
|
_ = lf.Close() // force flush — sends the queued trace + span
|
|
|
|
mu.Lock()
|
|
defer mu.Unlock()
|
|
if captured == 0 {
|
|
t.Error("expected at least one ingestion call after real request")
|
|
}
|
|
}
|
|
|
|
// TestLangfuseMiddleware_HonorsTraceIDHeader locks the cross-service
|
|
// trace-linkage contract: when validatord forwards a /v1/iterate's
|
|
// trace id to chatd via X-Lakehouse-Trace-Id, chatd's middleware
|
|
// MUST reuse that id rather than minting a new one. Without this,
|
|
// every chat hop spawned its own orphan trace and an iterate session
|
|
// with N retries showed in Langfuse as N+1 disconnected traces.
|
|
//
|
|
// We verify by inspecting the trace id stashed on the request
|
|
// context — handlers downstream pull it via TraceIDFromCtx and
|
|
// attach finer-grained spans to it.
|
|
func TestLangfuseMiddleware_HonorsTraceIDHeader(t *testing.T) {
|
|
// nil client path is fine — TraceIDFromCtx still propagates the
|
|
// header value because the middleware sets the ctx value before
|
|
// minting any new id. But that's only true when the client is
|
|
// non-nil (the nil-client branch is a passthrough that never sets
|
|
// the ctx value). So we wire a real (mocked) Langfuse to exercise
|
|
// the header-honoring branch.
|
|
lfMock := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
_, _ = io.ReadAll(r.Body)
|
|
w.WriteHeader(http.StatusOK)
|
|
}))
|
|
defer lfMock.Close()
|
|
lf := langfuse.New(lfMock.URL, "test-pk", "test-sk", nil)
|
|
|
|
mw := langfuseMiddleware("test-service", lf)
|
|
var observed string
|
|
h := mw(http.HandlerFunc(func(_ http.ResponseWriter, r *http.Request) {
|
|
observed = TraceIDFromCtx(r.Context())
|
|
}))
|
|
srv := httptest.NewServer(h)
|
|
defer srv.Close()
|
|
|
|
req, _ := http.NewRequest("POST", srv.URL+"/api/echo", nil)
|
|
req.Header.Set(TraceIDHeader, "trace-from-parent")
|
|
resp, err := http.DefaultClient.Do(req)
|
|
if err != nil {
|
|
t.Fatalf("POST: %v", err)
|
|
}
|
|
resp.Body.Close()
|
|
if observed != "trace-from-parent" {
|
|
t.Errorf("expected forwarded trace id, got %q", observed)
|
|
}
|
|
}
|
|
|
|
// TestLangfuseMiddleware_StatusCaptured locks the status-writer
|
|
// wrapping: when the handler returns 500, the middleware must see
|
|
// 500 in the span output (otherwise error traces all show 200 and
|
|
// debugging gets harder).
|
|
func TestLangfuseMiddleware_StatusCaptured(t *testing.T) {
|
|
mw := langfuseMiddleware("test-service", nil) // nil client; just exercise wrapping
|
|
called := false
|
|
h := mw(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
|
called = true
|
|
http.Error(w, "boom", http.StatusInternalServerError)
|
|
}))
|
|
srv := httptest.NewServer(h)
|
|
defer srv.Close()
|
|
resp, err := http.Get(srv.URL + "/api/fail")
|
|
if err != nil {
|
|
t.Fatalf("GET: %v", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusInternalServerError {
|
|
t.Errorf("expected 500, got %d", resp.StatusCode)
|
|
}
|
|
if !called {
|
|
t.Error("handler should have run")
|
|
}
|
|
}
|