root 89ca72d471 materializer + replay ports + vectord substrate fix verified at scale
Two threads land together (the Go ports and the vectord scale
verification); the doc edits interleave, so they ship in a single
commit.

1. **vectord substrate fix verified at original scale** (closes the
   2026-05-01 thread). Re-ran multitier 5min @ conc=50: 132,211
   scenarios at 438/sec, 6/6 classes at 0% failure (was 4/6 pre-fix).
   Throughput dropped 1,115 → 438/sec because previously-broken
   scenarios now do real HNSW Add work — honest cost of correctness.
   The fix (i.vectors side-store + safeGraphAdd recover wrappers +
   smallIndexRebuildThreshold=32 + saveTask coalescing) holds at the
   footprint that originally surfaced the bug (recover-wrapper sketch
   after this list).

2. **Materializer port** — internal/materializer + cmd/materializer +
   scripts/materializer_smoke.sh. Ports scripts/distillation/transforms.ts
   (12 transforms) + build_evidence_index.ts (idempotency, day-partition,
   receipt). On-wire JSON shape matches TS so Bun and Go runs are
   interchangeable. 14 tests green. (Day-partition sketch after this list.)

3. **Replay port** — internal/replay + cmd/replay +
   scripts/replay_smoke.sh. Ports scripts/distillation/replay.ts
   (retrieve → bundle → /v1/chat → validate → log). Closes audit-FULL
   phase 7 live invocation on the Go side. Both runtimes append to the
   same data/_kb/replay_runs.jsonl (schema=replay_run.v1). 14 tests green.
   (Caller sketch after this list.)
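
The recover-wrapper pattern from item 1, as a minimal sketch: the
interface and names below are illustrative, not the actual vectord types.

    package vectord

    import "fmt"

    // graphAdder is the narrow slice of the HNSW index the wrapper needs;
    // hypothetical interface, the real vectord index type differs.
    type graphAdder interface {
        Add(id uint64, vec []float32)
    }

    // safeGraphAdd turns a panic inside the index's Add into an ordinary
    // error, so one corrupted insert can't take down the whole process.
    func safeGraphAdd(g graphAdder, id uint64, vec []float32) (err error) {
        defer func() {
            if r := recover(); r != nil {
                err = fmt.Errorf("hnsw add panicked for id %d: %v", id, r)
            }
        }()
        g.Add(id, vec)
        return nil
    }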
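
Item 2's day-partition + idempotency contract, in shape only; the paths
and helper names are hypothetical, not the materializer's actual code.

    package materializer

    import (
        "crypto/sha256"
        "encoding/hex"
        "path/filepath"
    )

    // dayPartitionPath puts a run's output under its day partition, e.g.
    // <root>/data/_kb/evidence/2026-05-02/evidence.jsonl. Hypothetical
    // layout; the real index builder may shape paths differently.
    func dayPartitionPath(root, day string) string {
        return filepath.Join(root, "data", "_kb", "evidence", day, "evidence.jsonl")
    }

    // idempotencyKey hashes the raw record bytes: identical input yields an
    // identical key, so a re-run can skip rows the partition already holds.
    func idempotencyKey(raw []byte) string {
        sum := sha256.Sum256(raw)
        return hex.EncodeToString(sum[:])
    }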
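
Item 3's entry point as a caller sees it. The request fields match the
Replay signature in the file below; the import path and task string are
illustrative.

    package main

    import (
        "context"
        "fmt"
        "log"

        "example.invalid/internal/replay" // illustrative import path
    )

    func main() {
        res, err := replay.Replay(context.Background(), replay.ReplayRequest{
            Task:            "summarize yesterday's replay failures", // illustrative task
            DryRun:          true, // no live gateway call
            AllowEscalation: true,
        }, replay.DefaultRoot())
        if err != nil {
            log.Fatal(err) // infrastructure failure: corpus read or log append
        }
        fmt.Println(res.ModelUsed, res.ValidationResult.Passed)
    }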

Side effect on internal/distillation/types.go: EvidenceRecord gained
prompt_tokens, completion_tokens, and metadata fields to mirror the TS
shape the materializer transforms produce.
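
In sketch form (the Go identifiers and the metadata type are assumptions;
the tags are the field names above):

    // Sketch of the EvidenceRecord additions in internal/distillation/types.go.
    // Only the three new fields are shown; metadata's Go type is assumed.
    type EvidenceRecord struct {
        // ...existing fields elided...
        PromptTokens     int            `json:"prompt_tokens"`
        CompletionTokens int            `json:"completion_tokens"`
        Metadata         map[string]any `json:"metadata,omitempty"`
    }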

STATE_OF_PLAY refreshed to 2026-05-02; ARCHITECTURE_COMPARISON decisions
tracker moves the materializer + replay items from _open_ to DONE and
adds the substrate-fix scale verification row.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 03:31:02 -05:00

194 lines · 5.3 KiB · Go

package replay

import (
    "context"
    "crypto/sha256"
    "encoding/hex"
    "encoding/json"
    "fmt"
    "os"
    "path/filepath"
    "time"
)

// DefaultRoot is what the CLI uses when --root isn't passed.
func DefaultRoot() string {
    if r := os.Getenv("LH_DISTILL_ROOT"); r != "" {
        return r
    }
    if cwd, err := os.Getwd(); err == nil {
        return cwd
    }
    return "/home/profit/lakehouse"
}

// Replay runs the retrieve→prompt→model→validate→log pipeline.
// The returned ReplayResult has already been appended to
// data/_kb/replay_runs.jsonl (dry runs are logged too); a failed
// append comes back as the error alongside the in-memory result.
//
// Errors here are *infrastructure* failures (corpus unreadable, log
// write failed). A failed model call OR a failed validation gate is
// captured in ReplayResult.ValidationResult, not returned as error —
// callers can branch on Passed / EscalationPath.
func Replay(ctx context.Context, opts ReplayRequest, root string) (ReplayResult, error) {
    t0 := time.Now()
    recordedAt := time.Now().UTC().Format(time.RFC3339Nano)
    taskHash := sha256Hex(opts.Task)

    corpus, err := LoadRagCorpus(root)
    if err != nil {
        return ReplayResult{}, fmt.Errorf("load rag corpus: %w", err)
    }

    var bundle *ContextBundle
    if !opts.NoRetrieval {
        bundle = BuildContextBundle(corpus, opts.Task)
    }
    prompt := BuildPrompt(opts.Task, bundle)

    localModel := orDefault(opts.LocalModel, DefaultLocalModel)
    escalationModel := orDefault(opts.EscalationModel, DefaultEscalationModel)
    gatewayURL := orDefault(opts.GatewayURL, gatewayFromEnv())

    caller := httpModelCaller(gatewayURL)
    if opts.DryRun {
        caller = dryRunCaller(opts.Task, bundle)
    }

    escalation := []string{localModel}
    modelUsed := localModel
    var modelResponse string
    var validation ValidationResult

    localCall := caller(ctx, localModel, prompt.System, prompt.User)
    if localCall.OK {
        modelResponse = localCall.Content
        validation = ValidateResponse(modelResponse, bundle)
    } else {
        validation = ValidationResult{
            Passed:  false,
            Reasons: []string{"local call failed: " + localCall.Error},
        }
    }

    if !validation.Passed && opts.AllowEscalation && !opts.LocalOnly {
        escalation = append(escalation, escalationModel)
        escalCall := caller(ctx, escalationModel, prompt.System, prompt.User)
        if escalCall.OK {
            modelResponse = escalCall.Content
            modelUsed = escalationModel
            validation = ValidateResponse(modelResponse, bundle)
            if validation.Passed {
                validation.Reasons = append([]string{"recovered via escalation to " + escalationModel}, validation.Reasons...)
            }
        } else {
            validation.Reasons = append(validation.Reasons, "escalation also failed: "+escalCall.Error)
        }
    }

    recordedRunID := fmt.Sprintf("replay:%s:%s",
        taskHash[:16],
        sha256Hex(recordedAt)[:12],
    )
    result := ReplayResult{
        InputTask:          opts.Task,
        TaskHash:           taskHash,
        RetrievedArtifacts: RetrievedIDs{RagIDs: ragIDs(bundle)},
        ContextBundle:      bundle,
        ModelResponse:      modelResponse,
        ModelUsed:          modelUsed,
        EscalationPath:     escalation,
        ValidationResult:   validation,
        RecordedRunID:      recordedRunID,
        RecordedAt:         recordedAt,
        DurationMs:         time.Since(t0).Milliseconds(),
    }

    if err := logReplayEvidence(root, result); err != nil {
        // Logging failure is real — surface it. The caller still gets the
        // in-memory result so they can inspect what happened.
        return result, fmt.Errorf("log replay evidence: %w", err)
    }
    return result, nil
}

// dryRunCaller wraps dryRunSynthesize as a ModelCaller. The escalation
// branch in Replay calls the caller a second time; for parity with TS,
// we return the same content suffixed with [ESCALATED] so a smoke can
// detect escalation in dry-run mode.
func dryRunCaller(task string, bundle *ContextBundle) ModelCaller {
    calls := 0
    return func(_ context.Context, _ string, _ string, _ string) callModelResult {
        calls++
        content := dryRunSynthesize(task, bundle)
        if calls >= 2 {
            content += "\n\n[ESCALATED]"
        }
        return callModelResult{Content: content, OK: true}
    }
}

// logReplayEvidence appends one row to data/_kb/replay_runs.jsonl.
// model_response is truncated to 4000 chars in the persisted log to
// keep the file lean (matches TS behavior).
func logReplayEvidence(root string, result ReplayResult) error {
    path := filepath.Join(root, "data", "_kb", "replay_runs.jsonl")
    if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
        return err
    }
    persist := struct {
        Schema string `json:"schema"`
        ReplayResult
    }{
        Schema:       "replay_run.v1",
        ReplayResult: result,
    }
    persist.ReplayResult.ModelResponse = trim(persist.ReplayResult.ModelResponse, 4000)
    buf, err := json.Marshal(persist)
    if err != nil {
        return err
    }
    buf = append(buf, '\n')
    f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
    if err != nil {
        return err
    }
    defer f.Close()
    _, err = f.Write(buf)
    return err
}

// ragIDs collects the rag ID of every retrieved playbook. A nil bundle
// (retrieval skipped) yields an empty, non-nil slice so the field
// marshals as [] rather than null.
func ragIDs(bundle *ContextBundle) []string {
    if bundle == nil {
        return []string{}
    }
    out := make([]string, 0, len(bundle.RetrievedPlaybooks))
    for _, p := range bundle.RetrievedPlaybooks {
        out = append(out, p.RagID)
    }
    return out
}

// sha256Hex returns the lowercase hex SHA-256 digest of s.
func sha256Hex(s string) string {
    h := sha256.Sum256([]byte(s))
    return hex.EncodeToString(h[:])
}

// gatewayFromEnv prefers LH_GATEWAY_URL, falling back to DefaultGatewayURL.
func gatewayFromEnv() string {
    if u := os.Getenv("LH_GATEWAY_URL"); u != "" {
        return u
    }
    return DefaultGatewayURL
}

// orDefault returns v unless it is empty, in which case fallback wins.
func orDefault(v, fallback string) string {
    if v == "" {
        return fallback
    }
    return v
}