Two threads landing together — the doc edits interleave so they ship in a single commit. 1. **vectord substrate fix verified at original scale** (closes the 2026-05-01 thread). Re-ran multitier 5min @ conc=50: 132,211 scenarios at 438/sec, 6/6 classes at 0% failure (was 4/6 pre-fix). Throughput dropped 1,115 → 438/sec because previously-broken scenarios now do real HNSW Add work — honest cost of correctness. The fix (i.vectors side-store + safeGraphAdd recover wrappers + smallIndexRebuildThreshold=32 + saveTask coalescing) holds at the footprint that originally surfaced the bug. 2. **Materializer port** — internal/materializer + cmd/materializer + scripts/materializer_smoke.sh. Ports scripts/distillation/transforms.ts (12 transforms) + build_evidence_index.ts (idempotency, day-partition, receipt). On-wire JSON shape matches TS so Bun and Go runs are interchangeable. 14 tests green. 3. **Replay port** — internal/replay + cmd/replay + scripts/replay_smoke.sh. Ports scripts/distillation/replay.ts (retrieve → bundle → /v1/chat → validate → log). Closes audit-FULL phase 7 live invocation on the Go side. Both runtimes append to the same data/_kb/replay_runs.jsonl (schema=replay_run.v1). 14 tests green. Side effect on internal/distillation/types.go: EvidenceRecord gained prompt_tokens, completion_tokens, and metadata fields to mirror the TS shape the materializer transforms produce. STATE_OF_PLAY refreshed to 2026-05-02; ARCHITECTURE_COMPARISON decisions tracker moves the materializer + replay items from _open_ to DONE and adds the substrate-fix scale verification row. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
88 lines
3.0 KiB
Go
88 lines
3.0 KiB
Go
// replay — Go-side distillation replay runner. Closes audit-FULL
|
|
// phase 7 live invocation on the Go side. Mirrors
|
|
// scripts/distillation/replay.ts; both runtimes append to the same
|
|
// `data/_kb/replay_runs.jsonl` shape (schema=replay_run.v1).
|
|
//
|
|
// Usage:
|
|
//
|
|
// replay -task "rebuild evidence index"
|
|
// replay -task "..." -allow-escalation
|
|
// replay -task "..." -no-retrieval # baseline mode
|
|
// replay -task "..." -dry-run # synthetic, no LLM
|
|
// replay -task "..." -root /home/profit/lakehouse # custom repo root
|
|
package main
|
|
|
|
import (
|
|
"context"
|
|
"flag"
|
|
"fmt"
|
|
"os"
|
|
"strings"
|
|
|
|
"git.agentview.dev/profit/golangLAKEHOUSE/internal/replay"
|
|
)
|
|
|
|
func main() {
|
|
task := flag.String("task", "", "input task to replay")
|
|
localOnly := flag.Bool("local-only", false, "never escalate; record validation result only")
|
|
allowEscalation := flag.Bool("allow-escalation", false, "fall back to the bigger model when local validation fails")
|
|
noRetrieval := flag.Bool("no-retrieval", false, "baseline mode: skip retrieval bundle (still logs)")
|
|
dryRun := flag.Bool("dry-run", false, "synthesize a deterministic response — no LLM call")
|
|
root := flag.String("root", replay.DefaultRoot(), "lakehouse repo root (defaults to $LH_DISTILL_ROOT or cwd)")
|
|
gateway := flag.String("gateway", "", "override gateway URL (default: $LH_GATEWAY_URL or http://localhost:3110)")
|
|
localModel := flag.String("local-model", "", "override local model name")
|
|
escalationModel := flag.String("escalation-model", "", "override escalation model name")
|
|
flag.Parse()
|
|
|
|
if *task == "" {
|
|
fmt.Fprintln(os.Stderr, `usage: replay -task "<input>" [-local-only] [-allow-escalation] [-no-retrieval] [-dry-run]`)
|
|
os.Exit(2)
|
|
}
|
|
|
|
res, err := replay.Replay(context.Background(), replay.ReplayRequest{
|
|
Task: *task,
|
|
LocalOnly: *localOnly,
|
|
AllowEscalation: *allowEscalation,
|
|
NoRetrieval: *noRetrieval,
|
|
DryRun: *dryRun,
|
|
GatewayURL: *gateway,
|
|
LocalModel: *localModel,
|
|
EscalationModel: *escalationModel,
|
|
}, *root)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "replay: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
fmt.Printf("[replay] run_id=%s\n", res.RecordedRunID)
|
|
if res.ContextBundle == nil {
|
|
fmt.Println("[replay] retrieval: DISABLED")
|
|
} else {
|
|
fmt.Printf("[replay] retrieval: %d playbooks\n", len(res.ContextBundle.RetrievedPlaybooks))
|
|
}
|
|
fmt.Printf("[replay] escalation_path: %s\n", strings.Join(res.EscalationPath, " → "))
|
|
fmt.Printf("[replay] model_used: %s · %dms\n", res.ModelUsed, res.DurationMs)
|
|
verdict := "PASS"
|
|
if !res.ValidationResult.Passed {
|
|
verdict = "FAIL"
|
|
}
|
|
suffix := ""
|
|
if len(res.ValidationResult.Reasons) > 0 {
|
|
suffix = " (" + strings.Join(res.ValidationResult.Reasons, "; ") + ")"
|
|
}
|
|
fmt.Printf("[replay] validation: %s%s\n", verdict, suffix)
|
|
fmt.Println()
|
|
fmt.Println("─── response ───")
|
|
body := res.ModelResponse
|
|
if len(body) > 1500 {
|
|
fmt.Println(body[:1500])
|
|
fmt.Printf("... [%d more chars]\n", len(body)-1500)
|
|
} else {
|
|
fmt.Println(body)
|
|
}
|
|
|
|
if !res.ValidationResult.Passed {
|
|
os.Exit(1)
|
|
}
|
|
}
|