Two threads landing together — the doc edits interleave so they ship in a single commit. 1. **vectord substrate fix verified at original scale** (closes the 2026-05-01 thread). Re-ran multitier 5min @ conc=50: 132,211 scenarios at 438/sec, 6/6 classes at 0% failure (was 4/6 pre-fix). Throughput dropped 1,115 → 438/sec because previously-broken scenarios now do real HNSW Add work — honest cost of correctness. The fix (i.vectors side-store + safeGraphAdd recover wrappers + smallIndexRebuildThreshold=32 + saveTask coalescing) holds at the footprint that originally surfaced the bug. 2. **Materializer port** — internal/materializer + cmd/materializer + scripts/materializer_smoke.sh. Ports scripts/distillation/transforms.ts (12 transforms) + build_evidence_index.ts (idempotency, day-partition, receipt). On-wire JSON shape matches TS so Bun and Go runs are interchangeable. 14 tests green. 3. **Replay port** — internal/replay + cmd/replay + scripts/replay_smoke.sh. Ports scripts/distillation/replay.ts (retrieve → bundle → /v1/chat → validate → log). Closes audit-FULL phase 7 live invocation on the Go side. Both runtimes append to the same data/_kb/replay_runs.jsonl (schema=replay_run.v1). 14 tests green. Side effect on internal/distillation/types.go: EvidenceRecord gained prompt_tokens, completion_tokens, and metadata fields to mirror the TS shape the materializer transforms produce. STATE_OF_PLAY refreshed to 2026-05-02; ARCHITECTURE_COMPARISON decisions tracker moves the materializer + replay items from _open_ to DONE and adds the substrate-fix scale verification row. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
194 lines
5.3 KiB
Go
194 lines
5.3 KiB
Go
package replay
|
|
|
|
import (
|
|
"context"
|
|
"crypto/sha256"
|
|
"encoding/hex"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"time"
|
|
)
|
|
|
|
// DefaultRoot is what the CLI uses when --root isn't passed.
|
|
func DefaultRoot() string {
|
|
if r := os.Getenv("LH_DISTILL_ROOT"); r != "" {
|
|
return r
|
|
}
|
|
if cwd, err := os.Getwd(); err == nil {
|
|
return cwd
|
|
}
|
|
return "/home/profit/lakehouse"
|
|
}
|
|
|
|
// Replay runs the retrieve→prompt→model→validate→log pipeline.
// Returns a ReplayResult that's already been appended to
// data/_kb/replay_runs.jsonl. Note DryRun does NOT skip logging — it
// only swaps the HTTP model caller for a synthetic one; every run,
// dry or live, is appended to the log.
//
// Errors here are *infrastructure* failures (corpus unreadable, log
// write failed). A failed model call OR a failed validation gate is
// captured in ReplayResult.ValidationResult, not returned as error —
// callers can branch on Passed / EscalationPath.
func Replay(ctx context.Context, opts ReplayRequest, root string) (ReplayResult, error) {
	t0 := time.Now()
	recordedAt := time.Now().UTC().Format(time.RFC3339Nano)

	// Task hash doubles as the stable component of the recorded run ID.
	taskHash := sha256Hex(opts.Task)

	corpus, err := LoadRagCorpus(root)
	if err != nil {
		return ReplayResult{}, fmt.Errorf("load rag corpus: %w", err)
	}

	// bundle stays nil when retrieval is disabled; everything downstream
	// (BuildPrompt, ValidateResponse, ragIDs) is handed the nil and must
	// tolerate it.
	var bundle *ContextBundle
	if !opts.NoRetrieval {
		bundle = BuildContextBundle(corpus, opts.Task)
	}
	prompt := BuildPrompt(opts.Task, bundle)

	// Resolve models and gateway: explicit option wins, else defaults
	// (gateway additionally falls back to LH_GATEWAY_URL via gatewayFromEnv).
	localModel := orDefault(opts.LocalModel, DefaultLocalModel)
	escalationModel := orDefault(opts.EscalationModel, DefaultEscalationModel)
	gatewayURL := orDefault(opts.GatewayURL, gatewayFromEnv())

	caller := httpModelCaller(gatewayURL)
	if opts.DryRun {
		caller = dryRunCaller(opts.Task, bundle)
	}

	// escalation records every model attempted, in call order; it always
	// starts with the local model even if that call fails.
	escalation := []string{localModel}
	modelUsed := localModel
	var modelResponse string
	var validation ValidationResult

	// First attempt: local model.
	localCall := caller(ctx, localModel, prompt.System, prompt.User)
	if localCall.OK {
		modelResponse = localCall.Content
		validation = ValidateResponse(modelResponse, bundle)
	} else {
		// A transport/model failure is folded into a failed validation
		// rather than returned — escalation below may still recover it.
		validation = ValidationResult{
			Passed: false,
			Reasons: []string{"local call failed: " + localCall.Error},
		}
	}

	// Escalate only when the gate failed AND the caller opted in AND
	// local-only mode isn't forcing us to stop here.
	if !validation.Passed && opts.AllowEscalation && !opts.LocalOnly {
		escalation = append(escalation, escalationModel)
		escalCall := caller(ctx, escalationModel, prompt.System, prompt.User)
		if escalCall.OK {
			modelResponse = escalCall.Content
			modelUsed = escalationModel
			validation = ValidateResponse(modelResponse, bundle)
			if validation.Passed {
				// Prepend so the recovery note reads first in the log.
				validation.Reasons = append([]string{"recovered via escalation to " + escalationModel}, validation.Reasons...)
			}
		} else {
			validation.Reasons = append(validation.Reasons, "escalation also failed: "+escalCall.Error)
		}
	}

	// Run ID mixes the task hash with a hash of the RFC3339Nano timestamp
	// so repeated replays of the same task remain distinguishable.
	recordedRunID := fmt.Sprintf("replay:%s:%s",
		taskHash[:16],
		sha256Hex(recordedAt)[:12],
	)
	result := ReplayResult{
		InputTask: opts.Task,
		TaskHash: taskHash,
		RetrievedArtifacts: RetrievedIDs{RagIDs: ragIDs(bundle)},
		ContextBundle: bundle,
		ModelResponse: modelResponse,
		ModelUsed: modelUsed,
		EscalationPath: escalation,
		ValidationResult: validation,
		RecordedRunID: recordedRunID,
		RecordedAt: recordedAt,
		DurationMs: time.Since(t0).Milliseconds(),
	}

	if err := logReplayEvidence(root, result); err != nil {
		// Logging failure is real — surface it. The caller still gets the
		// in-memory result so they can inspect what happened.
		return result, fmt.Errorf("log replay evidence: %w", err)
	}
	return result, nil
}
|
|
|
|
// dryRunCaller wraps dryRunSynthesize as a ModelCaller. The escalation
|
|
// branch in Replay calls the caller a second time; for parity with TS,
|
|
// we return the same content suffixed with [ESCALATED] so a smoke can
|
|
// detect escalation in dry-run mode.
|
|
func dryRunCaller(task string, bundle *ContextBundle) ModelCaller {
|
|
calls := 0
|
|
return func(_ context.Context, _ string, _ string, _ string) callModelResult {
|
|
calls++
|
|
content := dryRunSynthesize(task, bundle)
|
|
if calls >= 2 {
|
|
content += "\n\n[ESCALATED]"
|
|
}
|
|
return callModelResult{Content: content, OK: true}
|
|
}
|
|
}
|
|
|
|
// logReplayEvidence appends one row to data/_kb/replay_runs.jsonl.
|
|
// model_response is truncated to 4000 chars in the persisted log to
|
|
// keep the file lean (matches TS behavior).
|
|
func logReplayEvidence(root string, result ReplayResult) error {
|
|
path := filepath.Join(root, "data", "_kb", "replay_runs.jsonl")
|
|
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
|
|
return err
|
|
}
|
|
|
|
persist := struct {
|
|
Schema string `json:"schema"`
|
|
ReplayResult
|
|
}{
|
|
Schema: "replay_run.v1",
|
|
ReplayResult: result,
|
|
}
|
|
persist.ReplayResult.ModelResponse = trim(persist.ReplayResult.ModelResponse, 4000)
|
|
|
|
buf, err := json.Marshal(persist)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
buf = append(buf, '\n')
|
|
|
|
f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer f.Close()
|
|
_, err = f.Write(buf)
|
|
return err
|
|
}
|
|
|
|
func ragIDs(bundle *ContextBundle) []string {
|
|
if bundle == nil {
|
|
return []string{}
|
|
}
|
|
out := make([]string, 0, len(bundle.RetrievedPlaybooks))
|
|
for _, p := range bundle.RetrievedPlaybooks {
|
|
out = append(out, p.RagID)
|
|
}
|
|
return out
|
|
}
|
|
|
|
// sha256Hex returns the lowercase hex digest of s.
func sha256Hex(s string) string {
	return fmt.Sprintf("%x", sha256.Sum256([]byte(s)))
}
|
|
|
|
func gatewayFromEnv() string {
|
|
if u := os.Getenv("LH_GATEWAY_URL"); u != "" {
|
|
return u
|
|
}
|
|
return DefaultGatewayURL
|
|
}
|
|
|
|
// orDefault returns v unless it is empty, in which case fallback.
func orDefault(v, fallback string) string {
	if v != "" {
		return v
	}
	return fallback
}
|