root 89ca72d471 materializer + replay ports + vectord substrate fix verified at scale
Two threads landing together — the doc edits interleave so they ship
in a single commit.

1. **vectord substrate fix verified at original scale** (closes the
   2026-05-01 thread). Re-ran multitier 5min @ conc=50: 132,211
   scenarios at 438/sec, 6/6 classes at 0% failure (was 4/6 pre-fix).
   Throughput dropped 1,115 → 438/sec because previously-broken
   scenarios now do real HNSW Add work — honest cost of correctness.
   The fix (i.vectors side-store + safeGraphAdd recover wrappers +
   smallIndexRebuildThreshold=32 + saveTask coalescing) holds at the
   footprint that originally surfaced the bug.

2. **Materializer port** — internal/materializer + cmd/materializer +
   scripts/materializer_smoke.sh. Ports scripts/distillation/transforms.ts
   (12 transforms) + build_evidence_index.ts (idempotency, day-partition,
   receipt). On-wire JSON shape matches TS so Bun and Go runs are
   interchangeable. 14 tests green.

3. **Replay port** — internal/replay + cmd/replay +
   scripts/replay_smoke.sh. Ports scripts/distillation/replay.ts
   (retrieve → bundle → /v1/chat → validate → log). Closes audit-FULL
   phase 7 live invocation on the Go side. Both runtimes append to the
   same data/_kb/replay_runs.jsonl (schema=replay_run.v1). 14 tests green.

Side effect on internal/distillation/types.go: EvidenceRecord gained
prompt_tokens, completion_tokens, and metadata fields to mirror the TS
shape the materializer transforms produce.

STATE_OF_PLAY refreshed to 2026-05-02; ARCHITECTURE_COMPARISON decisions
tracker moves the materializer + replay items from _open_ to DONE and
adds the substrate-fix scale verification row.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 03:31:02 -05:00

132 lines
3.9 KiB
Go

package replay
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
)
// callModelResult is what the gateway round-trip returns.
//
// On success OK is true and Content holds the model's reply text. On
// any failure OK is false and Error carries a human-readable cause;
// the caller never sees a Go error directly.
type callModelResult struct {
Content string
OK bool
Error string
}
// ModelCaller is the seam tests use to swap out HTTP. Production
// supplies httpModelCaller; tests can supply scripted responses.
//
// Implementations receive the target model name plus the system and
// user prompts, and fold every failure mode into the returned
// callModelResult (no separate error return).
type ModelCaller func(ctx context.Context, model, system, user string) callModelResult
// httpModelCaller posts to ${gatewayURL}/v1/chat with provider derived
// from model name. Mirrors replay.ts:callModel so both runtimes send
// the same request shape to the gateway.
//
// The returned ModelCaller never yields a Go error: marshal, transport,
// HTTP-status, body-read, and parse failures are all folded into
// callModelResult.Error so callers report them uniformly.
func httpModelCaller(gatewayURL string) ModelCaller {
	// One shared client for all calls; 180s accommodates slow cloud
	// model generations.
	client := &http.Client{Timeout: 180 * time.Second}
	return func(ctx context.Context, model, system, user string) callModelResult {
		provider := inferProvider(model)
		body, err := json.Marshal(map[string]any{
			"provider": provider,
			"model":    model,
			"messages": []map[string]string{
				{"role": "system", "content": system},
				{"role": "user", "content": user},
			},
			"max_tokens":  1500,
			"temperature": 0.1,
		})
		if err != nil {
			return callModelResult{Error: "marshal request: " + err.Error()}
		}
		req, err := http.NewRequestWithContext(ctx, http.MethodPost, gatewayURL+"/v1/chat", bytes.NewReader(body))
		if err != nil {
			return callModelResult{Error: "build request: " + err.Error()}
		}
		req.Header.Set("Content-Type", "application/json")
		resp, err := client.Do(req)
		if err != nil {
			return callModelResult{Error: trim(err.Error(), 240)}
		}
		defer resp.Body.Close()
		buf, err := io.ReadAll(resp.Body)
		if err != nil {
			// Previously ignored: a truncated body would surface later
			// as a misleading JSON parse error instead of a read error.
			return callModelResult{Error: "read response: " + trim(err.Error(), 240)}
		}
		if resp.StatusCode >= 400 {
			return callModelResult{Error: fmt.Sprintf("HTTP %d: %s", resp.StatusCode, trim(string(buf), 240))}
		}
		// Decode only the fields we need from the OpenAI-style
		// chat-completion response.
		var parsed struct {
			Choices []struct {
				Message struct {
					Content string `json:"content"`
				} `json:"message"`
			} `json:"choices"`
		}
		if err := json.Unmarshal(buf, &parsed); err != nil {
			return callModelResult{Error: "parse response: " + err.Error()}
		}
		content := ""
		if len(parsed.Choices) > 0 {
			content = parsed.Choices[0].Message.Content
		}
		return callModelResult{Content: content, OK: true}
	}
}
// inferProvider picks the right /v1/chat provider for a given model
// name. Mirrors replay.ts:callModel's branching exactly so the gateway
// sees the same request shape regardless of caller runtime.
//
// "/" in name → openrouter
// kimi-/qwen3-coder/... → ollama_cloud
// else → ollama (local)
func inferProvider(model string) string {
	// Slash-qualified names ("org/model") always route via OpenRouter;
	// this check runs first, before any cloud-family matching.
	if strings.Contains(model, "/") {
		return "openrouter"
	}
	// Model families served by ollama_cloud, matched by prefix…
	for _, prefix := range []string{"kimi-", "qwen3-coder", "deepseek-v", "mistral-large"} {
		if strings.HasPrefix(model, prefix) {
			return "ollama_cloud"
		}
	}
	// …plus two exact-name cloud models.
	if model == "gpt-oss:120b" || model == "qwen3.5:397b" {
		return "ollama_cloud"
	}
	// Everything else runs on the local Ollama daemon.
	return "ollama"
}
// dryRunSynthesize produces a deterministic synthetic response that
// echoes context-bundle signals. Used by tests + dry-run mode to
// exercise retrieval + validation without a live LLM.
func dryRunSynthesize(task string, bundle *ContextBundle) string {
	lines := make([]string, 0, 8)
	lines = append(lines, "Synthetic dry-run response for task: "+trim(task, 120), "")
	// No bundle → minimal fallback answer; return early.
	if bundle == nil {
		lines = append(lines, "No retrieval context — answering from task alone. Verify and check produced output before approving.")
		return strings.Join(lines, "\n")
	}
	// NOTE(review): the "%d partial" slot is fed from FailurePatterns —
	// confirm that label is intended.
	lines = append(lines, fmt.Sprintf(
		"Retrieved %d playbooks; %d accepted, %d partial.",
		len(bundle.RetrievedPlaybooks),
		len(bundle.PriorSuccessfulOutputs),
		len(bundle.FailurePatterns),
	))
	// Echo at most the first three validation steps.
	if steps := bundle.ValidationSteps; len(steps) > 0 {
		lines = append(lines, "Following validation checklist:")
		if len(steps) > 3 {
			steps = steps[:3]
		}
		for _, step := range steps {
			lines = append(lines, "- "+step)
		}
	}
	if prior := bundle.PriorSuccessfulOutputs; len(prior) > 0 {
		lines = append(lines, "", "Anchored on prior accepted: "+prior[0].Title)
	}
	return strings.Join(lines, "\n")
}