Two threads landing together — the doc edits interleave so they ship in a single commit. 1. **vectord substrate fix verified at original scale** (closes the 2026-05-01 thread). Re-ran multitier 5min @ conc=50: 132,211 scenarios at 438/sec, 6/6 classes at 0% failure (was 4/6 pre-fix). Throughput dropped 1,115 → 438/sec because previously-broken scenarios now do real HNSW Add work — honest cost of correctness. The fix (i.vectors side-store + safeGraphAdd recover wrappers + smallIndexRebuildThreshold=32 + saveTask coalescing) holds at the footprint that originally surfaced the bug. 2. **Materializer port** — internal/materializer + cmd/materializer + scripts/materializer_smoke.sh. Ports scripts/distillation/transforms.ts (12 transforms) + build_evidence_index.ts (idempotency, day-partition, receipt). On-wire JSON shape matches TS so Bun and Go runs are interchangeable. 14 tests green. 3. **Replay port** — internal/replay + cmd/replay + scripts/replay_smoke.sh. Ports scripts/distillation/replay.ts (retrieve → bundle → /v1/chat → validate → log). Closes audit-FULL phase 7 live invocation on the Go side. Both runtimes append to the same data/_kb/replay_runs.jsonl (schema=replay_run.v1). 14 tests green. Side effect on internal/distillation/types.go: EvidenceRecord gained prompt_tokens, completion_tokens, and metadata fields to mirror the TS shape the materializer transforms produce. STATE_OF_PLAY refreshed to 2026-05-02; ARCHITECTURE_COMPARISON decisions tracker moves the materializer + replay items from _open_ to DONE and adds the substrate-fix scale verification row. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
94 lines
2.3 KiB
Go
94 lines
2.3 KiB
Go
// Package materializer ports scripts/distillation/transforms.ts +
|
|
// build_evidence_index.ts to Go. Source rows in data/_kb/*.jsonl are
|
|
// transformed into EvidenceRecord rows under data/evidence/YYYY/MM/DD/.
|
|
//
|
|
// Per ADR-001 #4: port LOGIC, not bit-identical reproducibility — but
|
|
// on-wire JSON layout matches the TS shape so Bun and Go runs stay
|
|
// interchangeable for tooling that reads either output.
|
|
package materializer
|
|
|
|
import (
	"bytes"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"sort"
)
|
|
|
|
// CanonicalSha256 returns the lowercase hex SHA-256 digest of obj's
// canonical JSON encoding. It is meant to produce the same sig_hash as
// the TS canonicalSha256 in auditor/schemas/distillation/types.ts.
//
// Canonical form:
//   - every map[string]any has its keys sorted (recursively, via orderKeys);
//   - []any keeps its element order;
//   - output is compact (no insignificant whitespace, no trailing newline);
//   - strings are written WITHOUT HTML escaping, so '<', '>' and '&'
//     appear literally, the way JavaScript's JSON.stringify writes them.
//     encoding/json's default would emit \u003c, \u003e and \u0026, and
//     any row containing those characters would then hash differently
//     in Go than in a JSON.stringify-based producer.
//
// Determinism contract: identical input → identical hash, regardless of
// the producer's map iteration / serialization order.
//
// Caveats:
//   - Only map[string]any and []any are walked. Other values are handed
//     to encoding/json as-is, so a struct is encoded in field-declaration
//     order rather than sorted-key order. Pass decoded JSON for a fully
//     canonical result.
//   - NOTE(review): encoding/json still escapes U+2028 and U+2029 inside
//     strings even with HTML escaping off; JSON.stringify does not.
//     Confirm whether the TS side needs to agree on those code points.
//
// An encoding failure is returned wrapped with the "canonical marshal"
// prefix; the returned string is empty in that case.
func CanonicalSha256(obj any) (string, error) {
	buf, err := marshalNoHTMLEscape(orderKeys(obj))
	if err != nil {
		return "", fmt.Errorf("canonical marshal: %w", err)
	}
	sum := sha256.Sum256(buf)
	return hex.EncodeToString(sum[:]), nil
}

// marshalNoHTMLEscape encodes v as compact JSON with HTML escaping
// disabled. json.Marshal offers no way to turn the escaping off, so an
// Encoder is used and the newline that Encode always appends is trimmed.
func marshalNoHTMLEscape(v any) ([]byte, error) {
	var buf bytes.Buffer
	enc := json.NewEncoder(&buf)
	enc.SetEscapeHTML(false)
	if err := enc.Encode(v); err != nil {
		return nil, err
	}
	return bytes.TrimSuffix(buf.Bytes(), []byte("\n")), nil
}

// orderKeys returns a copy of v in which every map[string]any has been
// replaced by an orderedMap whose entries are in ascending key order
// (sort.Strings, i.e. bytewise). []any values are rebuilt with each
// element processed recursively and their order kept. Any other value is
// returned unchanged.
func orderKeys(v any) any {
	switch t := v.(type) {
	case map[string]any:
		keys := make([]string, 0, len(t))
		for k := range t {
			keys = append(keys, k)
		}
		sort.Strings(keys)
		out := make(orderedMap, 0, len(keys))
		for _, k := range keys {
			out = append(out, kvPair{Key: k, Value: orderKeys(t[k])})
		}
		return out
	case []any:
		out := make([]any, len(t))
		for i, e := range t {
			out[i] = orderKeys(e)
		}
		return out
	default:
		return v
	}
}

// orderedMap is a JSON object whose members are emitted in slice order.
// orderKeys fills it in sorted-key order so the produced bytes are
// stable.
type orderedMap []kvPair

// kvPair is one member of an orderedMap: the object key and its value.
type kvPair struct {
	Key   string
	Value any
}

// MarshalJSON writes the pairs as a compact JSON object in slice order.
// An empty (or nil) orderedMap encodes as "{}".
//
// Keys and values are encoded without HTML escaping so that the bytes
// hashed by CanonicalSha256 keep '<', '>' and '&' literal. When an
// orderedMap is instead encoded through plain json.Marshal, the outer
// encoder re-applies its own escaping to this method's output, so
// ordinary callers still get encoding/json's default HTML-safe form.
func (om orderedMap) MarshalJSON() ([]byte, error) {
	if len(om) == 0 {
		return []byte("{}"), nil
	}
	out := []byte{'{'}
	for i, kv := range om {
		if i > 0 {
			out = append(out, ',')
		}
		k, err := marshalNoHTMLEscape(kv.Key)
		if err != nil {
			return nil, err
		}
		out = append(out, k...)
		out = append(out, ':')
		v, err := marshalNoHTMLEscape(kv.Value)
		if err != nil {
			return nil, err
		}
		out = append(out, v...)
	}
	out = append(out, '}')
	return out, nil
}
|