// Multi-coordinator stress harness — Phase 1 of the 48-hour mock.
//
// Three coordinators (Alice, Bob, Carol) each own a contract with a
// different demand profile. They run queries against the matrix
// indexer with separate playbook namespaces. The harness fires
// scenario phases (baseline → surge → merge → handover → split) and
// captures every response so we can verify:
//
//  1. Diversity — different (coord, contract, role) triples should
//     surface DIFFERENT top-K worker IDs. If everything returns the
//     same handful of workers, the system is "cycling" not "locking
//     into scenarios."
//  2. Non-determinism — same query reissued should return near-
//     identical top-K (controlled variance from HNSW + judge, if any).
//  3. Learning — after Alice records playbook entries for her
//     contract's queries, Bob takes over the same contract using
//     Alice's playbook namespace; Alice's recordings should surface
//     in Bob's results.
//
// Phase 1 deliberately skips: time-based event clock (events fire
// sequentially), email/SMS ingest (no integration yet), Langfuse
// tracing (would need Go-side wiring). Those are Phase 2/3.
package main import ( "bufio" "bytes" "context" "encoding/json" "flag" "fmt" "io" "log" "net/http" "os" "path/filepath" "sort" "strings" "time" "git.agentview.dev/profit/golangLAKEHOUSE/internal/langfuse" ) // ── data shapes ────────────────────────────────────────────────── type Demand struct { Role string `json:"role"` Count int `json:"count"` Skills []string `json:"skills"` Certs []string `json:"certs"` InRoster *bool `json:"in_roster,omitempty"` // nil = assume true } type Contract struct { Name string `json:"name"` Client string `json:"client"` Location string `json:"location"` Shift string `json:"shift"` Demand []Demand `json:"demand"` } type Coordinator struct { Name string PlaybookCorpus string } // ── matrix.search wire shapes ──────────────────────────────────── type matrixSearchReq struct { QueryText string `json:"query_text"` Corpora []string `json:"corpora"` K int `json:"k"` UsePlaybook bool `json:"use_playbook,omitempty"` PlaybookCorpus string `json:"playbook_corpus,omitempty"` ExcludeIDs []string `json:"exclude_ids,omitempty"` } type matrixResult struct { ID string `json:"id"` Distance float32 `json:"distance"` Corpus string `json:"corpus"` Metadata json.RawMessage `json:"metadata,omitempty"` } type matrixResp struct { Results []matrixResult `json:"results"` PerCorpusCounts map[string]int `json:"per_corpus_counts"` PlaybookBoosted int `json:"playbook_boosted,omitempty"` PlaybookInjected int `json:"playbook_injected,omitempty"` } // ── event capture ──────────────────────────────────────────────── type ResultRef struct { Rank int `json:"rank"` ID string `json:"id"` Corpus string `json:"corpus"` Distance float32 `json:"distance"` } type Event struct { Phase string `json:"phase"` Hour int `json:"hour"` // operational-narrative time label, not real wall clock Coordinator string `json:"coordinator"` Contract string `json:"contract"` Role string `json:"role"` Query string `json:"query"` SurgeMultiplier int `json:"surge_multiplier,omitempty"` UsePlaybook 
bool `json:"use_playbook"` PlaybookCorpus string `json:"playbook_corpus,omitempty"` ExcludeIDs []string `json:"exclude_ids,omitempty"` TopK []ResultRef `json:"top_k"` PerCorpusCounts map[string]int `json:"per_corpus_counts,omitempty"` PlaybookBoosted int `json:"playbook_boosted,omitempty"` PlaybookInjected int `json:"playbook_injected,omitempty"` // JudgeRating: 1-5 quality score on top-1 result against the // original inbox body (not the LLM-parsed query). Lets us flag // the case where LLM parsing produces a tight-distance match // but the result doesn't actually fit the original ask. // 0 = unrated, 1-5 = judge verdict. JudgeRating int `json:"judge_rating,omitempty"` Note string `json:"note,omitempty"` TimestampUnixNano int64 `json:"ts_ns"` } type Output struct { Coordinators []string `json:"coordinators"` Contracts []string `json:"contracts"` Events []Event `json:"events"` Diversity Diversity `json:"diversity"` Determinism Determ `json:"determinism"` Learning Learning `json:"learning"` GeneratedAt time.Time `json:"generated_at"` } // Diversity = how distinct are top-K worker sets across (coord, // contract, role) triples that SHOULD differ. We compute mean Jaccard // similarity for matched-role-across-contracts pairs (lower is more // diverse) and matched-coord-different-roles pairs. type Diversity struct { SameRoleAcrossContractsMeanJaccard float64 `json:"same_role_across_contracts_mean_jaccard"` DifferentRolesSameContractMeanJaccard float64 `json:"different_roles_same_contract_mean_jaccard"` NumPairsSameRoleAcrossContracts int `json:"num_pairs_same_role_across_contracts"` NumPairsDifferentRolesSameContract int `json:"num_pairs_different_roles_same_contract"` } // Determ = same query reissued — top-K should be near-identical. // Jaccard close to 1.0 = stable / deterministic, < 0.95 = some HNSW // or judge variance. type Determ struct { MeanJaccard float64 `json:"mean_jaccard"` NumReissuedPairs int `json:"num_reissued_pairs"` } // Learning = handover signal. 
After Alice records playbooks for her // contract, Bob runs the same queries with Alice's playbook namespace. // We measure: do Alice's recorded answer IDs surface in Bob's top-K? // // Two modes: // - Verbatim handover: Bob runs Alice's exact queries (trivial case). // - Paraphrase handover: Bob runs paraphrased queries against Alice's // playbook (the hard case — does cosine on paraphrase find the // recorded query's vector?). This is the multi-coord analog of the // paraphrase reality test in playbook_lift. type Learning struct { HandoverQueriesRun int `json:"handover_queries_run"` RecordedAnswersTop1Count int `json:"recorded_answers_top1_count"` RecordedAnswersTopKCount int `json:"recorded_answers_topk_count"` HandoverHitRate float64 `json:"handover_hit_rate"` // Paraphrase handover — only populated when --with-paraphrase-handover. ParaphraseHandoverRun int `json:"paraphrase_handover_run,omitempty"` ParaphraseTop1Count int `json:"paraphrase_top1_count,omitempty"` ParaphraseTopKCount int `json:"paraphrase_topk_count,omitempty"` ParaphraseHandoverHitRate float64 `json:"paraphrase_handover_hit_rate,omitempty"` } // ── main ───────────────────────────────────────────────────────── func main() { var ( gateway = flag.String("gateway", "http://127.0.0.1:3110", "gateway base URL") contractsDir = flag.String("contracts", "tests/reality/contracts", "directory of contract JSON files") corporaCSV = flag.String("corpora", "workers,ethereal_workers", "comma-separated matrix corpora") k = flag.Int("k", 8, "top-k from matrix.search per query") out = flag.String("out", "reports/reality-tests/multi_coord_stress_001.json", "output JSON path") ollama = flag.String("ollama", "http://127.0.0.1:11434", "Ollama base URL (only used if --with-paraphrase-handover)") judgeModel = flag.String("judge", "qwen2.5:latest", "Ollama model for paraphrase generation (only used if --with-paraphrase-handover)") withParaphraseHandover = flag.Bool("with-paraphrase-handover", false, "after the verbatim 
handover phase, run a paraphrase handover phase: Bob runs paraphrased versions of Alice's queries against Alice's playbook") langfuseEnv = flag.String("langfuse-env", "/etc/lakehouse/langfuse.env", "path to Langfuse credentials env file (empty = skip tracing)") ) flag.Parse() contracts, err := loadContracts(*contractsDir) if err != nil { log.Fatalf("load contracts: %v", err) } if len(contracts) < 3 { log.Fatalf("need ≥3 contracts in %s, got %d", *contractsDir, len(contracts)) } // First three contracts → coord assignments. Names are fixed so // playbook corpora are stable across runs (rerun lands on same // namespaces, exercising the persistence path indirectly). coords := []Coordinator{ {Name: "alice", PlaybookCorpus: "playbook_alice"}, {Name: "bob", PlaybookCorpus: "playbook_bob"}, {Name: "carol", PlaybookCorpus: "playbook_carol"}, } // Initial assignment: alice→alpha, bob→beta, carol→gamma. assignments := map[string]*Contract{ "alice": &contracts[0], "bob": &contracts[1], "carol": &contracts[2], } corpora := strings.Split(*corporaCSV, ",") hc := &http.Client{Timeout: 30 * time.Second} ctx := context.Background() _ = ctx // Optional Langfuse client. Best-effort: missing env file or // unreachable Langfuse just means traces don't go anywhere; the // run still proceeds. 
var lf *langfuse.Client if *langfuseEnv != "" { if creds, err := loadLangfuseEnv(*langfuseEnv); err == nil { lf = langfuse.New(creds.URL, creds.PublicKey, creds.SecretKey, nil) log.Printf("[stress] Langfuse client live → %s", creds.URL) defer func() { if err := lf.Flush(context.Background()); err != nil { log.Printf("[stress] Langfuse final flush: %v", err) } }() } else { log.Printf("[stress] Langfuse skipped: %v", err) } } output := Output{ Coordinators: []string{"alice", "bob", "carol"}, Contracts: []string{contracts[0].Name, contracts[1].Name, contracts[2].Name}, GeneratedAt: time.Now().UTC(), } log.Printf("[stress] 3 coords, 3 contracts, k=%d, corpora=%v", *k, corpora) // ── Phase 1: baseline ─────────────────────────────────────── // Each coord runs their own contract's role queries. Records // playbook entries (top-1 of each as a synthetic "successful // match" outcome) into their personal namespace. log.Printf("[stress] phase 1: baseline") for _, coord := range coords { c := assignments[coord.Name] for _, d := range c.Demand { q := buildQuery(c, d, 1) resp := must(matrixSearch(hc, *gateway, q, corpora, *k, true, coord.PlaybookCorpus)) ev := captureEvent("baseline", 0, coord.Name, c.Name, d.Role, q, 1, true, coord.PlaybookCorpus, resp) output.Events = append(output.Events, ev) // Record top-1 as a successful playbook entry for this coord. if len(resp.Results) > 0 { if err := playbookRecord(hc, *gateway, q, resp.Results[0].ID, resp.Results[0].Corpus, 1.0, coord.PlaybookCorpus); err != nil { log.Printf(" record (%s/%s): %v", coord.Name, d.Role, err) } } } } // ── Phase 1b: new-resume injection (Hour 6) ───────────────── // Mid-day, three new resumes arrive — workers with no prior // history. We embed + add them to the workers vectord index, // then verify they're findable by their unique skill marker. // Tests the substrate's ability to absorb fresh candidates // without restart. 
log.Printf("[stress] phase 1b: new-resume injection (3 fresh workers, verify findable)") // Each fresh worker has a SEMANTIC query that should surface them // based on the actual content of their resume — role + skills + // location. nomic-embed-text is dense/semantic, NOT lexical, so a // "find me FRESHTAG_..." style unique-substring query does NOT // surface the fresh worker; the embedder weights rare substrings // as low-information noise. The semantic query below represents // what a real coordinator would actually issue. freshWorkers := []struct { ID string Resume string Verify string // semantic query expected to surface this worker }{ { ID: "fresh-001", Resume: "Senior rigger with 12 years tower-crane signaling experience. NCCCO crane signal/rigger certification active. Chicago IL metro, available immediately. Construction-site rigging specialist.", Verify: "Senior tower crane rigger NCCCO certification Chicago construction signaling", }, { ID: "fresh-002", Resume: "Bilingual safety coordinator (Spanish + English). OSHA trainer credentials, 8 years manufacturing safety training delivery. Indianapolis IN. Manages multilingual crew safety briefings and incident documentation.", Verify: "Bilingual Spanish English OSHA trainer safety coordinator Indianapolis manufacturing", }, { ID: "fresh-003", Resume: "FAA Part 107 certified drone pilot. UAV site surveying with GIS mapping output for construction site progress reports. Chicago IL metro. 
5 years aerial surveying for general contractors.", Verify: "FAA Part 107 drone surveyor UAV pilot GIS construction site mapping Chicago", }, } const freshIdx = "fresh_workers" if err := ensureFreshIndex(hc, *gateway, freshIdx, 768); err != nil { log.Fatalf("ensure fresh_workers index: %v", err) } for _, fw := range freshWorkers { if err := ingestFreshWorker(hc, *gateway, freshIdx, fw.ID, fw.Resume, map[string]any{ "name": fw.ID, "role": "fresh-resume", "source": "phase-1b-injection", }); err != nil { log.Fatalf("ingest fresh worker %s: %v", fw.ID, err) } } // Verify queries search across main + fresh corpora — the small // fresh corpus should surface the freshly-added worker because // it has no recall competition there. verifyCorpora := append([]string{}, corpora...) verifyCorpora = append(verifyCorpora, freshIdx) for _, fw := range freshWorkers { resp := must(matrixSearch(hc, *gateway, fw.Verify, verifyCorpora, *k, false, "")) ev := captureEvent("new-resume-verify", 6, "system", "fresh-resume-pool", "fresh", fw.Verify, 1, false, "", resp) // Find the fresh worker's rank in top-K (rank 0 = top-1). freshRank := -1 for i, r := range resp.Results { if r.ID == fw.ID { freshRank = i break } } switch { case freshRank == 0: ev.Note = fmt.Sprintf("fresh worker %s at top-1 — semantic absorption working", fw.ID) case freshRank > 0: ev.Note = fmt.Sprintf("fresh worker %s at rank %d (in top-K but not top-1)", fw.ID, freshRank) default: ev.Note = fmt.Sprintf("fresh worker %s NOT in top-K (top-1 was %s) — embedder didn't surface fresh-resume content over existing population", fw.ID, resp.Results[0].ID) } output.Events = append(output.Events, ev) } // ── Phase 1c: inbox burst (Hour 9) ────────────────────────── // Mid-morning, 6 incoming signals arrive — emails + SMS — each // carrying a structured demand for the system to act on. Events // fire in PRIORITY ORDER (urgent → high → medium). For each, we: // 1. POST to /v1/observer/inbox so the witness loop records it // 2. 
Run matrix.search using the embedded demand // 3. Capture both as events // // Priority weighting matters because real coordinators triage // urgent client-side asks before medium-priority background // signals. The substrate doesn't enforce ordering today (callers // fire in their preferred order); this phase verifies the // recording surface and the search-from-inbox flow work. log.Printf("[stress] phase 1c: inbox burst (6 events, priority-ordered)") var inboxTraceID string if lf != nil { inboxTraceID = lf.Trace(ctx, langfuse.TraceInput{ Name: "multi_coord_stress phase 1c inbox burst", Tags: []string{"stress", "inbox", "phase-1c"}, Metadata: map[string]any{ "hour": 9, "corpora": corpora, "k": *k, "event_count": 6, }, }) } type inboxEvent struct { Priority string // "urgent" | "high" | "medium" | "low" Type string // "email" | "sms" Sender string Subject string Body string Coord string } inboxEvents := []inboxEvent{ { Priority: "urgent", Type: "email", Sender: "ops@northstar.com", Subject: "URGENT: 50 forklift operators Cleveland Monday", Body: "Need 50 forklift operators in Cleveland OH for Monday day shift. OSHA-30 + active forklift cert required. Current Milwaukee batch cannot relocate.", Coord: "alice", }, { Priority: "urgent", Type: "email", Sender: "client@crossroads-mfg.com", Subject: "URGENT: Production line down — need 30 production workers tonight", Body: "Production line failure at Indianapolis IN site. Need 30 production workers swing shift starting tonight. Assembly + machine operation experience required.", Coord: "bob", }, { Priority: "high", Type: "email", Sender: "supervisor@loop-construction.com", Subject: "Need crane operator Chicago for 2-week project", Body: "Crane operator with NCCCO certification needed for 2-week Chicago IL site project. Day shift. Mobile crane experience preferred.", Coord: "carol", }, { Priority: "high", Type: "sms", Sender: "+1-555-0142", Body: "Bilingual safety coord needed Indy plant ASAP. Spanish + English. 
OSHA trainer credential.", Coord: "bob", }, { Priority: "medium", Type: "sms", Sender: "+1-555-0188", Body: "Drone surveyor for Chicago site progress mapping. FAA Part 107.", Coord: "carol", }, { Priority: "medium", Type: "email", Sender: "scheduling@northstar.com", Subject: "FYI: warehouse worker capacity check Milwaukee", Body: "Routine capacity check on Milwaukee warehouse worker pool — anyone with cold storage experience for next week?", Coord: "alice", }, } // Sort by priority (urgent < high < medium < low for ordering). prioRank := map[string]int{"urgent": 0, "high": 1, "medium": 2, "low": 3} sort.SliceStable(inboxEvents, func(i, j int) bool { return prioRank[inboxEvents[i].Priority] < prioRank[inboxEvents[j].Priority] }) for _, ie := range inboxEvents { // 1. Record inbox event at observerd stepStart := time.Now() if err := postInbox(hc, *gateway, ie.Type, ie.Sender, ie.Subject, ie.Body, ie.Priority, ie.Coord); err != nil { log.Printf(" inbox record failed (%s): %v", ie.Priority, err) continue } if lf != nil && inboxTraceID != "" { lf.Span(ctx, langfuse.SpanInput{ TraceID: inboxTraceID, Name: "observerd.inbox.record", Input: map[string]any{"type": ie.Type, "sender": ie.Sender, "priority": ie.Priority, "subject": ie.Subject, "body_chars": len(ie.Body)}, Output: map[string]any{"accepted": true}, StartTime: stepStart, EndTime: time.Now(), Metadata: map[string]any{"coordinator": ie.Coord}, }) } // 2. LLM parses the body into a structured demand. 
parseStart := time.Now() parsed, perr := parseInboxDemand(hc, *ollama, *judgeModel, ie.Body) parseEnd := time.Now() if perr != nil { if lf != nil && inboxTraceID != "" { lf.Span(ctx, langfuse.SpanInput{ TraceID: inboxTraceID, Name: "llm.parse_demand", Input: map[string]any{"body": ie.Body, "model": *judgeModel}, Output: map[string]any{"error": perr.Error()}, StartTime: parseStart, EndTime: parseEnd, Level: "ERROR", }) } log.Printf(" inbox demand parse failed (%s): %v", ie.Priority, perr) continue } if lf != nil && inboxTraceID != "" { lf.Span(ctx, langfuse.SpanInput{ TraceID: inboxTraceID, Name: "llm.parse_demand", Input: map[string]any{"body": ie.Body, "model": *judgeModel}, Output: parsed, StartTime: parseStart, EndTime: parseEnd, }) } // 3. Build a query string from the parsed demand and search. query := parsed.AsQuery() coord := coordByName(coords, ie.Coord) searchStart := time.Now() resp, err := matrixSearch(hc, *gateway, query, corpora, *k, true, coord.PlaybookCorpus) searchEnd := time.Now() if err != nil { log.Printf(" inbox-triggered search failed (%s): %v", ie.Priority, err) continue } if lf != nil && inboxTraceID != "" { topIDs := make([]string, 0, len(resp.Results)) for _, r := range resp.Results { topIDs = append(topIDs, r.ID) } lf.Span(ctx, langfuse.SpanInput{ TraceID: inboxTraceID, Name: "matrix.search", Input: map[string]any{ "query": query, "corpora": corpora, "k": *k, "playbook_corpus": coord.PlaybookCorpus, }, Output: map[string]any{ "top_k_ids": topIDs, "top1_distance": firstDistance(resp.Results), "playbook_boosted": resp.PlaybookBoosted, "playbook_injected": resp.PlaybookInjected, }, StartTime: searchStart, EndTime: searchEnd, }) } ev := captureEvent("inbox-triggered-search", 9, ie.Coord, "inbox-burst", ie.Subject, query, 1, true, coord.PlaybookCorpus, resp) parsedJSON, _ := json.Marshal(parsed) ev.Note = fmt.Sprintf("inbox %s/%s from %s · LLM-parsed demand: %s", ie.Type, ie.Priority, ie.Sender, string(parsedJSON)) // 4. 
Judge re-rates top-1 against the ORIGINAL body. if len(resp.Results) > 0 { judgeStart := time.Now() rating := judgeInboxResult(hc, *ollama, *judgeModel, ie.Body, resp.Results[0]) ev.JudgeRating = rating if lf != nil && inboxTraceID != "" { lf.Span(ctx, langfuse.SpanInput{ TraceID: inboxTraceID, Name: "llm.judge_top1", Input: map[string]any{ "original_body": ie.Body, "top1_id": resp.Results[0].ID, "top1_corpus": resp.Results[0].Corpus, }, Output: map[string]any{"rating": rating}, StartTime: judgeStart, EndTime: time.Now(), }) } } output.Events = append(output.Events, ev) } // ── Phase 2: surge ────────────────────────────────────────── // Each coord's contract demand doubles. URGENT phrasing. log.Printf("[stress] phase 2: surge (2x demand, urgent phrasing)") for _, coord := range coords { c := assignments[coord.Name] for _, d := range c.Demand { q := buildQuery(c, d, 2) resp := must(matrixSearch(hc, *gateway, q, corpora, *k, true, coord.PlaybookCorpus)) ev := captureEvent("surge", 12, coord.Name, c.Name, d.Role, q, 2, true, coord.PlaybookCorpus, resp) output.Events = append(output.Events, ev) } } // ── Phase 2b: 200-worker swap (Hour 18) ────────────────────── // Alpha's client says "the 200 workers you placed are unavailable // — find replacements." We capture the top-K from the warehouse // query, then re-issue the same query with those IDs excluded. // Real product test: does the system find genuinely different // candidates, or does it sit on the same population? 
log.Printf("[stress] phase 2b: 200-worker swap (alpha warehouse — exclude originally placed)") warehouseDemand := contracts[0].Demand[0] // slot 0 is warehouse worker by contract design swapQuery := buildQuery(&contracts[0], warehouseDemand, 1) origResp := must(matrixSearch(hc, *gateway, swapQuery, corpora, *k, false, "")) placedIDs := make([]string, 0, len(origResp.Results)) for _, r := range origResp.Results { placedIDs = append(placedIDs, r.ID) } origEv := captureEvent("swap-original", 18, "alice", contracts[0].Name, warehouseDemand.Role, swapQuery, 1, false, "", origResp) origEv.Note = fmt.Sprintf("captured %d originally-placed worker IDs", len(placedIDs)) output.Events = append(output.Events, origEv) swapResp := must(matrixSearch(hc, *gateway, swapQuery, corpora, *k, false, "", placedIDs...)) swapEv := captureEvent("swap-replace", 18, "alice", contracts[0].Name, warehouseDemand.Role, swapQuery, 1, false, "", swapResp) swapEv.ExcludeIDs = placedIDs swapIDs := make([]string, 0, len(swapResp.Results)) for _, r := range swapResp.Results { swapIDs = append(swapIDs, r.ID) } swapJacc := jaccardStrings(placedIDs, swapIDs) swapEv.Note = fmt.Sprintf("Jaccard(orig, swap) = %.3f (lower = better; 0 = fully replaced)", swapJacc) output.Events = append(output.Events, swapEv) // ── Phase 3: merge — alpha + beta combined under alice ────── log.Printf("[stress] phase 3: merge (alpha + beta combined, alice handles)") mergedDemand := append(append([]Demand{}, contracts[0].Demand...), contracts[1].Demand...) 
for _, d := range mergedDemand { mergedC := &Contract{Name: contracts[0].Name + "+" + contracts[1].Name, Location: contracts[0].Location + " + " + contracts[1].Location, Shift: "shared"} q := buildQuery(mergedC, d, 1) resp := must(matrixSearch(hc, *gateway, q, corpora, *k, true, coords[0].PlaybookCorpus)) ev := captureEvent("merge", 24, "alice", mergedC.Name, d.Role, q, 1, true, coords[0].PlaybookCorpus, resp) output.Events = append(output.Events, ev) } // ── Phase 4: handover — bob takes alpha contract, USING ───── // alice's playbook namespace. Tests whether Alice's recordings // surface in Bob's results when Bob runs Alice's contract. log.Printf("[stress] phase 4: handover (bob takes alpha, using alice's playbook)") aliceRecordedAnswers := map[string]string{} // role → recorded answer id for _, ev := range output.Events { if ev.Phase == "baseline" && ev.Coordinator == "alice" && len(ev.TopK) > 0 { aliceRecordedAnswers[ev.Role] = ev.TopK[0].ID } } handoverHitsTop1 := 0 handoverHitsTopK := 0 handoverRun := 0 for _, d := range contracts[0].Demand { q := buildQuery(&contracts[0], d, 1) resp := must(matrixSearch(hc, *gateway, q, corpora, *k, true, coords[0].PlaybookCorpus)) ev := captureEvent("handover", 30, "bob", contracts[0].Name, d.Role, q, 1, true, coords[0].PlaybookCorpus, resp) output.Events = append(output.Events, ev) handoverRun++ recordedID, ok := aliceRecordedAnswers[d.Role] if !ok { continue } if len(ev.TopK) > 0 && ev.TopK[0].ID == recordedID { handoverHitsTop1++ handoverHitsTopK++ } else { for _, r := range ev.TopK { if r.ID == recordedID { handoverHitsTopK++ break } } } } output.Learning.HandoverQueriesRun = handoverRun output.Learning.RecordedAnswersTop1Count = handoverHitsTop1 output.Learning.RecordedAnswersTopKCount = handoverHitsTopK if handoverRun > 0 { output.Learning.HandoverHitRate = float64(handoverHitsTop1) / float64(handoverRun) } // ── Phase 4b: paraphrase handover ─────────────────────────── // Bob runs PARAPHRASED versions of Alice's 
queries against // Alice's playbook. The verbatim handover above is the trivial // case (identical queries → identical retrieval → playbook // boost). The paraphrase handover is the real test: did Alice's // institutional memory survive the wording change Bob would // naturally introduce? if *withParaphraseHandover { log.Printf("[stress] phase 4b: paraphrase handover (bob runs paraphrased versions of alice's queries)") pHandoverRun := 0 pTop1 := 0 pTopK := 0 for _, d := range contracts[0].Demand { origQuery := buildQuery(&contracts[0], d, 1) paraphrase, err := generateParaphrase(hc, *ollama, *judgeModel, origQuery) if err != nil { log.Printf(" paraphrase gen failed for %s: %v", d.Role, err) continue } resp, err := matrixSearch(hc, *gateway, paraphrase, corpora, *k, true, coords[0].PlaybookCorpus) if err != nil { log.Printf(" paraphrase search failed for %s: %v", d.Role, err) continue } ev := captureEvent("handover-paraphrase", 36, "bob", contracts[0].Name, d.Role, paraphrase, 1, true, coords[0].PlaybookCorpus, resp) ev.Note = "paraphrase of: " + origQuery output.Events = append(output.Events, ev) pHandoverRun++ recordedID, ok := aliceRecordedAnswers[d.Role] if !ok { continue } if len(ev.TopK) > 0 && ev.TopK[0].ID == recordedID { pTop1++ pTopK++ } else { for _, r := range ev.TopK { if r.ID == recordedID { pTopK++ break } } } } output.Learning.ParaphraseHandoverRun = pHandoverRun output.Learning.ParaphraseTop1Count = pTop1 output.Learning.ParaphraseTopKCount = pTopK if pHandoverRun > 0 { output.Learning.ParaphraseHandoverHitRate = float64(pTop1) / float64(pHandoverRun) } } // ── Phase 5: split — surge re-distributed across 3 coords ── log.Printf("[stress] phase 5: split (alpha surge spread across all 3 coords)") for i, d := range contracts[0].Demand { coord := coords[i%len(coords)] c := &contracts[0] q := buildQuery(c, d, 2) resp := must(matrixSearch(hc, *gateway, q, corpora, *k, true, coord.PlaybookCorpus)) ev := captureEvent("split", 42, coord.Name, 
c.Name+"-share-"+coord.Name, d.Role, q, 2, true, coord.PlaybookCorpus, resp) output.Events = append(output.Events, ev) } // ── Phase 6: non-determinism check ───────────────────────── // Reissue each baseline query once and compare top-K Jaccard. log.Printf("[stress] phase 6: non-determinism (reissue baselines, measure Jaccard)") jaccards := []float64{} for _, ev := range output.Events { if ev.Phase != "baseline" { continue } resp := must(matrixSearch(hc, *gateway, ev.Query, corpora, *k, false, "")) // playbook OFF for reissue to isolate retrieval stability reissue := captureEvent("reissue", 48, ev.Coordinator, ev.Contract, ev.Role, ev.Query, 1, false, "", resp) output.Events = append(output.Events, reissue) // Compare against ev.TopK (also playbook-on baseline). Note: // this conflates retrieval stability with playbook stability. // We capture both ev (playbook on) and a fresh retrieval (off); // real determinism = retrieval-only top-K comparison. freshRetrievalResp := must(matrixSearch(hc, *gateway, ev.Query, corpora, *k, false, "")) freshRetrievalEv := captureEvent("reissue-retrieval-only", 48, ev.Coordinator, ev.Contract, ev.Role, ev.Query, 1, false, "", freshRetrievalResp) j := jaccardTopK(reissue.TopK, freshRetrievalEv.TopK) jaccards = append(jaccards, j) } output.Determinism.NumReissuedPairs = len(jaccards) output.Determinism.MeanJaccard = mean(jaccards) // ── Phase 7: diversity analysis ───────────────────────────── log.Printf("[stress] phase 7: diversity analysis") output.Diversity = computeDiversity(output.Events) // ── write ─────────────────────────────────────────────────── if err := os.MkdirAll(filepath.Dir(*out), 0o755); err != nil { log.Fatalf("mkdir: %v", err) } bs, _ := json.MarshalIndent(output, "", " ") if err := os.WriteFile(*out, bs, 0o644); err != nil { log.Fatalf("write %s: %v", *out, err) } log.Printf("[stress] DONE — events=%d", len(output.Events)) log.Printf("[stress] diversity: same-role-across-contracts mean Jaccard = %.3f (n=%d)", 
output.Diversity.SameRoleAcrossContractsMeanJaccard, output.Diversity.NumPairsSameRoleAcrossContracts) log.Printf("[stress] different-roles-same-contract mean Jaccard = %.3f (n=%d)", output.Diversity.DifferentRolesSameContractMeanJaccard, output.Diversity.NumPairsDifferentRolesSameContract) log.Printf("[stress] determinism: mean Jaccard on reissue = %.3f (n=%d)", output.Determinism.MeanJaccard, output.Determinism.NumReissuedPairs) log.Printf("[stress] learning verbatim: handover hit rate (top-1) = %d/%d = %.0f%%", output.Learning.RecordedAnswersTop1Count, output.Learning.HandoverQueriesRun, output.Learning.HandoverHitRate*100) if output.Learning.ParaphraseHandoverRun > 0 { log.Printf("[stress] learning paraphrase: handover hit rate (top-1) = %d/%d = %.0f%% (top-K = %d/%d)", output.Learning.ParaphraseTop1Count, output.Learning.ParaphraseHandoverRun, output.Learning.ParaphraseHandoverHitRate*100, output.Learning.ParaphraseTopKCount, output.Learning.ParaphraseHandoverRun) } log.Printf("[stress] results → %s", *out) } // generateParaphrase asks the judge model to rephrase a staffing query // while preserving intent — same prompt template as // scripts/playbook_lift/main.go, kept here as a copy to avoid a shared // internal package for two scripts. If callers ever need a third // paraphraser, lift this into internal/paraphrase/. func generateParaphrase(hc *http.Client, ollamaURL, model, query string) (string, error) { system := `You rephrase staffing queries while preserving intent. Output JSON only: {"paraphrase": ""}. Rules: - Keep the same role, certifications, geography, and constraints. - Vary the wording (synonyms, reordered clauses, different sentence shape). - Do NOT add or remove requirements. 
- Do NOT explain — just emit the JSON.` body, _ := json.Marshal(map[string]any{ "model": model, "stream": false, "format": "json", "messages": []map[string]string{ {"role": "system", "content": system}, {"role": "user", "content": query}, }, "options": map[string]any{"temperature": 0.5}, }) req, _ := http.NewRequest("POST", ollamaURL+"/api/chat", bytes.NewReader(body)) req.Header.Set("Content-Type", "application/json") resp, err := hc.Do(req) if err != nil { return "", err } defer resp.Body.Close() if resp.StatusCode/100 != 2 { return "", fmt.Errorf("ollama chat: HTTP %d", resp.StatusCode) } rb, _ := io.ReadAll(resp.Body) var ollamaResp struct { Message struct { Content string `json:"content"` } `json:"message"` } if err := json.Unmarshal(rb, &ollamaResp); err != nil { return "", err } var out struct { Paraphrase string `json:"paraphrase"` } if err := json.Unmarshal([]byte(ollamaResp.Message.Content), &out); err != nil { return "", fmt.Errorf("decode paraphrase: %w (content=%q)", err, ollamaResp.Message.Content) } if strings.TrimSpace(out.Paraphrase) == "" { return "", fmt.Errorf("empty paraphrase (content=%q)", ollamaResp.Message.Content) } return out.Paraphrase, nil } // ── helpers ────────────────────────────────────────────────────── func loadContracts(dir string) ([]Contract, error) { files, err := filepath.Glob(filepath.Join(dir, "contract_*.json")) if err != nil { return nil, err } if len(files) == 0 { return nil, fmt.Errorf("no contract_*.json files in %s", dir) } var out []Contract for _, f := range files { bs, err := os.ReadFile(f) if err != nil { return nil, err } var c Contract if err := json.Unmarshal(bs, &c); err != nil { return nil, fmt.Errorf("%s: %w", f, err) } out = append(out, c) } return out, nil } func buildQuery(c *Contract, d Demand, surge int) string { var b strings.Builder if surge > 1 { b.WriteString(fmt.Sprintf("URGENT: need %d ", d.Count*surge)) } else { b.WriteString(fmt.Sprintf("Need %d ", d.Count)) } b.WriteString(d.Role) if 
c.Location != "" { b.WriteString(" for ") b.WriteString(c.Location) } if c.Shift != "" { b.WriteString(", ") b.WriteString(c.Shift) b.WriteString(" shift") } if len(d.Certs) > 0 { b.WriteString(", certifications: ") b.WriteString(strings.Join(d.Certs, ", ")) } if len(d.Skills) > 0 { b.WriteString(", skills: ") b.WriteString(strings.Join(d.Skills, ", ")) } return b.String() } func captureEvent(phase string, hour int, coord, contract, role, query string, surge int, usePlaybook bool, pbCorpus string, resp *matrixResp) Event { topK := make([]ResultRef, 0, len(resp.Results)) for i, r := range resp.Results { topK = append(topK, ResultRef{Rank: i, ID: r.ID, Corpus: r.Corpus, Distance: r.Distance}) } return Event{ Phase: phase, Hour: hour, Coordinator: coord, Contract: contract, Role: role, Query: query, SurgeMultiplier: surge, UsePlaybook: usePlaybook, PlaybookCorpus: pbCorpus, TopK: topK, PerCorpusCounts: resp.PerCorpusCounts, PlaybookBoosted: resp.PlaybookBoosted, PlaybookInjected: resp.PlaybookInjected, TimestampUnixNano: time.Now().UnixNano(), } } func computeDiversity(events []Event) Diversity { // Filter to baseline events for clean apples-to-apples. type key struct{ contract, role string } byKey := map[key][]string{} for _, ev := range events { if ev.Phase != "baseline" { continue } k := key{ev.Contract, ev.Role} ids := make([]string, len(ev.TopK)) for i, r := range ev.TopK { ids[i] = r.ID } byKey[k] = ids } // Same role across contracts: same `role`, different `contract`. rolesSeen := map[string][][]string{} contractsSeen := map[string][]struct { role string ids []string }{} for k, ids := range byKey { rolesSeen[k.role] = append(rolesSeen[k.role], ids) contractsSeen[k.contract] = append(contractsSeen[k.contract], struct { role string ids []string }{k.role, ids}) } var ( sameRoleJacc []float64 diffRolesJacc []float64 ) // Same-role-across-contracts: each role's idsSet pair-wise. 
for _, idsList := range rolesSeen { for i := 0; i < len(idsList); i++ { for j := i + 1; j < len(idsList); j++ { sameRoleJacc = append(sameRoleJacc, jaccardStrings(idsList[i], idsList[j])) } } } // Different-roles-same-contract. for _, items := range contractsSeen { for i := 0; i < len(items); i++ { for j := i + 1; j < len(items); j++ { if items[i].role == items[j].role { continue } diffRolesJacc = append(diffRolesJacc, jaccardStrings(items[i].ids, items[j].ids)) } } } return Diversity{ SameRoleAcrossContractsMeanJaccard: mean(sameRoleJacc), DifferentRolesSameContractMeanJaccard: mean(diffRolesJacc), NumPairsSameRoleAcrossContracts: len(sameRoleJacc), NumPairsDifferentRolesSameContract: len(diffRolesJacc), } } func jaccardTopK(a, b []ResultRef) float64 { aIDs := make([]string, len(a)) bIDs := make([]string, len(b)) for i, r := range a { aIDs[i] = r.ID } for i, r := range b { bIDs[i] = r.ID } return jaccardStrings(aIDs, bIDs) } func jaccardStrings(a, b []string) float64 { if len(a) == 0 && len(b) == 0 { return 1.0 } setA := map[string]bool{} for _, x := range a { setA[x] = true } intersect := 0 for _, x := range b { if setA[x] { intersect++ } } union := len(setA) for _, x := range b { if !setA[x] { union++ } } if union == 0 { return 0 } return float64(intersect) / float64(union) } func mean(xs []float64) float64 { if len(xs) == 0 { return 0 } s := 0.0 for _, x := range xs { s += x } return s / float64(len(xs)) } // ── HTTP helpers ───────────────────────────────────────────────── func matrixSearch(hc *http.Client, gw, query string, corpora []string, k int, usePlaybook bool, playbookCorpus string, excludeIDs ...string) (*matrixResp, error) { body, _ := json.Marshal(matrixSearchReq{ QueryText: query, Corpora: corpora, K: k, UsePlaybook: usePlaybook, PlaybookCorpus: playbookCorpus, ExcludeIDs: excludeIDs, }) req, _ := http.NewRequest("POST", gw+"/v1/matrix/search", bytes.NewReader(body)) req.Header.Set("Content-Type", "application/json") resp, err := hc.Do(req) if err 
!= nil { return nil, err } defer resp.Body.Close() if resp.StatusCode/100 != 2 { rb, _ := io.ReadAll(resp.Body) return nil, fmt.Errorf("matrix.search %d: %s", resp.StatusCode, string(rb)) } var out matrixResp if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { return nil, err } return &out, nil } // ensureFreshIndex creates the fresh_workers vectord index if it // doesn't exist yet. Idempotent — re-creating returns 409 which we // treat as "already there." Two-tier search pattern: fresh content // goes to a small "hot" index and the search merges it with the // main workers index. Solves the HNSW post-build add recall issue // surfaced in runs #003-#005 (incremental adds to a 5K+ HNSW graph // can land in poorly-connected regions and disappear from search; // a small hot index has no such crowding). func ensureFreshIndex(hc *http.Client, gw, indexName string, dim int) error { body, _ := json.Marshal(map[string]any{ "name": indexName, "dimension": dim, "distance": "cosine", }) req, _ := http.NewRequest("POST", gw+"/v1/vectors/index", bytes.NewReader(body)) req.Header.Set("Content-Type", "application/json") resp, err := hc.Do(req) if err != nil { return fmt.Errorf("create index: %w", err) } defer resp.Body.Close() if resp.StatusCode == http.StatusConflict || resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusCreated { return nil } rb, _ := io.ReadAll(resp.Body) return fmt.Errorf("create index %d: %s", resp.StatusCode, string(rb)) } // ingestFreshWorker embeds + adds a single fresh worker to the // given vectord index. Two HTTP hops via the gateway: /v1/embed for // the vector, /v1/vectors/index//add to insert. The idx // parameter exists so callers can target a separate hot index // (fresh_workers) rather than the main 5K-item workers index, where // HNSW post-build recall is unreliable. 
func ingestFreshWorker(hc *http.Client, gw, idx, id, text string, metadata map[string]any) error {
	// Hop 1: ask the gateway to embed the worker text.
	embedBs, err := json.Marshal(map[string]any{
		"texts": []string{text},
		"model": "nomic-embed-text-v2-moe",
	})
	if err != nil {
		return fmt.Errorf("embed: encode request: %w", err)
	}
	// A malformed gateway URL makes NewRequest return a nil request;
	// surface that as an error instead of panicking on req.Header.Set.
	req, err := http.NewRequest("POST", gw+"/v1/embed", bytes.NewReader(embedBs))
	if err != nil {
		return fmt.Errorf("embed: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	resp, err := hc.Do(req)
	if err != nil {
		return fmt.Errorf("embed: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode/100 != 2 {
		// Best effort: the body is only used as error context.
		rb, _ := io.ReadAll(resp.Body)
		return fmt.Errorf("embed %d: %s", resp.StatusCode, string(rb))
	}
	var er struct {
		Vectors [][]float32 `json:"vectors"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&er); err != nil {
		return fmt.Errorf("decode embed: %w", err)
	}
	if len(er.Vectors) == 0 || len(er.Vectors[0]) == 0 {
		return fmt.Errorf("embed returned no vectors")
	}

	// Hop 2: insert the single item into the target index. Metadata that
	// cannot be encoded is rejected here; previously the failure was
	// ignored and the item was stored with null metadata.
	metaBs, err := json.Marshal(metadata)
	if err != nil {
		return fmt.Errorf("vectord add: encode metadata: %w", err)
	}
	addBs, err := json.Marshal(map[string]any{
		"items": []map[string]any{
			{"id": id, "vector": er.Vectors[0], "metadata": json.RawMessage(metaBs)},
		},
	})
	if err != nil {
		return fmt.Errorf("vectord add: encode request: %w", err)
	}
	req2, err := http.NewRequest("POST", gw+"/v1/vectors/index/"+idx+"/add", bytes.NewReader(addBs))
	if err != nil {
		return fmt.Errorf("vectord add: %w", err)
	}
	req2.Header.Set("Content-Type", "application/json")
	resp2, err := hc.Do(req2)
	if err != nil {
		return fmt.Errorf("vectord add: %w", err)
	}
	defer resp2.Body.Close()
	if resp2.StatusCode/100 != 2 {
		// Best effort: the body is only used as error context.
		rb, _ := io.ReadAll(resp2.Body)
		return fmt.Errorf("vectord add %d: %s", resp2.StatusCode, string(rb))
	}
	return nil
}

// langfuseEnvCreds holds the three Langfuse settings read from an env
// file by loadLangfuseEnv.
type langfuseEnvCreds struct {
	URL       string
	PublicKey string
	SecretKey string
}

// loadLangfuseEnv parses a key=value env file (one assignment per
// non-comment line) and pulls LANGFUSE_URL, LANGFUSE_PUBLIC_KEY,
// LANGFUSE_SECRET_KEY. All three required; missing any returns an
// error so callers can skip-with-warning rather than silently
// run without tracing.
func loadLangfuseEnv(path string) (*langfuseEnvCreds, error) { f, err := os.Open(path) if err != nil { return nil, err } defer f.Close() creds := &langfuseEnvCreds{} sc := bufio.NewScanner(f) for sc.Scan() { line := strings.TrimSpace(sc.Text()) if line == "" || strings.HasPrefix(line, "#") { continue } eq := strings.IndexByte(line, '=') if eq < 0 { continue } k, v := strings.TrimSpace(line[:eq]), strings.TrimSpace(line[eq+1:]) switch k { case "LANGFUSE_URL": creds.URL = v case "LANGFUSE_PUBLIC_KEY": creds.PublicKey = v case "LANGFUSE_SECRET_KEY": creds.SecretKey = v } } if err := sc.Err(); err != nil { return nil, err } if creds.URL == "" || creds.PublicKey == "" || creds.SecretKey == "" { return nil, fmt.Errorf("langfuse env missing one of URL/PUBLIC_KEY/SECRET_KEY") } return creds, nil } func firstDistance(results []matrixResult) float32 { if len(results) == 0 { return 0 } return results[0].Distance } // parsedDemand is the LLM-extracted structure from an inbox message // body — what a real coordinator would type into a search form. // Empty fields are honest: the body didn't say. type parsedDemand struct { Role string `json:"role"` Count int `json:"count"` Location string `json:"location"` Certs []string `json:"certs"` Skills []string `json:"skills"` Shift string `json:"shift"` } // AsQuery composes a matrix.search query string from the parsed // fields. Mirrors buildQuery's shape so search-time semantics match // what the contract-driven phases produce. Empty fields are skipped // rather than emitted as "" markers. 
func (p parsedDemand) AsQuery() string { var b strings.Builder if p.Count > 0 { fmt.Fprintf(&b, "Need %d ", p.Count) } else { b.WriteString("Need ") } b.WriteString(p.Role) if p.Location != "" { b.WriteString(" for ") b.WriteString(p.Location) } if p.Shift != "" { b.WriteString(", ") b.WriteString(p.Shift) b.WriteString(" shift") } if len(p.Certs) > 0 { b.WriteString(", certifications: ") b.WriteString(strings.Join(p.Certs, ", ")) } if len(p.Skills) > 0 { b.WriteString(", skills: ") b.WriteString(strings.Join(p.Skills, ", ")) } return b.String() } // judgeInboxResult rates the top retrieval against the ORIGINAL inbox // body. Returns 1-5 (5 = perfect match, 1 = irrelevant); 0 on any // error. Real product driver: a tight-distance result on a // LLM-parsed query may still be wrong-domain (parser dropped a // critical constraint, or the corpus genuinely has no match). The // rating gives coordinators an honest "this is close in vector // space but doesn't actually fit your ask" signal. func judgeInboxResult(hc *http.Client, ollamaURL, model, inboxBody string, top matrixResult) int { system := `You rate retrieval results for a staffing co-pilot. 
Rate the result 1-5 against the original inbox request: 5 = perfect match (this person/role IS what was asked for) 4 = strong match (right field, right level, minor mismatches) 3 = adjacent match (related field or partial overlap) 2 = weak/tangential match 1 = irrelevant Output JSON only: {"rating": N, "reason": ""}.` user := fmt.Sprintf("Original inbox request:\n%s\n\nResult corpus: %s\nResult ID: %s\nResult metadata:\n%s", inboxBody, top.Corpus, top.ID, string(top.Metadata)) body, _ := json.Marshal(map[string]any{ "model": model, "stream": false, "format": "json", "messages": []map[string]string{ {"role": "system", "content": system}, {"role": "user", "content": user}, }, "options": map[string]any{"temperature": 0}, }) req, _ := http.NewRequest("POST", ollamaURL+"/api/chat", bytes.NewReader(body)) req.Header.Set("Content-Type", "application/json") resp, err := hc.Do(req) if err != nil { return 0 } defer resp.Body.Close() if resp.StatusCode/100 != 2 { return 0 } rb, _ := io.ReadAll(resp.Body) var ollamaResp struct { Message struct { Content string `json:"content"` } `json:"message"` } if err := json.Unmarshal(rb, &ollamaResp); err != nil { return 0 } var v struct { Rating int `json:"rating"` } if err := json.Unmarshal([]byte(ollamaResp.Message.Content), &v); err != nil { return 0 } if v.Rating < 1 || v.Rating > 5 { return 0 } return v.Rating } // parseInboxDemand asks the judge model to extract structured fields // from an inbox body. Same Ollama+JSON-format pattern as the // generateParaphrase function. Real production would have a dedicated // small model fine-tuned on staffing-language inbox parsing; here we // use the same model that judges relevance. temperature=0 for // deterministic extraction. func parseInboxDemand(hc *http.Client, ollamaURL, model, inboxBody string) (*parsedDemand, error) { system := `You parse staffing requests from email/SMS bodies. Extract structured fields. 
Output JSON ONLY, this exact shape: {"role": "...", "count": N, "location": "...", "certs": [...], "skills": [...], "shift": "..."}. Rules: - role: the job role being requested (lowercase, e.g. "warehouse worker", "forklift operator") - count: number of workers needed (integer; if "a few" or unspecified, use 1) - location: city + state if both mentioned (e.g. "Cleveland, OH"); city only if state missing - certs: certification list as named in the body (e.g. ["OSHA-30", "forklift cert"]) - skills: skill list as named in the body (e.g. ["pallet jack", "spanish"]) - shift: "day"|"swing"|"night" if mentioned, else "" - If a field isn't in the body, use empty string or empty array (never null) - Do NOT explain — emit the JSON only.` body, _ := json.Marshal(map[string]any{ "model": model, "stream": false, "format": "json", "messages": []map[string]string{ {"role": "system", "content": system}, {"role": "user", "content": inboxBody}, }, "options": map[string]any{"temperature": 0}, }) req, _ := http.NewRequest("POST", ollamaURL+"/api/chat", bytes.NewReader(body)) req.Header.Set("Content-Type", "application/json") resp, err := hc.Do(req) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode/100 != 2 { return nil, fmt.Errorf("ollama chat: HTTP %d", resp.StatusCode) } rb, _ := io.ReadAll(resp.Body) var ollamaResp struct { Message struct { Content string `json:"content"` } `json:"message"` } if err := json.Unmarshal(rb, &ollamaResp); err != nil { return nil, err } var out parsedDemand if err := json.Unmarshal([]byte(ollamaResp.Message.Content), &out); err != nil { return nil, fmt.Errorf("decode parsed demand: %w (content=%q)", err, ollamaResp.Message.Content) } if strings.TrimSpace(out.Role) == "" { return nil, fmt.Errorf("parsed demand has empty role (content=%q)", ollamaResp.Message.Content) } return &out, nil } // postInbox sends an inbox message (email or SMS) to observerd via // the gateway. 
observerd records it as an ObservedOp with // Source=SourceInbox; downstream actions (search, ingest, etc.) are // the caller's concern. func postInbox(hc *http.Client, gw, msgType, sender, subject, body, priority, tag string) error { bodyJSON, _ := json.Marshal(map[string]any{ "type": msgType, "sender": sender, "subject": subject, "body": body, "priority": priority, "tag": tag, }) req, _ := http.NewRequest("POST", gw+"/v1/observer/inbox", bytes.NewReader(bodyJSON)) req.Header.Set("Content-Type", "application/json") resp, err := hc.Do(req) if err != nil { return err } defer resp.Body.Close() if resp.StatusCode/100 != 2 { rb, _ := io.ReadAll(resp.Body) return fmt.Errorf("inbox %d: %s", resp.StatusCode, string(rb)) } return nil } // coordByName looks up a coordinator by name. Used by inbox-triggered // searches that route based on the email's tagged coordinator. func coordByName(coords []Coordinator, name string) Coordinator { for _, c := range coords { if c.Name == name { return c } } return coords[0] } func playbookRecord(hc *http.Client, gw, query, answerID, answerCorpus string, score float64, corpus string) error { body, _ := json.Marshal(map[string]any{ "query_text": query, "answer_id": answerID, "answer_corpus": answerCorpus, "score": score, "tags": []string{"multi-coord-stress"}, "corpus": corpus, }) req, _ := http.NewRequest("POST", gw+"/v1/matrix/playbooks/record", bytes.NewReader(body)) req.Header.Set("Content-Type", "application/json") resp, err := hc.Do(req) if err != nil { return err } defer resp.Body.Close() if resp.StatusCode/100 != 2 { rb, _ := io.ReadAll(resp.Body) return fmt.Errorf("playbook record %d: %s", resp.StatusCode, string(rb)) } return nil } func must[T any](v T, err error) T { if err != nil { log.Fatalf("[stress] %v", err) } return v }