diff --git a/lakehouse.toml b/lakehouse.toml index 0e7ef2d..d748788 100644 --- a/lakehouse.toml +++ b/lakehouse.toml @@ -43,7 +43,7 @@ bind = "127.0.0.1:3216" # G2: Ollama local. G3+ may swap in OpenAI/Voyage by changing # this URL + the wire format inside the provider. provider_url = "http://localhost:11434" -default_model = "nomic-embed-text" +default_model = "nomic-embed-text-v2-moe" [queryd] bind = "127.0.0.1:3214" @@ -129,7 +129,7 @@ level = "info" [models] # Tier 1 — local hot path local_fast = "qwen3.5:latest" -local_embed = "nomic-embed-text" +local_embed = "nomic-embed-text-v2-moe" # 475M MoE, drop-in upgrade from 137M v1 — verified 2026-04-30: same 768-dim output # local_judge stays on qwen2.5:latest — qwen3.5:latest is a vision-SSM # build with 256K context that runs ~30s per judge call against the # playbook_lift loop (verified 2026-04-30). qwen2.5:latest at ~1s/call diff --git a/reports/reality-tests/multi_coord_stress_005.md b/reports/reality-tests/multi_coord_stress_005.md new file mode 100644 index 0000000..3fec16f --- /dev/null +++ b/reports/reality-tests/multi_coord_stress_005.md @@ -0,0 +1,82 @@ +# Multi-Coordinator Stress Test — Run 005 + +**Generated:** 2026-04-30T13:25:15.497712275Z +**Coordinators:** alice / bob / carol (each with own playbook namespace: `playbook_alice` / `playbook_bob` / `playbook_carol`) +**Contracts:** alpha_milwaukee_distribution / beta_indianapolis_manufacturing / gamma_chicago_construction +**Corpora:** `workers,ethereal_workers` +**K per query:** 8 +**Total events captured:** 61 +**Evidence:** `reports/reality-tests/multi_coord_stress_005.json` + +--- + +## Diversity — is the system locking into scenarios or cycling? 
+ +| Metric | Mean Jaccard | n pairs | Interpretation | +|---|---:|---:|---| +| Same role across different contracts | 0 | 9 | Lower = more diverse (different region/cert mix → different workers) | +| Different roles within same contract | 0.03610093610093609 | 18 | Should be near-zero (different roles = different worker pools) | + +**Healthy ranges:** +- Same role across contracts: < 0.30 means the system is genuinely picking different workers per region/contract. +- Different roles same contract: < 0.10 means role-specific retrieval is working. +- If either is > 0.50, the system is "cycling" the same handful of workers regardless of query intent. + +--- + +## Determinism — same query reissued, top-K stability + +| Metric | Value | +|---|---:| +| Mean Jaccard on retrieval-only reissue | 1 | +| Number of reissue pairs | 12 | + +**Interpretation:** +- ≥ 0.95: HNSW retrieval is highly deterministic; reissues land on near-identical top-K. Good — system locks into a stable view of "best workers for this query." +- 0.80 – 0.95: Some HNSW or embed variance, acceptable. +- < 0.80: Retrieval is unstable — reissues see substantially different results, suggesting either embed nondeterminism (Ollama returning slightly different vectors) or vectord nondeterminism (HNSW insertion order affecting recall). + +--- + +## Learning — handover hit rate + +Bob takes Alice's contract using Alice's playbook namespace. Did Alice's recorded answers surface in Bob's results? 
+ +| Metric | Value | +|---|---:| +| Verbatim handover queries run | 4 | +| Alice's recorded answer at Bob's top-1 (verbatim) | 4 | +| Alice's recorded answer in Bob's top-K (verbatim) | 4 | +| **Verbatim handover hit rate (top-1)** | **1** | +| Paraphrase handover queries run | 4 | +| Alice's recorded answer at Bob's top-1 (paraphrase) | 4 | +| Alice's recorded answer in Bob's top-K (paraphrase) | 4 | +| **Paraphrase handover hit rate (top-1)** | **1** | + +**Interpretation:** +- Verbatim hit rate ≈ 1.0: trivial case — Bob runs identical queries; should always hit. +- Paraphrase hit rate ≥ 0.5: institutional memory survives wording change — the harder learning property. +- Paraphrase hit rate ≈ 0.0: Bob's paraphrases drift past the inject threshold, so Alice's recordings don't activate. Same caveat as the playbook_lift paraphrase pass. + +--- + +## Per-event capture + +All matrix.search responses live in the JSON — top-K with worker IDs, distances, and per-corpus counts. Search by phase: + +```bash +jq '.events[] | select(.phase == "merge")' reports/reality-tests/multi_coord_stress_005.json +jq '.events[] | select(.coordinator == "alice" and .phase == "baseline")' reports/reality-tests/multi_coord_stress_005.json +jq '.events[] | select(.role == "warehouse worker") | {phase, contract, top_k_ids: [.top_k[].id]}' reports/reality-tests/multi_coord_stress_005.json +``` + +--- + +## What's NOT in this run (Phase 1 deliberately defers) + +- **48-hour clock.** Events fire as discrete steps, not on a timeline. +- **Email / SMS ingest.** No endpoints exist on the Go side yet. +- **New-resume injection mid-run.** The corpus is fixed at the start. +- **Langfuse traces.** Need Go-side wiring. + +These are Phase 2/3. The Phase 1 substrate is what the time-based runner will mount on top of. 
diff --git a/scripts/multi_coord_stress.sh b/scripts/multi_coord_stress.sh index 68f1bf9..eeda742 100755 --- a/scripts/multi_coord_stress.sh +++ b/scripts/multi_coord_stress.sh @@ -100,7 +100,7 @@ refresh_every = "1s" [embedd] bind = "127.0.0.1:3216" provider_url = "http://localhost:11434" -default_model = "nomic-embed-text" +default_model = "nomic-embed-text-v2-moe" [vectord] bind = "127.0.0.1:3215" diff --git a/scripts/multi_coord_stress/main.go b/scripts/multi_coord_stress/main.go index e0b6a85..bf6dd1e 100644 --- a/scripts/multi_coord_stress/main.go +++ b/scripts/multi_coord_stress/main.go @@ -795,7 +795,7 @@ func matrixSearch(hc *http.Client, gw, query string, corpora []string, k int, us func ingestFreshWorker(hc *http.Client, gw, id, text string, metadata map[string]any) error { embedBs, _ := json.Marshal(map[string]any{ "texts": []string{text}, - "model": "nomic-embed-text", + "model": "nomic-embed-text-v2-moe", }) req, _ := http.NewRequest("POST", gw+"/v1/embed", bytes.NewReader(embedBs)) req.Header.Set("Content-Type", "application/json") diff --git a/scripts/playbook_lift.sh b/scripts/playbook_lift.sh index 170d451..c1971a6 100755 --- a/scripts/playbook_lift.sh +++ b/scripts/playbook_lift.sh @@ -161,7 +161,7 @@ refresh_every = "1s" [embedd] bind = "127.0.0.1:3216" provider_url = "http://localhost:11434" -default_model = "nomic-embed-text" +default_model = "nomic-embed-text-v2-moe" [vectord] bind = "127.0.0.1:3215"