scrum: unified matrix retriever — pull from ALL relevant KB corpora, not just pathway memory

Per J 2026-04-25 architectural correction: matrix index is the vector
indexing layer for the WHOLE knowledge base (distilled facts, procedures,
config hints, team runs, playbooks, pathway successes), not a single
narrow store. Built fetchMatrixContext(query, taskClass, filePath) that:

- Queries multiple persistent vector indexes in parallel via /vectors/search
- Collects hits per corpus + score + doc_id + 400-char excerpt
- Pulls pathway successes via existing helper, mapped to MatrixHit shape
- Sorts by score across corpora, returns top-N (default 8)
- Reports per-corpus hit counts + errors for transparency

Per-task-class corpus list (MATRIX_CORPORA_FOR_TASK):
  scrum_review → distilled_factual, distilled_procedural,
                 distilled_config_hint, kb_team_runs_v1
  (staffing data deliberately excluded — not relevant to code review)

Probed live: distilled_config_hint top hit = 0.52, distilled_procedural
top = 0.49, kb_team_runs top = 0.59. Real signal across corpora.

Replaces the narrow proven-approaches preamble with a unified
MATRIX-INDEXED CONTEXT preamble tagged with source_corpus per chunk
so the model knows what kind of context it's seeing.

LH_SCRUM_MATRIX_RETRIEVE=0 still disables for A/B testing.

Future: promote to a Rust /v1/matrix endpoint once corpora list and
ranking logic stabilize. For now TS lets us iterate fast against the
live matrix without gateway restarts.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
root 2026-04-25 18:29:08 -05:00
parent d187bcd8ac
commit a496ced848

View File

@ -363,6 +363,215 @@ async function recordPathwayReplay(pathwayId: string, succeeded: boolean): Promi
// dependency. Pipeline keeps moving when the observer is down.
const OBSERVER_URL = process.env.LH_OBSERVER_URL ?? "http://localhost:3800";
// Matrix retrieval — pulls proven-success pathways for this task class
// + file area and prepends them as a "📖 PROVEN APPROACHES" preamble.
// First time the matrix index is actually USED to route work (vs being
// written to). LH_SCRUM_MATRIX_RETRIEVE=0 disables for A/B comparison.
//
// The proof J asked for: does loading prior successes change scrum
// behavior? Run with retrieval ON and OFF on the same target — if
// reviews differ measurably, the matrix has signal. If identical,
// the writes are noise and need a different design.
//
// Gate is "anything but the literal string 0" → enabled by default.
const MATRIX_RETRIEVE = process.env.LH_SCRUM_MATRIX_RETRIEVE !== "0";
// On-disk pathway-memory state consumed by fetchProvenApproaches.
// Absolute path — NOTE(review): host-specific; presumably this box only.
const PATHWAY_STATE_PATH = "/home/profit/lakehouse/data/_pathway_memory/state.json";
/**
 * A prior accepted review pulled from pathway-memory state, surfaced to the
 * model as a reference pattern (see buildProvenApproachesPreamble).
 */
interface ProvenApproach {
  pathway_id: string;
  file_path: string;
  accepted_model: string;    // model of the ladder attempt that was accepted
  accepted_attempt: number;  // ladder rung number of the accepted attempt
  kb_sources: string[]; // top-3 KB chunk source_doc names
  summary_excerpt: string; // first 400 chars of reducer_summary
  created_at: string;   // ISO-ish timestamp from the trace; "" when absent
}
/**
 * Load prior accepted reviews ("proven approaches") from pathway-memory state.
 *
 * Matches traces by narrow fingerprint: same task_class + same file prefix
 * (first 2 path segments, e.g. "crates/queryd"). Traces matching signalClass
 * are preferred in the ranking but not required — broader matches still
 * inform. Within a preference tier, most recent first.
 *
 * @param taskClass   exact task_class a trace must carry
 * @param filePath    file whose first two path segments form the prefix filter
 * @param signalClass preferred (not required) signal_class; null = no preference
 * @param limit       maximum approaches returned (default 3)
 * @returns up to `limit` records; [] when retrieval is disabled, the state
 *          file is missing, or anything throws (best-effort by design).
 */
async function fetchProvenApproaches(
  taskClass: string,
  filePath: string,
  signalClass: string | null,
  limit = 3,
): Promise<ProvenApproach[]> {
  if (!MATRIX_RETRIEVE) return [];
  try {
    const f = Bun.file(PATHWAY_STATE_PATH);
    if (!(await f.exists())) return [];
    const state = JSON.parse(await f.text());
    const pathways = state.pathways ?? {};
    // Match by narrow fingerprint: same task_class + same file prefix
    // (first 2 path segments, e.g. "crates/queryd").
    const filePrefix = filePath.split("/").slice(0, 2).join("/");
    const matched: any[] = [];
    for (const traces of Object.values(pathways) as any[][]) {
      for (const t of traces) {
        if (t.task_class !== taskClass) continue;
        if (!t.file_path?.startsWith(filePrefix)) continue;
        if (t.final_verdict !== "accepted") continue;
        if (t.retired) continue;
        matched.push(t);
      }
    }
    // FIX: signalClass was previously accepted but never consulted, despite
    // the documented "preferred but not required" contract. Rank traces
    // whose signal_class matches first, then most-recent-first within tiers.
    // (assumes traces carry a signal_class field — TODO confirm vs writer)
    matched.sort((a, b) => {
      if (signalClass) {
        const aPref = a.signal_class === signalClass ? 1 : 0;
        const bPref = b.signal_class === signalClass ? 1 : 0;
        if (aPref !== bPref) return bPref - aPref;
      }
      return (b.created_at ?? "").localeCompare(a.created_at ?? "");
    });
    return matched.slice(0, limit).map(t => {
      // Accepted rung; sentinel for older traces missing ladder records.
      const acceptedAttempt = (t.ladder_attempts ?? []).find((a: any) => a.accepted)
        ?? { model: "unknown", rung: 0 };
      const sources = (t.kb_chunks ?? [])
        .slice(0, 3)
        .map((c: any) => c.source_doc ?? "?");
      return {
        pathway_id: t.pathway_id,
        file_path: t.file_path,
        accepted_model: acceptedAttempt.model,
        accepted_attempt: acceptedAttempt.rung,
        kb_sources: [...new Set<string>(sources)],
        summary_excerpt: (t.reducer_summary ?? "").slice(0, 400),
        created_at: t.created_at ?? "",
      };
    });
  } catch (e: unknown) {
    // Narrow before reading .message — a non-Error throw previously logged
    // "undefined". Retrieval stays best-effort: never break the pipeline.
    const msg = e instanceof Error ? e.message : String(e);
    console.error(`[scrum] matrix retrieval failed: ${msg}`);
    return [];
  }
}
// Unified matrix retriever — pulls from ALL relevant corpora at once.
// Per J 2026-04-25: matrix is the vector indexing layer for the whole KB,
// not just pathway memory. Returns combined top-N ranked across corpora.
//
// Smoke-test goal: prove the matrix surfaces relevant context from MULTIPLE
// indexed sources (distilled facts/procedures/config-hints + team runs +
// playbook memory + pathway successes), not just one slice.
//
// Per-corpus configuration. Add an entry to query a new corpus. Limited
// to indexes that actually contain code-review-relevant context — staffing
// data (workers_500k_*, resumes_*) is excluded by design.
//
// Keys are task classes; values are persistent vector index names (the
// _vYYYY… suffix pins a specific build of each distilled index).
const MATRIX_CORPORA_FOR_TASK: Record<string, string[]> = {
  scrum_review: [
    "distilled_factual_v20260423095819",
    "distilled_procedural_v20260423102847",
    "distilled_config_hint_v20260423102847",
    "kb_team_runs_v1",
  ],
};
/** One retrieved chunk, tagged with the corpus it came from. */
interface MatrixHit {
  source_corpus: string; // index name, or the synthetic "pathway_memory"
  score: number;         // cosine score from /vectors/search; 0.6 for pathway hits
  doc_id: string;
  text: string;          // excerpt, capped at 400 chars
}
/** Combined result of a multi-corpus matrix retrieval pass. */
interface MatrixContext {
  hits: MatrixHit[];                 // top-N across all corpora, score-desc
  by_corpus: Record<string, number>; // hit count per corpus that responded
  errors: Record<string, string>;    // corpus → failure reason (absent = ok)
  latency_ms: number;                // wall-clock time for the whole pass
}
/**
 * Unified matrix retriever — queries every KB corpus configured for
 * taskClass in parallel via the gateway's /vectors/search, folds in pathway
 * successes from fetchProvenApproaches, and returns the combined top-N hits
 * ranked by score across corpora.
 *
 * Failures are per-corpus and non-fatal: a corpus that errors lands in
 * `errors` and the rest still contribute hits.
 *
 * @param query         free-text retrieval query
 * @param taskClass     selects the corpus list from MATRIX_CORPORA_FOR_TASK
 * @param filePath      forwarded to the pathway-success fingerprint match
 * @param topPerCorpus  top_k requested from each vector index (default 3)
 * @param topOverall    hits kept after cross-corpus ranking (default 8)
 */
async function fetchMatrixContext(
  query: string,
  taskClass: string,
  filePath: string,
  topPerCorpus = 3,
  topOverall = 8,
): Promise<MatrixContext> {
  const t0 = Date.now();
  const corpora = MATRIX_CORPORA_FOR_TASK[taskClass] ?? [];
  const allHits: MatrixHit[] = [];
  const errors: Record<string, string> = {};
  const byCorpus: Record<string, number> = {};
  // FIX: catch clauses used `any` and read .message blindly — a non-Error
  // throw recorded `undefined` in errors. Narrow via this helper instead.
  const errText = (e: unknown): string => (e instanceof Error ? e.message : String(e));
  // Query persistent vector indexes in parallel.
  await Promise.all(corpora.map(async (idx) => {
    try {
      const r = await fetch(`${GATEWAY}/vectors/search`, {
        method: "POST",
        headers: { "content-type": "application/json" },
        body: JSON.stringify({ index_name: idx, query, top_k: topPerCorpus }),
        // Bound each corpus query so one slow index can't stall the pass.
        signal: AbortSignal.timeout(15000),
      });
      if (!r.ok) { errors[idx] = `HTTP ${r.status}`; return; }
      const data: any = await r.json();
      const results = data.results ?? [];
      byCorpus[idx] = results.length;
      for (const h of results) {
        allHits.push({
          source_corpus: idx,
          score: Number(h.score ?? 0),
          doc_id: String(h.doc_id ?? "?"),
          // Some indexes return chunk_text, others text — accept both.
          text: String(h.chunk_text ?? h.text ?? "").slice(0, 400),
        });
      }
    } catch (e: unknown) {
      errors[idx] = errText(e);
    }
  }));
  // Pull pathway successes via the existing helper, mapped into MatrixHit shape.
  try {
    const proven = await fetchProvenApproaches(taskClass, filePath, null, topPerCorpus);
    byCorpus.pathway_memory = proven.length;
    for (const p of proven) {
      allHits.push({
        source_corpus: "pathway_memory",
        score: 0.6, // neutral — pathway has no cosine; used as soft mid-rank
        doc_id: p.pathway_id.slice(0, 12),
        text: `[${p.accepted_model} accepted on attempt ${p.accepted_attempt} · sources=${p.kb_sources.join(",")}]\n${p.summary_excerpt.replace(/\s+/g, " ")}`.slice(0, 400),
      });
    }
  } catch (e: unknown) {
    errors.pathway_memory = errText(e);
  }
  // Rank across corpora by score desc, keep the overall top N.
  // NOTE(review): scores from different indexes are assumed comparable —
  // confirm once ranking moves to the Rust /v1/matrix endpoint.
  allHits.sort((a, b) => b.score - a.score);
  const topHits = allHits.slice(0, topOverall);
  return {
    hits: topHits,
    by_corpus: byCorpus,
    errors,
    latency_ms: Date.now() - t0,
  };
}
/**
 * Render matrix-retrieval hits as a prompt preamble the model sees before
 * the review task. Returns "" when there is nothing to show, so callers
 * can concatenate unconditionally.
 */
function buildMatrixPreamble(ctx: MatrixContext): string {
  if (ctx.hits.length === 0) return "";
  const corpusCount = Object.keys(ctx.by_corpus).length;
  // One numbered line per hit, whitespace collapsed to keep each on one line.
  const hitLines = ctx.hits.map((h, i) => {
    const excerpt = h.text.replace(/\s+/g, " ").trim();
    return `[${i + 1}] ${h.source_corpus} (score=${h.score.toFixed(2)}, doc=${h.doc_id}): ${excerpt}`;
  });
  return [
    `═══ 📖 MATRIX-INDEXED CONTEXT (${ctx.hits.length} hits across ${corpusCount} corpora) ═══`,
    "Relevant chunks pulled from the knowledge base. Use as REFERENCE — not findings to copy. Cite specific chunks if they shape your review.",
    "",
    ...hitLines,
    "═══",
    "",
  ].join("\n");
}
/**
 * Render prior accepted reviews as a reference-pattern preamble.
 * Returns "" when there are no approaches, so callers can concatenate
 * unconditionally.
 */
function buildProvenApproachesPreamble(approaches: ProvenApproach[]): string {
  if (approaches.length === 0) return "";
  const out: string[] = [
    "═══ 📖 PROVEN APPROACHES — PRIOR ACCEPTED REVIEWS ON THIS FILE AREA ═══",
    "These are reviews that previously passed observer hand-review on the same task class + file prefix.",
    "Use them as REFERENCE PATTERNS for what a strong review looks like — not as findings to copy.",
    "",
  ];
  // Header + collapsed excerpt + spacer per approach.
  approaches.forEach((a, idx) => {
    out.push(`Approach ${idx + 1} · file=${a.file_path} · model=${a.accepted_model} · sources=[${a.kb_sources.join(", ")}]`);
    out.push(` excerpt: ${a.summary_excerpt.replace(/\s+/g, " ").trim()}`);
    out.push("");
  });
  out.push("═══", "");
  return out.join("\n");
}
interface ObserverVerdict {
verdict: "accept" | "reject" | "cycle";
confidence?: number;
@ -1108,6 +1317,26 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
"\n═══\n\n";
log(` 📚 pathway memory: ${pastFingerprints.length} historical bug pattern(s) prepended to prompt`);
}
// Unified matrix-indexed retrieval — pulls from ALL relevant KB
// corpora (distilled facts/procedures/config-hints + team runs +
// pathway successes). LH_SCRUM_MATRIX_RETRIEVE=0 disables for A/B.
let provenApproachesPreamble = "";
if (MATRIX_RETRIEVE) {
// Query text combines task framing + file path + first chunk of
// source so retrieval anchors against both the metadata and the
// actual code being reviewed.
const matrixQuery = `${taskClass} ${rel} ${content.slice(0, 500)}`;
const matrixCtx = await fetchMatrixContext(matrixQuery, taskClass, rel, 3, 8);
provenApproachesPreamble = buildMatrixPreamble(matrixCtx);
const corporaSummary = Object.entries(matrixCtx.by_corpus)
.map(([k, v]) => `${k.split("_v")[0]}=${v}`).join(" ");
const errSummary = Object.keys(matrixCtx.errors).length > 0
? ` errors=[${Object.entries(matrixCtx.errors).map(([k, v]) => `${k}:${v}`).join(", ")}]`
: "";
log(` 📖 matrix: ${matrixCtx.hits.length} hits in ${matrixCtx.latency_ms}ms · ${corporaSummary}${errSummary}`);
} else {
log(` 📖 matrix retrieval: DISABLED (LH_SCRUM_MATRIX_RETRIEVE=0)`);
}
let hotSwapOrderedIndices: number[] | null = null;
if (hotSwap) {
// Reorder the ladder to try the recommended model first. Rung
@ -1166,7 +1395,7 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
const r = await chat({
provider: rung.provider,
model: rung.model,
prompt: pathwayPreamble + baseTask + learning,
prompt: provenApproachesPreamble + pathwayPreamble + baseTask + learning,
max_tokens: 1500,
});
const attemptMs = Date.now() - attemptStarted;