From a496ced848b7b4f664180afacfaf210fd940a0ed Mon Sep 17 00:00:00 2001 From: root Date: Sat, 25 Apr 2026 18:29:08 -0500 Subject: [PATCH] =?UTF-8?q?scrum:=20unified=20matrix=20retriever=20?= =?UTF-8?q?=E2=80=94=20pull=20from=20ALL=20relevant=20KB=20corpora,=20not?= =?UTF-8?q?=20just=20pathway=20memory?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per J 2026-04-25 architectural correction: matrix index is the vector indexing layer for the WHOLE knowledge base (distilled facts, procedures, config hints, team runs, playbooks, pathway successes), not a single narrow store. Built fetchMatrixContext(query, taskClass, filePath) that: - Queries multiple persistent vector indexes in parallel via /vectors/search - Collects hits per corpus + score + doc_id + 400-char excerpt - Pulls pathway successes via existing helper, mapped to MatrixHit shape - Sorts by score across corpora, returns top-N (default 8) - Reports per-corpus hit counts + errors for transparency Per-task-class corpus list (MATRIX_CORPORA_FOR_TASK): scrum_review → distilled_factual, distilled_procedural, distilled_config_hint, kb_team_runs_v1 (staffing data deliberately excluded — not relevant to code review) Probed live: distilled_config_hint top hit = 0.52, distilled_procedural top = 0.49, kb_team_runs top = 0.59. Real signal across corpora. Replaces the narrow proven-approaches preamble with a unified MATRIX-INDEXED CONTEXT preamble tagged with source_corpus per chunk so the model knows what kind of context it's seeing. LH_SCRUM_MATRIX_RETRIEVE=0 still disables for A/B testing. Future: promote to a Rust /v1/matrix endpoint once corpora list and ranking logic stabilize. For now TS lets us iterate fast against the live matrix without gateway restarts. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/real-world/scrum_master_pipeline.ts | 231 +++++++++++++++++++++- 1 file changed, 230 insertions(+), 1 deletion(-) diff --git a/tests/real-world/scrum_master_pipeline.ts b/tests/real-world/scrum_master_pipeline.ts index 8af8fb2..e230218 100644 --- a/tests/real-world/scrum_master_pipeline.ts +++ b/tests/real-world/scrum_master_pipeline.ts @@ -363,6 +363,215 @@ async function recordPathwayReplay(pathwayId: string, succeeded: boolean): Promi // dependency. Pipeline keeps moving when the observer is down. const OBSERVER_URL = process.env.LH_OBSERVER_URL ?? "http://localhost:3800"; +// Matrix retrieval โ€” pulls proven-success pathways for this task class +// + file area and prepends them as a "๐Ÿ“– PROVEN APPROACHES" preamble. +// First time the matrix index is actually USED to route work (vs being +// written to). LH_SCRUM_MATRIX_RETRIEVE=0 disables for A/B comparison. +// +// The proof J asked for: does loading prior successes change scrum +// behavior? Run with retrieval ON and OFF on the same target โ€” if +// reviews differ measurably, the matrix has signal. If identical, +// the writes are noise and need a different design. +const MATRIX_RETRIEVE = process.env.LH_SCRUM_MATRIX_RETRIEVE !== "0"; +const PATHWAY_STATE_PATH = "/home/profit/lakehouse/data/_pathway_memory/state.json"; + +interface ProvenApproach { + pathway_id: string; + file_path: string; + accepted_model: string; + accepted_attempt: number; + kb_sources: string[]; // top-3 KB chunk source_doc names + summary_excerpt: string; // first 400 chars of reducer_summary + created_at: string; +} + +async function fetchProvenApproaches( + taskClass: string, + filePath: string, + signalClass: string | null, + limit = 3, +): Promise { + if (!MATRIX_RETRIEVE) return []; + try { + const f = Bun.file(PATHWAY_STATE_PATH); + if (!(await f.exists())) return []; + const state = JSON.parse(await f.text()); + const pathways = state.pathways ?? 
{}; + + // Match by narrow fingerprint: same task_class + same file prefix + // (first 2 path segments, e.g. "crates/queryd"). signal_class match + // is preferred but not required โ€” broader matches still inform. + const filePrefix = filePath.split("/").slice(0, 2).join("/"); + const matched: any[] = []; + for (const traces of Object.values(pathways) as any[][]) { + for (const t of traces) { + if (t.task_class !== taskClass) continue; + if (!t.file_path?.startsWith(filePrefix)) continue; + if (t.final_verdict !== "accepted") continue; + if (t.retired) continue; + matched.push(t); + } + } + // Most recent first + matched.sort((a, b) => (b.created_at ?? "").localeCompare(a.created_at ?? "")); + return matched.slice(0, limit).map(t => { + const acceptedAttempt = (t.ladder_attempts ?? []).find((a: any) => a.accepted) + ?? { model: "unknown", rung: 0 }; + const sources = (t.kb_chunks ?? []) + .slice(0, 3) + .map((c: any) => c.source_doc ?? "?"); + return { + pathway_id: t.pathway_id, + file_path: t.file_path, + accepted_model: acceptedAttempt.model, + accepted_attempt: acceptedAttempt.rung, + kb_sources: [...new Set(sources)], + summary_excerpt: (t.reducer_summary ?? "").slice(0, 400), + created_at: t.created_at ?? "", + }; + }); + } catch (e: any) { + console.error(`[scrum] matrix retrieval failed: ${e.message}`); + return []; + } +} + +// Unified matrix retriever โ€” pulls from ALL relevant corpora at once. +// Per J 2026-04-25: matrix is the vector indexing layer for the whole KB, +// not just pathway memory. Returns combined top-N ranked across corpora. +// +// Smoke-test goal: prove the matrix surfaces relevant context from MULTIPLE +// indexed sources (distilled facts/procedures/config-hints + team runs + +// playbook memory + pathway successes), not just one slice. +// +// Per-corpus configuration. Add an entry to query a new corpus. 
Limited +// to indexes that actually contain code-review-relevant context โ€” staffing +// data (workers_500k_*, resumes_*) is excluded by design. +const MATRIX_CORPORA_FOR_TASK: Record = { + scrum_review: [ + "distilled_factual_v20260423095819", + "distilled_procedural_v20260423102847", + "distilled_config_hint_v20260423102847", + "kb_team_runs_v1", + ], +}; + +interface MatrixHit { + source_corpus: string; + score: number; + doc_id: string; + text: string; +} + +interface MatrixContext { + hits: MatrixHit[]; + by_corpus: Record; + errors: Record; + latency_ms: number; +} + +async function fetchMatrixContext( + query: string, + taskClass: string, + filePath: string, + topPerCorpus = 3, + topOverall = 8, +): Promise { + const t0 = Date.now(); + const corpora = MATRIX_CORPORA_FOR_TASK[taskClass] ?? []; + const allHits: MatrixHit[] = []; + const errors: Record = {}; + const byCorpus: Record = {}; + + // Query persistent vector indexes in parallel + await Promise.all(corpora.map(async (idx) => { + try { + const r = await fetch(`${GATEWAY}/vectors/search`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ index_name: idx, query, top_k: topPerCorpus }), + signal: AbortSignal.timeout(15000), + }); + if (!r.ok) { errors[idx] = `HTTP ${r.status}`; return; } + const data: any = await r.json(); + const results = data.results ?? []; + byCorpus[idx] = results.length; + for (const h of results) { + allHits.push({ + source_corpus: idx, + score: Number(h.score ?? 0), + doc_id: String(h.doc_id ?? "?"), + text: String(h.chunk_text ?? h.text ?? 
"").slice(0, 400), + }); + } + } catch (e: any) { + errors[idx] = e.message; + } + })); + + // Pull pathway successes via the existing helper, mapped into MatrixHit shape + try { + const proven = await fetchProvenApproaches(taskClass, filePath, null, topPerCorpus); + byCorpus.pathway_memory = proven.length; + for (const p of proven) { + allHits.push({ + source_corpus: "pathway_memory", + score: 0.6, // neutral โ€” pathway has no cosine; used as soft mid-rank + doc_id: p.pathway_id.slice(0, 12), + text: `[${p.accepted_model} accepted on attempt ${p.accepted_attempt} ยท sources=${p.kb_sources.join(",")}]\n${p.summary_excerpt.replace(/\s+/g, " ")}`.slice(0, 400), + }); + } + } catch (e: any) { + errors.pathway_memory = e.message; + } + + // Sort all hits by score desc, take top N + allHits.sort((a, b) => b.score - a.score); + const topHits = allHits.slice(0, topOverall); + + return { + hits: topHits, + by_corpus: byCorpus, + errors, + latency_ms: Date.now() - t0, + }; +} + +function buildMatrixPreamble(ctx: MatrixContext): string { + if (ctx.hits.length === 0) return ""; + const lines = [ + `โ•โ•โ• ๐Ÿ“– MATRIX-INDEXED CONTEXT (${ctx.hits.length} hits across ${Object.keys(ctx.by_corpus).length} corpora) โ•โ•โ•`, + "Relevant chunks pulled from the knowledge base. Use as REFERENCE โ€” not findings to copy. 
Cite specific chunks if they shape your review.", + "", + ]; + for (let i = 0; i < ctx.hits.length; i++) { + const h = ctx.hits[i]; + lines.push(`[${i + 1}] ${h.source_corpus} (score=${h.score.toFixed(2)}, doc=${h.doc_id}): ${h.text.replace(/\s+/g, " ").trim()}`); + } + lines.push("โ•โ•โ•"); + lines.push(""); + return lines.join("\n"); +} + +function buildProvenApproachesPreamble(approaches: ProvenApproach[]): string { + if (approaches.length === 0) return ""; + const lines = [ + "โ•โ•โ• ๐Ÿ“– PROVEN APPROACHES โ€” PRIOR ACCEPTED REVIEWS ON THIS FILE AREA โ•โ•โ•", + "These are reviews that previously passed observer hand-review on the same task class + file prefix.", + "Use them as REFERENCE PATTERNS for what a strong review looks like โ€” not as findings to copy.", + "", + ]; + for (let i = 0; i < approaches.length; i++) { + const a = approaches[i]; + lines.push(`Approach ${i + 1} ยท file=${a.file_path} ยท model=${a.accepted_model} ยท sources=[${a.kb_sources.join(", ")}]`); + lines.push(` excerpt: ${a.summary_excerpt.replace(/\s+/g, " ").trim()}`); + lines.push(""); + } + lines.push("โ•โ•โ•"); + lines.push(""); + return lines.join("\n"); +} + interface ObserverVerdict { verdict: "accept" | "reject" | "cycle"; confidence?: number; @@ -1108,6 +1317,26 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of "\nโ•โ•โ•\n\n"; log(` ๐Ÿ“š pathway memory: ${pastFingerprints.length} historical bug pattern(s) prepended to prompt`); } + // Unified matrix-indexed retrieval โ€” pulls from ALL relevant KB + // corpora (distilled facts/procedures/config-hints + team runs + + // pathway successes). LH_SCRUM_MATRIX_RETRIEVE=0 disables for A/B. + let provenApproachesPreamble = ""; + if (MATRIX_RETRIEVE) { + // Query text combines task framing + file path + first chunk of + // source so retrieval anchors against both the metadata and the + // actual code being reviewed. 
+ const matrixQuery = `${taskClass} ${rel} ${content.slice(0, 500)}`; + const matrixCtx = await fetchMatrixContext(matrixQuery, taskClass, rel, 3, 8); + provenApproachesPreamble = buildMatrixPreamble(matrixCtx); + const corporaSummary = Object.entries(matrixCtx.by_corpus) + .map(([k, v]) => `${k.split("_v")[0]}=${v}`).join(" "); + const errSummary = Object.keys(matrixCtx.errors).length > 0 + ? ` errors=[${Object.entries(matrixCtx.errors).map(([k, v]) => `${k}:${v}`).join(", ")}]` + : ""; + log(` ๐Ÿ“– matrix: ${matrixCtx.hits.length} hits in ${matrixCtx.latency_ms}ms ยท ${corporaSummary}${errSummary}`); + } else { + log(` ๐Ÿ“– matrix retrieval: DISABLED (LH_SCRUM_MATRIX_RETRIEVE=0)`); + } let hotSwapOrderedIndices: number[] | null = null; if (hotSwap) { // Reorder the ladder to try the recommended model first. Rung @@ -1166,7 +1395,7 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of const r = await chat({ provider: rung.provider, model: rung.model, - prompt: pathwayPreamble + baseTask + learning, + prompt: provenApproachesPreamble + pathwayPreamble + baseTask + learning, max_tokens: 1500, }); const attemptMs = Date.now() - attemptStarted;