// consensus_reducer_design.ts — N=3 design consultation.
//
// J's ask: enhance the tree-split reducer to preserve FULL backtrack-able
// context (endpoints tried, attempt count per model in the ladder, KB
// sources retrieved, context7 bridge hits, MCP observer signals, audit
// verdicts) instead of collapsing to a summary. Then index the full
// context through our existing vectord matrix indexing (HNSW + Lance +
// playbook_memory) so successful pathways become hot-swappable — the
// system asks "what did we try, what worked, in what order" for a
// similar task class and gets a ranked playbook back.
//
// Before building, consult a panel of diverse models and print their
// proposals side-by-side so we can pick the convergent design.

const GATEWAY = "http://localhost:3100";

const DESIGN_BRIEF = `
# Context — Lakehouse signal→commit loop

We run 6x scrum-master iterations that audit Rust crates for PRD alignment,
produce findings + confidence, and feed an auto-applier that lands small
mechanical commits through a cargo-green-and-warning-stable gate.

Key components:
- \`tests/real-world/scrum_master_pipeline.ts\` — orchestrator. 9-rung model
  LADDER (kimi-k2:1t → qwen3-coder:480b → deepseek → mistral-large →
  gpt-oss:120b → qwen3.5:397b → openrouter free rescues → local
  qwen3.5:latest). Each target file retrieves 5 PRD chunks + 5 proposal-doc
  chunks via vectord RAG, tree-splits large files into 3.5K shards, asks
  each rung in order, accepts the first response passing structural checks.
- \`mcp-server/observer.ts\` — receives scrum \`/event\` emissions (file,
  verdict, critical_failures_count, gradient_tier, attempts, reviewer_model,
  tree_split_fired). Escalates failure clusters to LLM Team by POSTing to
  /v1/chat with qwen3-coder:480b.
- \`context7-bridge\` — external library docs lookup.
- \`auditor/audit.ts\` — independent N=3 consensus re-check of scrum
  findings; writes to data/_kb/audit_facts.jsonl via LLM Team
  \`/api/run?mode=extract\`.
- \`crates/vectord/src/playbook_memory.rs\` — indexing for proven playbooks:
  PlaybookEntry, DocRef, FailureRecord, BoostEntry, PatternReport. Uses HNSW
  index + Lance columnar backend + promotion pipeline. Already battle-tested
  for workers/staffing queries.
- Tree-split REDUCER: after shards return map-style summaries, they are
  concatenated with internal §N§ markers and fed to a reviewer model to
  produce ONE file-level review. Currently the reducer sees summaries, not
  the full context behind each shard's conclusion.

# The problem

The reducer currently TRUNCATES to a short summary. When the auditor or a
future iteration wants to backtrack WHY the reducer concluded what it did —
which attempt succeeded, which failed, what KB chunks were retrieved, what
observer signal classified the file as LOOPING vs CONVERGING — that context
is lost. So:

1. Auditor can't verify citation provenance beyond the summary line.
2. Applier can't tell a "tried X, failed, qwen fixed it" playbook from a
   "tried X and it was easy" playbook — they look identical downstream.
3. The matrix indexing is only used for RAG chunks during the scrum pass,
   NOT for storing the full end-to-end pathway of a successful review.
# The design question

Propose an enhanced reducer + indexing design that:

(a) Preserves the FULL backtrack context per reviewed file:
    - every ladder attempt (model, ms, accepted_y/n, reject_reason)
    - every retrieved KB chunk (source doc, chunk id, cosine score, rank)
    - every observer signal (class, priors, prior-iter outcomes)
    - every context7 bridge hit (library, version pulled)
    - every sub-pipeline call (LLM Team extract results, audit consensus)

(b) Stores this pathway into vectord's matrix indexing alongside the review
    verdict so it becomes retrievable by similarity. When a new file's
    fingerprint (task_class + file-path prefix + signal class) matches a
    past successful pathway, the system can hot-swap by replaying or
    short-circuiting to the model/KB combo that worked.

(c) Surfaces the matrix-index hit rate as a feedback signal on the scrum's
    UI — "this file was solved 3 times before by the same ladder rung;
    consider short-circuiting to rung 5."

(d) Is compatible with the existing playbook_memory.rs primitives
    (PlaybookEntry, DocRef, FailureRecord, BoostEntry) — extend, don't
    replace. The indexing layer is in production for workers/staffing; we
    want the reducer pathway to piggyback on proven infrastructure.

# Constraints

- NO new crate. Extend vectord + scrum_master_pipeline.
- Full context can be LARGE — a reviewed file might have 5 retrievals, 4
  ladder attempts, 8 observer priors. Design the embedding / fingerprint so
  similar-but-not-identical pathways cluster.
- The reducer summary is still needed for the reviewer LLM input — don't
  remove it, ADD the full-context sidecar.
- Audit trail: every pathway must be replayable deterministically from
  what's stored (i.e., enough context to re-run without the original prompt
  cache).

# Required output (STRICT JSON, no prose, no markdown fences):

{
  "approach": "one-paragraph summary of your proposed design",
  "data_model": {
    "new_fields_on_playbook_entry": [...],
    "new_types": [ {"name": "...", "purpose": "...", "fields": [...]} ]
  },
  "storage_strategy": {
    "what_to_vectorize": "the text that becomes the embedding",
    "fingerprint_key": "the deterministic key for similarity retrieval",
    "backend": "HNSW, Lance, playbook_memory — pick"
  },
  "reducer_changes": {
    "inputs_added": [...],
    "outputs_added": [...],
    "compatibility_notes": "how existing callers stay working"
  },
  "hot_swap_logic": "concrete rule for when to skip the ladder and replay a past pathway",
  "ui_signal": "what to surface so J sees whether matrix indexing is earning its keep",
  "risks": [...],
  "why_this_beats_summarization": "one-paragraph argument"
}
`.trim();

interface Probe {
  name: string;
  provider: "ollama" | "ollama_cloud" | "openrouter";
  model: string;
}
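// For reference while reading the proposals this script collects: a minimal
// TypeScript sketch of the full-context sidecar the brief asks for. Every
// name below (ReducerPathway, LadderAttempt, ...) is a hypothetical
// placeholder, not the chosen design; the real shape will come out of the
// consensus and must map onto the existing playbook_memory primitives
// (PlaybookEntry, DocRef, FailureRecord, BoostEntry). Type-only
// declarations — nothing here executes.
interface LadderAttempt {
  model: string;          // ladder rung that was asked
  ms: number;             // wall-clock latency of the attempt
  accepted: boolean;      // passed the structural checks?
  rejectReason?: string;  // present when accepted === false
}
interface KbRetrieval {
  sourceDoc: string;      // PRD or proposal doc the chunk came from
  chunkId: string;
  cosineScore: number;
  rank: number;           // 1-based rank within the retrieval
}
interface ObserverSignal {
  class: string;          // e.g. "LOOPING" or "CONVERGING"
  priors: string[];       // prior-iteration outcomes the observer used
}
interface Context7Hit {
  library: string;
  version: string;        // version of the docs pulled
}
interface ReducerPathway {
  file: string;                // reviewed file path
  fingerprint: string;         // task_class + file-path prefix + signal class
  attempts: LadderAttempt[];
  retrievals: KbRetrieval[];
  observerSignals: ObserverSignal[];
  context7Hits: Context7Hit[];
  subPipelineCalls: string[];  // e.g. LLM Team extract ids, audit consensus ids
  verdict: string;             // the file-level review verdict
  summary: string;             // existing reducer summary, kept as-is (sidecar, not replacement)
}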
// Round-3 probe set — 4 probes covering the remaining ladder rungs +
// architecture/provider diversity. J wanted all 4 of the untouched
// options so the aggregated 10-model signal is saturated across the
// usable ladder.
const PROBES: Probe[] = [
  { name: "qwen35-397b", provider: "ollama_cloud", model: "qwen3.5:397b" },
  { name: "openrouter-gpt-oss", provider: "openrouter", model: "openai/gpt-oss-120b:free" },
  { name: "openrouter-gemma3", provider: "openrouter", model: "google/gemma-3-27b-it:free" },
  { name: "qwen3-coder-480b-2", provider: "ollama_cloud", model: "qwen3-coder:480b" }, // second probe of the coding specialist — stability check
];

async function ask(p: Probe): Promise<{ name: string; raw: string; ms: number; error?: string }> {
  const started = Date.now();
  try {
    const r = await fetch(`${GATEWAY}/v1/chat`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        provider: p.provider,
        model: p.model,
        messages: [
          { role: "system", content: "You are a senior architect. Output STRICT JSON only." },
          { role: "user", content: DESIGN_BRIEF },
        ],
        max_tokens: 3000,
        temperature: 0,
      }),
    });
    const ms = Date.now() - started;
    if (!r.ok) {
      return { name: p.name, raw: "", ms, error: `HTTP ${r.status}: ${(await r.text()).slice(0, 200)}` };
    }
    const j = await r.json();
    const content = j.content ?? j.message?.content ?? j.choices?.[0]?.message?.content ?? "";
    return { name: p.name, raw: String(content), ms };
  } catch (e: any) {
    return { name: p.name, raw: "", ms: Date.now() - started, error: String(e).slice(0, 200) };
  }
}

function extractJson(raw: string): any | null {
  let s = raw.trim();
  const fence = s.match(/^```(?:json)?\s*/);
  if (fence) s = s.slice(fence[0].length);
  if (s.endsWith("```")) s = s.slice(0, -3).trim();
  const first = s.indexOf("{");
  const last = s.lastIndexOf("}");
  if (first < 0 || last <= first) return null;
  try {
    return JSON.parse(s.slice(first, last + 1));
  } catch {
    return null;
  }
}

function summarize(obj: any, max = 240): string {
  if (!obj) return "(no JSON parsed)";
  if (typeof obj === "string") return obj.length > max ? obj.slice(0, max) + "…" : obj;
  if (Array.isArray(obj)) return obj.map((x) => summarize(x, max)).join("; ");
  return Object.entries(obj)
    .map(([k, v]) => `${k}=${summarize(v, max)}`)
    .join(" | ");
}

async function main() {
  console.log(`\n── N=3 design consensus ──`);
  console.log(`models: ${PROBES.map((p) => p.model).join(", ")}\n`);

  const results = await Promise.all(PROBES.map(ask));

  for (const r of results) {
    console.log(`\n── ${r.name} (${r.ms}ms) ──`);
    if (r.error) {
      console.log(` ERROR: ${r.error}`);
      continue;
    }
    const j = extractJson(r.raw);
    if (!j) {
      console.log(` raw (no JSON): ${r.raw.slice(0, 600)}…`);
      continue;
    }
    console.log(` approach: ${summarize(j.approach, 400)}`);
    console.log(` fingerprint: ${summarize(j.storage_strategy?.fingerprint_key, 200)}`);
    console.log(` vectorize: ${summarize(j.storage_strategy?.what_to_vectorize, 200)}`);
    console.log(` backend: ${summarize(j.storage_strategy?.backend, 200)}`);
    console.log(` hot_swap: ${summarize(j.hot_swap_logic, 300)}`);
    console.log(` new_types: ${summarize(j.data_model?.new_types, 400)}`);
    console.log(` risks: ${summarize(j.risks, 300)}`);
    console.log(` why>summary: ${summarize(j.why_this_beats_summarization, 300)}`);
  }

  // Write full JSON to disk so we can inspect later.
  const outPath = `/home/profit/lakehouse/data/_kb/consensus_reducer_design_${Date.now().toString(36)}.json`;
  await Bun.write(outPath, JSON.stringify(results, null, 2));
  console.log(`\nfull responses → ${outPath}`);
}

await main();
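// Illustrative only, never called by this script: one way the hot-swap rule
// from sections (b) and (c) of the brief could look, assuming the
// hypothetical ReducerPathway sketch above and an upstream vectord lookup
// that already returned past pathways ranked by fingerprint similarity.
// The thresholds are placeholders; the concrete rule is exactly what the
// probed models are asked to propose.
function shouldShortCircuit(
  pastHits: Array<{ pathway: ReducerPathway; similarity: number }>,
  minHits = 3,          // placeholder for "solved 3 times before by the same rung"
  minSimilarity = 0.9,  // placeholder cosine cutoff
): { skipLadder: boolean; replayModel?: string } {
  // Keep only close matches whose stored pathway ended in an accepted attempt.
  const wins = pastHits.filter(
    (h) => h.similarity >= minSimilarity && h.pathway.attempts.some((a) => a.accepted),
  );
  if (wins.length < minHits) return { skipLadder: false };
  // Replay the rung that produced the accepted attempt most often.
  const counts = new Map<string, number>();
  for (const w of wins) {
    const winner = w.pathway.attempts.find((a) => a.accepted);
    if (winner) counts.set(winner.model, (counts.get(winner.model) ?? 0) + 1);
  }
  const ranked = [...counts.entries()].sort((a, b) => b[1] - a[1]);
  return ranked.length > 0
    ? { skipLadder: true, replayModel: ranked[0][0] }
    : { skipLadder: false };
}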