diff --git a/auditor/checks/inference.ts b/auditor/checks/inference.ts
index 4a83745..66e8c85 100644
--- a/auditor/checks/inference.ts
+++ b/auditor/checks/inference.ts
@@ -18,15 +18,32 @@ import { readFile, mkdir, appendFile } from "node:fs/promises";
 import { extractFacts } from "../fact_extractor.ts";
 
 const GATEWAY = process.env.LH_GATEWAY_URL ?? "http://localhost:3100";
-const MODEL = process.env.LH_AUDITOR_REVIEW_MODEL ?? "gpt-oss:120b";
-// Tie-breaker for claims where the N=3 consensus produces a 1-1-1
-// split (genuinely borderline). Different architecture from the
-// primary reviewer (gpt-oss) so the tie-break isn't correlated with
-// the original disagreement. qwen3-coder:480b is a newer coding
-// specialist at 480B params, well-suited to PR-diff claim verification
-// and distinct in training lineage from gpt-oss.
-const TIEBREAKER_MODEL = process.env.LH_AUDITOR_TIEBREAKER_MODEL ?? "qwen3-coder:480b";
+// Rebuild 2026-04-26: route claim verification through /v1/mode/execute
+// (task_class=pr_audit) so we get pathway memory + lakehouse_answers_v1
+// + JSON-shaped framing molded into ONE prompt. The hand-rolled
+// systemMsg/userMsg path was reinventing the mode runner badly.
+//
+// 2026-04-27 update: original default kimi-k2:1t hit a sustained
+// upstream outage on Ollama Cloud (consistent 500 ISE across hours of
+// retries — verified with trivial 8-token probes). Swapped default to
+// deepseek-v3.1:671b which is proven working end-to-end through the
+// pr_audit mode runner during Phase 5 distillation acceptance testing.
+// kimi-k2:1t can be re-selected via LH_AUDITOR_REVIEW_MODEL env when
+// the upstream returns. Tie-breaker stays grok-4.1-fast (different
+// vendor lineage so consensus + tie-break won't fail-correlate).
+const MODEL = process.env.LH_AUDITOR_REVIEW_MODEL ?? "deepseek-v3.1:671b";
+const TIEBREAKER_MODEL = process.env.LH_AUDITOR_TIEBREAKER_MODEL ?? "x-ai/grok-4.1-fast";
+// SHARD_MODEL retained for the legacy callCloud path (still used by
+// runCloudInference's diagnostic mode), but no longer fired by the
+// main inference flow — tree-split was retired 2026-04-27 in favor of
+// the mode runner's matrix retrieval against lakehouse_answers_v1.
+const SHARD_MODEL = process.env.LH_AUDITOR_SHARD_MODEL ?? "qwen3-coder:480b";
 const N_CONSENSUS = Number(process.env.LH_AUDITOR_CONSENSUS_N ?? 3);
+// Bounded parallelism on the tree-split shard loop. Old behavior was
+// fully serial ("keep gateway load bounded") which made huge PRs take
+// 5+ minutes of curation alone. 6 in flight keeps gateway busy without
+// thrashing it; tunable via env.
+const SHARD_CONCURRENCY = Math.max(1, Math.floor(Number(process.env.LH_AUDITOR_SHARD_CONCURRENCY ?? 6)) || 1); // garbage/NaN env → 1 (serial); NaN would otherwise start zero workers
 const AUDIT_DISCREPANCIES_JSONL = "/home/profit/lakehouse/data/_kb/audit_discrepancies.jsonl";
 // 40KB comfortably fits gpt-oss:120b's context. PR #1 (~39KB) was
 // previously truncated at 15KB causing the reviewer to miss later
@@ -48,6 +65,11 @@ const MAX_DIFF_CHARS = 40000;
 const CURATION_THRESHOLD = 30000;
 const DIFF_SHARD_SIZE = 4500;
 const CALL_TIMEOUT_MS = 120_000;
+// Mode runner can take longer than a raw /v1/chat call because it does
+// pathway-fingerprint lookup + matrix retrieval + relevance filter
+// before the LLM call. Budget extra time so we don't trip on a slow
+// answers-corpus search.
+const MODE_RUNNER_TIMEOUT_MS = 240_000;
 const REPO_ROOT = "/home/profit/lakehouse";
 
 export interface InferenceContext {
@@ -86,26 +108,23 @@ export async function runInferenceCheck(
     }];
   }
 
-  // Diff source for the cloud prompt — either the raw diff (small
-  // enough to fit), or a tree-split scratchpad (curation layer). We
-  // prefer curation to truncation: truncation silently drops files
-  // past the window; curation summarizes them so the cloud still sees
-  // what changed, just densified.
-  let diffForPrompt: string;
-  let curationNote = "";
-  if (diff.length > CURATION_THRESHOLD) {
-    const ts = await treeSplitDiff(diff, verifiable);
-    diffForPrompt = ts.scratchpad;
-    curationNote = ` (curated: ${diff.length} chars → ${ts.shards} shards → scratchpad ${ts.scratchpad.length} chars)`;
-  } else {
-    diffForPrompt = diff;
-  }
-  // Belt-and-suspenders truncation — even a tree-split scratchpad
-  // shouldn't exceed MAX_DIFF_CHARS in practice, but guard anyway so
-  // pathological inputs can't burst the prompt.
-  const truncated = diffForPrompt.length > MAX_DIFF_CHARS
-    ? diffForPrompt.slice(0, MAX_DIFF_CHARS) + `\n...[${diffForPrompt.length - MAX_DIFF_CHARS} more chars truncated]`
-    : diffForPrompt;
+  // 2026-04-27 architecture simplification: dropped the tree-split
+  // scratchpad layer. Rationale: the mode runner's pr_audit pipeline
+  // pulls from lakehouse_answers_v1 (gold-standard prior audits) +
+  // lakehouse_arch_v1 + lakehouse_symbols_v1 via matrix retrieval. That
+  // corpus IS the cross-PR context the tree-split was synthesizing
+  // from scratch on every audit run. With the distillation substrate
+  // shipped (commits 27b1d27..1b433a9), per-shard fact extraction is
+  // redundant — and gpt-oss:120b at 168 calls/audit was the dominant
+  // cost. Now: truncate diff to MAX_DIFF_CHARS, hand straight to the
+  // mode runner, let retrieval supply context. ONE strong-model call
+  // per consensus rep × N=3 reps = 3 calls total per audit.
+  const truncated = diff.length > MAX_DIFF_CHARS
+    ? diff.slice(0, MAX_DIFF_CHARS) + `\n...[${diff.length - MAX_DIFF_CHARS} more chars truncated — the pr_audit mode runner has matrix retrieval against lakehouse_answers_v1 + arch + symbols for cross-PR context]`
+    : diff;
+  const curationNote = diff.length > MAX_DIFF_CHARS
+    ? ` (truncated ${diff.length}→${MAX_DIFF_CHARS} chars; matrix retrieval supplies cross-PR context)`
+    : "";
 
   // Build the reviewer prompt in the same shape as run_codereview's
   // review stage (llm_team_ui.py:10950), adapted for claim verification:
@@ -114,79 +133,20 @@ export async function runInferenceCheck(
   //   "Review: bugs/security/perf/style/edge. Provide corrected code."
   // We add: claim list upfront + ask for structured JSON verdict.
   //
-  // When the diff was curated (tree-split scratchpad), we add an
-  // explicit anti-false-positive instruction: the scratchpad is a
-  // distillation, not the full source, so absence-from-scratchpad is
-  // NOT evidence of absence-from-diff. Mirrors the fix we made in
-  // scrum_master's review prompt for the same class of error.
+  // Curation flag is now just a truncation flag — when the diff was
+  // cut, tell the reviewer it didn't see the full picture so it doesn't
+  // confidently mark a claim NOT BACKED based on absence in the
+  // (potentially incomplete) input.
   const isCurated = curationNote.length > 0;
-  const curationGuard = isCurated
-    ? [
-        "",
-        "CRITICAL: the 'Diff' below is a curated multi-shard scratchpad,",
-        "NOT the full raw diff. The scratchpad distills each shard down",
-        "to facts useful for claim verification and drops the rest.",
-        "DO NOT flag a function/field/feature as 'missing' or 'not",
-        "implemented' based solely on its absence from the scratchpad —",
-        "absence in a distillation is NOT evidence of absence in the",
-        "actual diff. Only judge a claim NOT BACKED when the scratchpad",
-        "DIRECTLY contradicts it (e.g. scratchpad shows the function was",
-        "added empty, or shows the claimed code path is a stub).",
-        "Skip the unflagged_gaps section entirely when operating on a",
-        "curated scratchpad — you can't reliably detect gaps from a",
-        "distillation, and false positives there are worse than misses.",
-      ].join("\n")
-    : "";
-  const systemMsg = [
-    "You review pull-request diffs against the author's own ship-claims.",
-    "For each claim, decide: is it backed by actual code in the diff, or is",
-    "it placeholder / aspirational / unwired?",
-    "",
-    "A claim is BACKED when the diff contains a real code path that delivers",
-    "the claimed behavior. A claim is NOT BACKED when:",
-    "  - the claim asserts functionality but the diff only adds types/fields",
-    "    with no consumer",
-    "  - the claim mentions tests but no test function was added",
-    "  - the claim claims integration but the integration point is a stub",
-    "  - the diff contains unimplemented!() / todo!() / TODO comments",
-    "  - the claim says 'works end-to-end' but the diff has no end-to-end test",
-    curationGuard,
-    "",
-    "Respond with strict JSON only. No prose before or after. Shape:",
-    "{",
-    '  "claim_verdicts": [',
-    '    {"claim_idx": 0, "backed": false, "evidence": "short reason"}',
-    "  ],",
-    '  "unflagged_gaps": [',
-    '    {"location": "file:line", "summary": "short description"}',
-    "  ]",
-    "}",
-  ].join("\n");
+  const prNumber = ctx?.pr_number ?? 0;
 
-  const userMsg = [
-    `Ship-claims the author made (numbered 0..N-1):`,
-    verifiable.map((c, i) => `  ${i}. [${c.strength}] "${c.text}" at ${c.location}`).join("\n"),
-    "",
-    `Diff:`,
-    "```",
-    truncated,
-    "```",
-    "",
-    `For each numbered claim above, emit a claim_verdicts entry. For gaps the`,
-    `author DIDN'T claim but that look like placeholder code, emit unflagged_gaps.`,
-    `Strict JSON only, matching the shape described. No prose outside JSON.`,
-  ].join("\n");
-
-  // N=3 consensus — run the primary reviewer in parallel, collect
-  // all three parsed responses, majority-vote per claim. Parallel
-  // (Promise.all) because each call is ~20-30s and they're independent;
-  // wall-clock stays ~same as single call, cost 3x tokens. Empirical
-  // justification: in 3-run determinism tests, 7/8 findings were
-  // stable but 1 flipped across runs — majority vote stabilizes the
-  // flipping class without losing the stable signal.
+  // N=3 consensus — fire the mode runner three times in parallel.
+  // Each /v1/mode/execute call composes pathway memory + answers corpus
+  // + JSON-shaped pr_audit framing internally, so the auditor's only
+  // job here is to vote-aggregate. Wall-clock ~= single call.
   const primaryRuns = await Promise.all(
     Array.from({ length: N_CONSENSUS }, () =>
-      runCloudInference(systemMsg, userMsg, MODEL)),
+      runModeRunnerInference(truncated, verifiable, prNumber, isCurated, MODEL)),
   );
 
   const parsedRuns = primaryRuns.filter(r => r.parsed !== null);
@@ -209,9 +169,15 @@ export async function runInferenceCheck(
   interface Votes { trues: number; falses: number; evidences: string[] }
   const votesByClaim = new Map<number, Votes>();
   const unflaggedByRun: any[][] = [];
-  let totalTokens = 0;
+  let totalLatencyMs = 0;
+  let totalEnrichedChars = 0;
+  let bugFingerprintsSeen = 0;
+  let matrixKeptSeen = 0;
   for (const run of parsedRuns) {
-    totalTokens += run.tokens;
+    totalLatencyMs += run.latency_ms ?? 0;
+    totalEnrichedChars += run.enriched_chars ?? 0;
+    bugFingerprintsSeen = Math.max(bugFingerprintsSeen, run.bug_fingerprints ?? 0);
+    matrixKeptSeen = Math.max(matrixKeptSeen, run.matrix_kept ?? 0);
     unflaggedByRun.push(Array.isArray(run.parsed?.unflagged_gaps) ? run.parsed.unflagged_gaps : []);
     for (const v of run.parsed?.claim_verdicts ?? []) {
       const idx = Number(v?.claim_idx);
@@ -233,10 +199,11 @@ export async function runInferenceCheck(
   findings.push({
     check: "inference",
     severity: "info",
-    summary: `cloud review completed (model=${MODEL}, consensus=${parsedRuns.length}/${N_CONSENSUS}, tokens=${totalTokens})${curationNote}`,
+    summary: `pr_audit mode runner completed (model=${MODEL}, consensus=${parsedRuns.length}/${N_CONSENSUS}, ${totalLatencyMs}ms total)${curationNote}`,
     evidence: [
       `claims voted: ${votesByClaim.size}`,
       `parsed runs: ${parsedRuns.length} / ${N_CONSENSUS}`,
+      `enrichment: ${bugFingerprintsSeen} bug fingerprints, ${matrixKeptSeen} answers-corpus chunks, prompt avg ${Math.round(totalEnrichedChars / Math.max(parsedRuns.length, 1))} chars`,
     ],
   });
 
@@ -266,8 +233,9 @@ export async function runInferenceCheck(
       notBacked = false;
       resolution = "majority_backed";
     } else {
-      // Tie. Run tie-breaker with a different-architecture model.
-      const tb = await runCloudInference(systemMsg, userMsg, TIEBREAKER_MODEL);
+      // Tie. Run tie-breaker with a different-architecture model
+      // through the same mode runner so framing/enrichment match.
+      const tb = await runModeRunnerInference(truncated, verifiable, prNumber, isCurated, TIEBREAKER_MODEL);
       if (tb.parsed) {
         const tv = (tb.parsed.claim_verdicts ?? []).find((v: any) => Number(v?.claim_idx) === idx);
         if (tv?.backed === false) {
@@ -335,9 +303,13 @@ export async function runInferenceCheck(
   // don't exit before extraction lands; the systemd poller has plenty
   // of headroom (90s cycle vs ~15s extraction). A failure inside
   // extractAndPersistFacts is caught + logged but never throws.
+  // Post-2026-04-27: extraction now runs against the truncated diff
+  // (no scratchpad to extract from since tree-split was retired).
+  // Because of the isCurated guard below it only fires when the diff
+  // exceeded MAX_DIFF_CHARS; it still surfaces entities/symbols into audit_facts.jsonl.
   if (isCurated && ctx && process.env.LH_AUDITOR_SKIP_EXTRACT !== "1") {
     try {
-      await extractAndPersistFacts(diffForPrompt, ctx);
+      await extractAndPersistFacts(truncated, ctx);
     } catch (e) {
       console.error(`[inference] fact extraction failed: ${(e as Error).message}`);
     }
@@ -394,19 +366,102 @@ export async function runInferenceCheck(
   return findings;
 }
 
-// Single cloud call — the consensus loop calls this N times in
-// parallel. Returns the parsed JSON shape + token usage + any error
-// diagnostic. NEVER throws; the consensus aggregator handles partial
-// failures by dropping non-parsed runs from the vote.
+// Single mode-runner call — consensus + tie-breaker dispatch through
+// here. Returns parsed JSON shape + telemetry from /v1/mode/execute
+// (latency, enrichment metrics) + any error diagnostic. NEVER throws.
+// The consensus aggregator handles partial failures by dropping
+// non-parsed runs from the vote.
 interface CloudRunResult {
   parsed: any | null;
-  tokens: number;
+  latency_ms: number;       // body.latency_ms from /v1/mode/execute; 0 when absent or on failure
+  enriched_chars: number;   // body.enriched_prompt_chars; 0 when absent or on failure
+  bug_fingerprints: number; // body.sources.bug_fingerprints_count; 0 when absent or on failure
+  matrix_kept: number;      // body.sources.matrix_chunks_kept; 0 when absent or on failure
   error?: string;       // "unreachable" | "non_200" | "unparseable"
   diagnostic?: string;  // first 200 chars for debugging
   model: string;
 }
 
-async function runCloudInference(systemMsg: string, userMsg: string, model: string): Promise<CloudRunResult> {
+// One pr_audit pass via POST /v1/mode/execute. diffOrScratchpad is sent as
+// file_content (the caller passes the raw diff, cut to MAX_DIFF_CHARS when
+// oversized); claims are numbered by array index; prNumber only names the
+// synthetic file_path; isCurated adds the truncation warning; model becomes
+// force_model. Never throws: failures resolve with parsed=null + error set.
+async function runModeRunnerInference(
+  diffOrScratchpad: string,
+  claims: Claim[],
+  prNumber: number,
+  isCurated: boolean,
+  model: string,
+): Promise<CloudRunResult> {
+  // Shared failure shape (diagnostic capped at 200 chars); describe() copes with non-Error throwables.
+  const fail = (error: string, diagnostic: string): CloudRunResult => ({
+    parsed: null, latency_ms: 0, enriched_chars: 0, bug_fingerprints: 0, matrix_kept: 0,
+    error, diagnostic: diagnostic.slice(0, 200), model,
+  });
+  const describe = (e: unknown): string => (e instanceof Error ? e.message : String(e));
+  // user_question carries the claim list + the truncation note (if any).
+  // pr_audit's framing (mode.rs FRAMING_PR_AUDIT) holds the JSON shape +
+  // strict-output rules so we don't repeat them here.
+  const claimDigest = claims
+    .map((c, i) => `  ${i}. [${c.strength}] "${c.text}" at ${c.location}`)
+    .join("\n");
+  const curationNote = isCurated
+    ? "\n\nNOTE: the FILE below is the raw diff TRUNCATED to a size cap, not the complete diff. Absence from the visible portion is NOT evidence of absence in the actual diff. Only mark backed=false on direct contradiction (e.g. the visible diff shows the function is empty / a stub). Skip unflagged_gaps entirely when the diff is truncated."
+    : "";
+  const userQuestion = [
+    "Verify each ship-claim against the diff (possibly truncated).",
+    "",
+    "Ship-claims (numbered 0..N-1):",
+    claimDigest,
+    curationNote,
+    "",
+    "Every claim above must produce exactly one claim_verdicts entry. Output strict JSON only — no prose outside the JSON object.",
+  ].join("\n");
+  let resp: Response;
+  try {
+    resp = await fetch(`${GATEWAY}/v1/mode/execute`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({
+        task_class: "pr_audit",
+        file_path: `pr-${prNumber}.diff`,
+        file_content: diffOrScratchpad,
+        user_question: userQuestion,
+        force_model: model,
+        force_temperature: 0,
+      }),
+      signal: AbortSignal.timeout(MODE_RUNNER_TIMEOUT_MS),
+    });
+  } catch (e) { return fail("unreachable", describe(e)); }
+  if (!resp.ok) {
+    // Reading the error body can itself reject; fall back to the status alone.
+    const detail = await resp.text().catch(() => "");
+    return fail("non_200", `${resp.status}: ${detail.slice(0, 160)}`);
+  }
+  let body: any;
+  try { body = await resp.json(); }
+  catch (e) { return fail("unparseable", describe(e)); }
+  // A non-string `response` field would make content.slice() below throw.
+  const content: string = typeof body?.response === "string" ? body.response : "";
+  const parsed = extractJson(content);
+  return {
+    parsed,
+    latency_ms: body?.latency_ms ?? 0,
+    enriched_chars: body?.enriched_prompt_chars ?? 0,
+    bug_fingerprints: body?.sources?.bug_fingerprints_count ?? 0,
+    matrix_kept: body?.sources?.matrix_chunks_kept ?? 0,
+    error: parsed ? undefined : "unparseable",
+    diagnostic: parsed ? undefined : content.slice(0, 200),
+    model,
+  };
+}
+
+// Legacy direct /v1/chat caller — kept for callers outside the
+// pr_audit pipeline. Currently unused after the 2026-04-26 mode-runner
+// rebuild; preserved so we can A/B against the mode runner if a
+// regression surfaces.
+async function runCloudInference(systemMsg: string, userMsg: string, model: string): Promise<{ parsed: any | null; tokens: number; error?: string; diagnostic?: string; model: string }> {
   let resp: Response;
   try {
     resp = await fetch(`${GATEWAY}/v1/chat`, {
@@ -419,11 +474,6 @@ async function runCloudInference(systemMsg: string, userMsg: string, model: stri
           { role: "system", content: systemMsg },
           { role: "user", content: userMsg },
         ],
-        // temp=0 (greedy) + think=true. think=true is required for
-        // gpt-oss:120b — without it the model returns empty content
-        // on large prompts. Variance from the think trace is observed
-        // in practice, which is why we use N=3 consensus, not single-
-        // call determinism.
         max_tokens: 3000,
         temperature: 0,
         think: true,
@@ -497,15 +547,17 @@ async function extractAndPersistFacts(scratchpad: string, ctx: InferenceContext)
 // (function signatures, struct fields, deletions, new files), drops
 // everything else. Merges into a compact scratchpad.
 //
-// Cost: N cloud calls for the shard summaries + 1 cloud call for the
-// final verification = N+1 calls instead of 1. Mitigation: shards run
-// serially (not parallel) to keep gateway load bounded; summary calls
-// use max_tokens=400 so they're fast (~2s each on gpt-oss:120b).
+// Cost: N cloud calls for shard summaries + the final verification.
+// Pre-2026-04-26 the shard loop ran serially "to keep gateway load
+// bounded" — turned out to be a bottleneck on PRs with 50+ shards
+// (5+ minutes of curation). Now bounded-parallel via
+// SHARD_CONCURRENCY: in-flight ≤ N at any time, gateway stays calm,
+// wall-clock drops 4-6×.
 //
-// Determinism: each shard summary call uses temp=0 + think=true (same
-// as the top-level inference call), so identical input yields
-// identical scratchpad. The final verification call then sees a
-// stable scratchpad, giving stable verdicts.
+// Determinism: each shard summary call uses temp=0 + think=false
+// (same as before), so identical input yields identical scratchpad.
+// Order is preserved by indexed-write into a fixed-length array
+// before string-join, so concurrency doesn't shuffle the scratchpad.
 async function treeSplitDiff(
   fullDiff: string,
   claims: Claim[],
@@ -521,27 +573,42 @@ async function treeSplitDiff(
     `${i}. [${c.strength}] "${c.text.slice(0, 100)}"`
   ).join("\n");
 
+  const buildPrompt = (si: number, shard: { from: number; to: number; text: string }): string => [
+    `You are summarizing shard ${si + 1}/${shards.length} (chars ${shard.from}..${shard.to}) of a PR diff.`,
+    `The downstream task will verify these ship-claims against the full-PR summary. Extract ONLY facts that could confirm or refute these claims:`,
+    "",
+    claimDigest,
+    "",
+    "Extract: new function/method signatures, struct fields, deletions, new files, wiring (function X calls Y), absence-of-implementation markers, TODO comments on added lines.",
+    "Skip: comment-only edits, whitespace, import reordering, unrelated cosmetic changes.",
+    "",
+    "─────── shard diff ───────",
+    shard.text,
+    "─────── end shard ───────",
+    "",
+    "Output: up to 180 words of facts in bullet form. No prose preamble, no claim verdicts (that's for the downstream step).",
+  ].join("\n");
+
+  // Pre-allocate so we can write back at the original index from
+  // out-of-order completion.
+  const summaries: string[] = new Array(shards.length).fill("");
+  let nextIdx = 0;
+  async function worker() {
+    while (true) {
+      const myIdx = nextIdx++;
+      if (myIdx >= shards.length) return;
+      const r = await callCloud(buildPrompt(myIdx, shards[myIdx]), 400);
+      summaries[myIdx] = r.content;
+    }
+  }
+  const concurrency = Math.max(1, Math.min(SHARD_CONCURRENCY, shards.length));
+  await Promise.all(Array.from({ length: concurrency }, worker));
+
   let scratchpad = "";
   for (const [si, shard] of shards.entries()) {
-    const prompt = [
-      `You are summarizing shard ${si + 1}/${shards.length} (chars ${shard.from}..${shard.to}) of a PR diff.`,
-      `The downstream task will verify these ship-claims against the full-PR summary. Extract ONLY facts that could confirm or refute these claims:`,
-      "",
-      claimDigest,
-      "",
-      "Extract: new function/method signatures, struct fields, deletions, new files, wiring (function X calls Y), absence-of-implementation markers, TODO comments on added lines.",
-      "Skip: comment-only edits, whitespace, import reordering, unrelated cosmetic changes.",
-      "",
-      "─────── shard diff ───────",
-      shard.text,
-      "─────── end shard ───────",
-      "",
-      "Output: up to 180 words of facts in bullet form. No prose preamble, no claim verdicts (that's for the downstream step).",
-    ].join("\n");
-
-    const r = await callCloud(prompt, 400);
-    if (r.content) {
-      scratchpad += `\n--- shard ${si + 1} (chars ${shard.from}..${shard.to}) ---\n${r.content.trim()}\n`;
+    const summary = summaries[si];
+    if (summary) {
+      scratchpad += `\n--- shard ${si + 1} (chars ${shard.from}..${shard.to}) ---\n${summary.trim()}\n`;
     }
   }
   return { scratchpad: scratchpad.trim(), shards: shards.length };
@@ -563,7 +630,7 @@ async function callCloud(prompt: string, maxTokens: number): Promise<{ content:
       headers: { "content-type": "application/json" },
       body: JSON.stringify({
         provider: "ollama_cloud",
-        model: MODEL,
+        model: SHARD_MODEL,
         messages: [{ role: "user", content: prompt }],
         max_tokens: maxTokens,
         temperature: 0,
diff --git a/auditor/checks/static.ts b/auditor/checks/static.ts
index d05591b..a677ae1 100644
--- a/auditor/checks/static.ts
+++ b/auditor/checks/static.ts
@@ -54,49 +54,79 @@ export function runStaticCheck(diff: string): Finding[] {
     const isAuditorCheckerFile = path.startsWith("auditor/checks/") ||
                                  path.startsWith("auditor/fixtures/");
 
+    // Track multi-line backtick-template state across the file. Walks
+    // all post-merge lines (context + added, skipping removed lines)
+    // in order and keeps `inMultilineBacktick` flipping on each
+    // unescaped backtick. Pre-2026-04-26 the per-line walk in
+    // isInsideQuotedString missed `todo!()` matches inside docstring
+    // template literals because the opening backtick lived on a
+    // line above the match. Now we OR the file-level state into the
+    // per-line check.
+    let inMultilineBacktick = false;
+
     for (let idx = 0; idx < lines.length; idx++) {
       const line = lines[idx];
-      if (!line.startsWith("+") || line.startsWith("+++")) continue;
-      const added = line.slice(1);
 
-      if (!isAuditorCheckerFile) {
-        for (const { re, why } of BLOCK_PATTERNS) {
-          const m = added.match(re);
-          if (m && typeof m.index === "number") {
-            // Skip if the match sits inside a quoted string literal —
-            // this is how rubric files (tests/real-world/*, prompt
-            // templates) legitimately reference the patterns they
-            // guard against, without actually executing them.
-            if (isInsideQuotedString(added, m.index)) continue;
+      // Diff bookkeeping lines and removed lines don't contribute to
+      // the post-merge file's string state.
+      if (line.startsWith("+++") || line.startsWith("---") ||
+          line.startsWith("@@") || line.startsWith("\\ No newline")) continue;
+      if (line.startsWith("-")) continue;
+
+      const isAdded = line.startsWith("+");
+      // Strip the diff prefix (' ' for context, '+' for added).
+      const body = (isAdded || line.startsWith(" ")) ? line.slice(1) : line;
+
+      if (isAdded) {
+        const added = body;
+
+        if (!isAuditorCheckerFile) {
+          for (const { re, why } of BLOCK_PATTERNS) {
+            const m = added.match(re);
+            if (m && typeof m.index === "number") {
+              // Skip if the match sits inside a quoted string literal —
+              // this is how rubric files (tests/real-world/*, prompt
+              // templates) legitimately reference the patterns they
+              // guard against, without actually executing them.
+              if (inMultilineBacktick || isInsideQuotedString(added, m.index)) continue;
+              findings.push({
+                check: "static",
+                severity: "block",
+                summary: `${why} in ${path}`,
+                evidence: [`${path}:+${idx + 1}: ${added.trim().slice(0, 160)}`],
+              });
+            }
+          }
+        }
+        for (const { re, why } of WARN_COMMENT_PATTERNS) {
+          if (re.test(line)) {
             findings.push({
               check: "static",
-              severity: "block",
+              severity: "warn",
+              summary: `${why} in ${path}`,
+              evidence: [`${path}:+${idx + 1}: ${added.trim().slice(0, 160)}`],
+            });
+          }
+        }
+        for (const { re, why } of INFO_HARDCODED_PATTERNS) {
+          if (re.test(added)) {
+            findings.push({
+              check: "static",
+              severity: "info",
               summary: `${why} in ${path}`,
               evidence: [`${path}:+${idx + 1}: ${added.trim().slice(0, 160)}`],
             });
           }
         }
       }
-      for (const { re, why } of WARN_COMMENT_PATTERNS) {
-        if (re.test(line)) {
-          findings.push({
-            check: "static",
-            severity: "warn",
-            summary: `${why} in ${path}`,
-            evidence: [`${path}:+${idx + 1}: ${added.trim().slice(0, 160)}`],
-          });
-        }
-      }
-      for (const { re, why } of INFO_HARDCODED_PATTERNS) {
-        if (re.test(added)) {
-          findings.push({
-            check: "static",
-            severity: "info",
-            summary: `${why} in ${path}`,
-            evidence: [`${path}:+${idx + 1}: ${added.trim().slice(0, 160)}`],
-          });
-        }
-      }
+
+      // Update file-level multi-line backtick state by walking THIS
+      // line's unescaped backticks. Both context and added lines
+      // contribute (they're both in the post-merge file). Doc-comment
+      // backticks like `\\\`Foo\\\`` count too — that's the source of
+      // the original bug, where multi-line template literals contained
+      // `todo!()` references.
+      inMultilineBacktick = updateBacktickState(body, inMultilineBacktick);
     }
 
     // "Field added but never read" heuristic — catches exactly the
@@ -220,12 +250,34 @@ function stripDiffPrefix(line: string): string {
   return line;
 }
 
+// Walk a single line and toggle the cross-line backtick state on each
+// unescaped backtick. Single-quote and double-quote runs are line-
+// bounded in JS/TS/Rust by language rules (string literals don't span
+// newlines without explicit `\` continuation), so we only track
+// backticks across lines. Returns the new state for the next line.
+function updateBacktickState(line: string, inBacktick: boolean): boolean {
+  let state = inBacktick;
+  let inDouble = false;
+  let inSingle = false;
+  for (let i = 0; i < line.length; i++) {
+    const c = line[i];
+    // A backslash escapes exactly the next character, so skip the pair;
+    // this keeps an escaped backslash (`\\`) from hiding the quote after it.
+    if (c === "\\") { i++; continue; }
+    // Inside a backtick template, quotes are literal characters, and
+    // backticks inside a quoted string are literal too.
+    if (c === '"' && !inSingle && !state) inDouble = !inDouble;
+    else if (c === "'" && !inDouble && !state) inSingle = !inSingle;
+    else if (c === "`" && !inDouble && !inSingle) state = !state;
+  }
+  return state;
+}
+
 // True if `pos` falls inside a double- or single-quoted string on this
 // line (backtick template literals too). Walks left→right toggling the
-// "in quote" state on each unescaped quote. Good enough for single-
-// line matches; multi-line strings aren't parsed (they're extremely
-// rare in the patterns we're blocking on, and would require a proper
-// tokenizer to handle correctly).
+// "in quote" state on each unescaped quote. Per-line only — the file-
+// level walk in runStaticCheck handles multi-line backtick templates
+// via updateBacktickState.
 function isInsideQuotedString(line: string, pos: number): boolean {
   let inDouble = false, inSingle = false, inBacktick = false;
   for (let i = 0; i < pos; i++) {
diff --git a/config/modes.toml b/config/modes.toml
index e3266fd..169b4d2 100644
--- a/config/modes.toml
+++ b/config/modes.toml
@@ -61,6 +61,23 @@ fallback_modes = ["validator"]
 default_model = "gpt-oss:120b"
 matrix_corpus = "distilled_factual_v20260423095819"
 
+[[task_class]]
+name = "pr_audit"
+# Auditor's claim-vs-diff verification mode (2026-04-26 rebuild).
+# Replaces the auditor's hand-rolled inference check with the mode-runner
+# composer: pathway memory (PR-level patterns) + lakehouse_answers_v1
+# corpus (prior accepted reviews + observer escalations) + adversarial
+# JSON-shaped framing. Original default was paid Ollama Cloud kimi-k2:1t for
+# strong claim-grounding (see the outage note below for the current default); tie-breaker via auditor-side env override.
+preferred_mode = "pr_audit"
+fallback_modes = ["consensus", "ladder"]
+# kimi-k2:1t broken upstream 2026-04-27 (Ollama Cloud 500 ISE, multi-hour
+# sustained outage verified by repeated probes). deepseek-v3.1:671b is
+# the drop-in substitute — proven working end-to-end through pr_audit
+# during Phase 5 distillation acceptance testing.
+default_model = "deepseek-v3.1:671b"
+matrix_corpus = "lakehouse_answers_v1"
+
 # Fallback when task_class isn't in the table — useful for ad-hoc calls
 # during development that don't yet have a mapped mode.
 [default]
diff --git a/crates/gateway/src/v1/mode.rs b/crates/gateway/src/v1/mode.rs
index 3ca5db0..29ead20 100644
--- a/crates/gateway/src/v1/mode.rs
+++ b/crates/gateway/src/v1/mode.rs
@@ -52,6 +52,7 @@ const VALID_MODES: &[&str] = &[
     "codereview_matrix_only",   // file + matrix only (no pathway)
     "codereview_playbook_only", // pathway only, NO file content (lossy ceiling)
     "staffing_inference_lakehouse", // staffing-domain composer (Pass 4)
+    "pr_audit",                 // PR-wide claim-vs-diff verifier (auditor)
 ];
 
 /// Whether a mode is handled natively in this gateway vs proxied to
@@ -65,6 +66,7 @@ fn is_native_mode(mode: &str) -> bool {
             | "codereview_matrix_only"
             | "codereview_playbook_only"
             | "staffing_inference_lakehouse"
+            | "pr_audit"
     )
 }
 
@@ -85,6 +87,7 @@ pub enum ReviewerFraming {
     Adversarial, // forensic, ranked findings + verdict (lakehouse default)
     Generic,     // "review this" — no codebase priors (null baseline)
     Staffing,    // staffing-domain coordinator framing (Pass 4)
+    PrAudit,     // PR-wide claim verification — JSON-shaped {claim_verdicts}
 }
 
 fn flags_for_mode(mode: &str) -> EnrichmentFlags {
@@ -129,6 +132,21 @@ fn flags_for_mode(mode: &str) -> EnrichmentFlags {
             use_relevance_filter: true,
             framing: ReviewerFraming::Staffing,
         },
+        "pr_audit" => EnrichmentFlags {
+            // PR-wide claim verification. file_content = the diff text
+            // (truncated by the auditor for huge PRs; the tree-split
+            // scratchpad path was retired). bug_fingerprints surface
+            // prior PR-level patterns. matrix corpus pulls
+            // lakehouse_answers_v1 — prior accepted scrum reviews +
+            // observer escalations — so the reviewer sees how similar
+            // claims were resolved before. relevance filter on to drop
+            // adjacency pollution from the answer corpus.
+            include_file_content: true,
+            include_bug_fingerprints: true,
+            include_matrix_chunks: true,
+            use_relevance_filter: true,
+            framing: ReviewerFraming::PrAudit,
+        },
         // Default (codereview_lakehouse): everything on.
         _ => EnrichmentFlags {
             include_file_content: true,
@@ -510,11 +528,28 @@ fill citations from the playbook, (3) risks (double-booking, eligibility gaps, g
 with severity + confidence percent, (4) playbook reference IDs you used. Be precise — only \
 recommend candidates whose names appear in the matrix data; do NOT fabricate workers.";
 
+/// System framing for pr_audit: demands one JSON object holding claim_verdicts + unflagged_gaps.
+const FRAMING_PR_AUDIT: &str = "You are an adversarial PR claim verifier for the Lakehouse \
+codebase (Rust + DataFusion + Parquet + object storage). Caller passes ship-claims from a PR \
+description and the unified diff (truncated to a size cap for huge PRs). Your job: for \
+each claim, decide whether the diff actually backs it. Be ruthless — claim-diff divergence \
+is the failure mode this auditor exists to prevent. Output ONLY a single JSON object with \
+this exact shape:\n\
+{\n\
+  \"claim_verdicts\": [\n\
+    {\"claim_idx\": <integer matching the input list>, \"backed\": <true|false>, \"evidence\": \"<one-line citation: file:line or `quote`, max 240 chars>\"}\n\
+  ],\n\
+  \"unflagged_gaps\": [\"<one-line description of substantive code change in diff that no claim covers>\"]\n\
+}\n\
+No markdown, no preamble, no explanation outside the JSON. Every input claim must appear in \
+claim_verdicts exactly once. Lean toward backed=false when in doubt — false positives waste human time but false negatives ship broken claims.";
+
 fn framing_text(f: ReviewerFraming) -> &'static str {
     match f {
         ReviewerFraming::Adversarial => FRAMING_ADVERSARIAL,
         ReviewerFraming::Generic => FRAMING_GENERIC,
         ReviewerFraming::Staffing => FRAMING_STAFFING,
+        ReviewerFraming::PrAudit => FRAMING_PR_AUDIT,
     }
 }