From 626f18d4914d1bb17f84a5ebc1b82146aeb2fbe9 Mon Sep 17 00:00:00 2001
From: root <root@island37.com>
Date: Sun, 26 Apr 2026 00:01:20 -0500
Subject: [PATCH] =?UTF-8?q?pathway=5Fmemory:=20audit-consensus=20=E2=86=92?=
 =?UTF-8?q?=20retire=20wire?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When observer's hand-review explicitly rejects the output of a
hot-swap-recommended model, the matrix's recommendation was wrong
for this context. Auto-retire the trace so future agents don't
get the same poisoned recommendation in their preamble.

crates/vectord/src/pathway_memory.rs — add `trace_uid` to
HotSwapCandidate response and populate from the matched trace.
This gives consumers single-trace precision for /pathway/retire.

tests/real-world/scrum_master_pipeline.ts:
  - HotSwapCandidate interface gains trace_uid
  - new retirePathwayTrace() helper (fire-and-forget, fall-open)
  - in the obsVerdict reject branch: if hotSwap was active AND
    the rejected model is the hot-swap-recommended one AND
    observer confidence ≥0.7, fire retire and null hotSwap so
    post-loop replay bookkeeping doesn't double-process.
  - hotSwap declared `let` (was const) so it can be nulled

Cycle verdicts ("needs different angle") don't trigger retire —
only outright rejects do. Confidence gate avoids retiring on
heuristic-fallback verdicts that come back without a confidence
number. Closes the "audit-consensus → retire" item from
HANDOVER.md.

Live-tested: insert synthetic trace → /pathway/retire by trace_uid
→ retired counter 1 → 2.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 crates/vectord/src/pathway_memory.rs      |  6 +++
 tests/real-world/scrum_master_pipeline.ts | 46 ++++++++++++++++++++++-
 2 files changed, 51 insertions(+), 1 deletion(-)
diff --git a/crates/vectord/src/pathway_memory.rs b/crates/vectord/src/pathway_memory.rs
index f3bde00..603dfa4 100644
--- a/crates/vectord/src/pathway_memory.rs
+++ b/crates/vectord/src/pathway_memory.rs
@@ -373,6 +373,11 @@ pub struct PathwayMemory {
 #[derive(Debug, Serialize)]
 pub struct HotSwapCandidate {
     pub pathway_id: String,
+    /// trace_uid of the SPECIFIC trace this hot-swap recommendation
+    /// came from. Lets a caller call /pathway/retire with single-trace
+    /// precision when observer rejects the result — the audit-consensus
+    /// → retire wire (HANDOVER §queued, ADR-021).
+    pub trace_uid: String,
     pub similarity: f32,
     pub replay_count: u32,
     pub success_rate: f32,
@@ -754,6 +759,7 @@ impl PathwayMemory {
         let accepted = p.ladder_attempts.iter().find(|a| a.accepted)?;
         Some(HotSwapCandidate {
             pathway_id: p.pathway_id.clone(),
+            trace_uid: p.trace_uid.clone(),
             similarity,
             replay_count: p.replay_count,
             success_rate: p.success_rate(),
diff --git a/tests/real-world/scrum_master_pipeline.ts b/tests/real-world/scrum_master_pipeline.ts
index 9e956e6..509180f 100644
--- a/tests/real-world/scrum_master_pipeline.ts
+++ b/tests/real-world/scrum_master_pipeline.ts
@@ -224,6 +224,7 @@ function buildQueryVec(taskClass: string, filePath: string, signalClass: string
 
 interface HotSwapCandidate {
   pathway_id: string;
+  trace_uid: string;
   similarity: number;
   replay_count: number;
   success_rate: number;
@@ -231,6 +232,25 @@ interface HotSwapCandidate {
   recommended_model: string;
 }
 
+// Audit-consensus → retire wire (2026-04-25). When observer rejects the
+// output of a hot-swap-recommended model, the matrix's recommendation
+// was wrong for this context — retire the trace so future agents don't
+// get the same poisoned recommendation in their preamble. Server-side
+// retire is idempotent so duplicate calls are safe.
+async function retirePathwayTrace(traceUid: string, reason: string): Promise<void> {
+  if (!traceUid) return; // no trace id → nothing to retire, so no request is sent
+  try {
+    await fetch(`${GATEWAY}/vectors/pathway/retire`, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({ trace_uid: traceUid, reason }),
+      signal: AbortSignal.timeout(3000), // abort the request after 3000 ms
+    }); // the response (status and body) is not inspected
+  } catch {
+    // Fire-and-forget, fall-open: a failed or timed-out request is swallowed here; pathway memory is a hint store, not a hard gate.
+  }
+}
+
 async function queryHotSwap(taskClass: string, filePath: string, signalClass: string | null): Promise<HotSwapCandidate | null> {
   try {
     const query_vec = buildQueryVec(taskClass, filePath, signalClass);
@@ -1361,7 +1381,9 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
   // service unavailable → null candidate → business as usual.
   const signalClass = await lookupSignalClass(rel);
   const taskClass = "scrum_review";
-  const hotSwap = await queryHotSwap(taskClass, rel, signalClass);
+  // mutable so retire-on-reject can null it after firing — prevents
+  // the post-loop replay bookkeeping from re-touching a retired trace.
+  let hotSwap: HotSwapCandidate | null = await queryHotSwap(taskClass, rel, signalClass);
 
   // ADR-021 Phase C: pre-review enrichment. Pull aggregated bug
   // fingerprints the matrix index has learned for this narrow
@@ -1515,6 +1537,28 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
       const reason = `observer ${obsVerdict.verdict}: ${obsVerdict.notes ?? "no notes"} (conf=${obsVerdict.confidence ?? "?"})`;
       history.push({ n, model: rung.model, status: "thin", chars: r.content.length, error: reason });
       pathwayAttempts.push({ rung: i + 1, model: rung.model, latency_ms: attemptMs, accepted: false, reject_reason: reason });
+      // Audit-consensus → retire: if a hot-swap influenced THIS
+      // attempt (we're trying its recommended model) and observer
+      // explicitly rejects, the matrix recommendation is wrong for
+      // this context. Retire the trace so future agents don't repeat
+      // it. Cycle verdicts ("needs different angle") don't trigger
+      // retire — only outright rejects do. Confidence ≥0.7 gate
+      // avoids retiring on heuristic-fallback verdicts (which return
+      // no confidence).
+      if (
+        hotSwap &&
+        obsVerdict.verdict === "reject" &&
+        rung.model === hotSwap.recommended_model &&
+        (obsVerdict.confidence ?? 0) >= 0.7
+      ) {
+        const retireReason = `observer reject on hot-swap replay: ${obsVerdict.notes ?? "no notes"}`;
+        log(`    🗑 retiring pathway ${hotSwap.trace_uid.slice(0, 8)}… (${retireReason})`);
+        retirePathwayTrace(hotSwap.trace_uid, retireReason);
+        // Null out hotSwap so the post-loop replay bookkeeping doesn't
+        // also try to record success/failure against the now-retired
+        // trace.
+        hotSwap = null;
+      }
       qualityRetriesOnCurrentModel++;
       if (qualityRetriesOnCurrentModel > MAX_QUALITY_RETRIES) {
         log(`    ✗ ${reason} — quality retries exhausted on ${rung.model}, advancing fallback`);