pathway_memory: audit-consensus → retire wire
Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
When observer's hand-review explicitly rejects the output of a
hot-swap-recommended model, the matrix's recommendation was wrong
for this context. Auto-retire the trace so future agents don't
get the same poisoned recommendation in their preamble.
crates/vectord/src/pathway_memory.rs — add `trace_uid` to
HotSwapCandidate response and populate from the matched trace.
This gives consumers single-trace precision for /pathway/retire.
tests/real-world/scrum_master_pipeline.ts:
- HotSwapCandidate interface gains trace_uid
- new retirePathwayTrace() helper (fire-and-forget, fall-open)
- in the obsVerdict reject branch: if hotSwap was active AND
the rejected model is the hot-swap-recommended one AND
observer confidence ≥0.7, fire retire and null hotSwap so
post-loop replay bookkeeping doesn't double-process.
- hotSwap declared `let` (was const) so it can be nulled
Cycle verdicts ("needs different angle") don't trigger retire —
only outright rejects do. Confidence gate avoids retiring on
heuristic-fallback verdicts that come back without a confidence
number. Closes the "audit-consensus → retire" item from
HANDOVER.md.
Live-tested: insert synthetic trace → /pathway/retire by trace_uid
→ retired counter 1 → 2.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
e1ef185868
commit
626f18d491
@ -373,6 +373,11 @@ pub struct PathwayMemory {
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct HotSwapCandidate {
|
||||
pub pathway_id: String,
|
||||
/// trace_uid of the SPECIFIC trace this hot-swap recommendation
|
||||
/// came from. Lets a caller call /pathway/retire with single-trace
|
||||
/// precision when observer rejects the result — the audit-consensus
|
||||
/// → retire wire (HANDOVER §queued, ADR-021).
|
||||
pub trace_uid: String,
|
||||
pub similarity: f32,
|
||||
pub replay_count: u32,
|
||||
pub success_rate: f32,
|
||||
@ -754,6 +759,7 @@ impl PathwayMemory {
|
||||
let accepted = p.ladder_attempts.iter().find(|a| a.accepted)?;
|
||||
Some(HotSwapCandidate {
|
||||
pathway_id: p.pathway_id.clone(),
|
||||
trace_uid: p.trace_uid.clone(),
|
||||
similarity,
|
||||
replay_count: p.replay_count,
|
||||
success_rate: p.success_rate(),
|
||||
|
||||
@ -224,6 +224,7 @@ function buildQueryVec(taskClass: string, filePath: string, signalClass: string
|
||||
|
||||
interface HotSwapCandidate {
|
||||
pathway_id: string;
|
||||
trace_uid: string;
|
||||
similarity: number;
|
||||
replay_count: number;
|
||||
success_rate: number;
|
||||
@ -231,6 +232,25 @@ interface HotSwapCandidate {
|
||||
recommended_model: string;
|
||||
}
|
||||
|
||||
// Audit-consensus → retire wire (2026-04-25). When observer rejects the
|
||||
// output of a hot-swap-recommended model, the matrix's recommendation
|
||||
// was wrong for this context — retire the trace so future agents don't
|
||||
// get the same poisoned recommendation in their preamble. Server-side
|
||||
// retire is idempotent so duplicate calls are safe.
|
||||
/**
 * Ask the gateway to retire a single pathway trace.
 *
 * Best-effort: POSTs `{ trace_uid, reason }` to
 * `${GATEWAY}/vectors/pathway/retire` with a 3 s timeout and never throws.
 * Network errors, timeouts and non-2xx responses are reported with a
 * warning and otherwise ignored, so callers may invoke this without
 * awaiting the result.
 *
 * @param traceUid - Identifier of the trace to retire; a falsy value makes
 *   the call a no-op.
 * @param reason - Free-text explanation sent along in the request body.
 */
async function retirePathwayTrace(traceUid: string, reason: string): Promise<void> {
  if (!traceUid) return;
  try {
    const res = await fetch(`${GATEWAY}/vectors/pathway/retire`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({ trace_uid: traceUid, reason }),
      signal: AbortSignal.timeout(3000),
    });
    // fetch() only rejects on network-level failure; an HTTP error status
    // still resolves, so surface it explicitly instead of treating it as
    // a successful retire.
    if (!res.ok) {
      console.warn(`pathway retire for ${traceUid} returned HTTP ${res.status}`);
    }
    // The response payload is not used; cancel the body so the underlying
    // connection is released promptly instead of lingering until GC.
    await res.body?.cancel();
  } catch (err: unknown) {
    // Fire-and-forget; pathway memory is a hint store, not a hard gate.
    // Still leave a breadcrumb so a silently-failed retire is diagnosable.
    const detail = err instanceof Error ? err.message : String(err);
    console.warn(`pathway retire for ${traceUid} failed: ${detail}`);
  }
}
|
||||
|
||||
async function queryHotSwap(taskClass: string, filePath: string, signalClass: string | null): Promise<HotSwapCandidate | null> {
|
||||
try {
|
||||
const query_vec = buildQueryVec(taskClass, filePath, signalClass);
|
||||
@ -1361,7 +1381,9 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
|
||||
// service unavailable → null candidate → business as usual.
|
||||
const signalClass = await lookupSignalClass(rel);
|
||||
const taskClass = "scrum_review";
|
||||
const hotSwap = await queryHotSwap(taskClass, rel, signalClass);
|
||||
// mutable so retire-on-reject can null it after firing — prevents
|
||||
// the post-loop replay bookkeeping from re-touching a retired trace.
|
||||
let hotSwap: HotSwapCandidate | null = await queryHotSwap(taskClass, rel, signalClass);
|
||||
|
||||
// ADR-021 Phase C: pre-review enrichment. Pull aggregated bug
|
||||
// fingerprints the matrix index has learned for this narrow
|
||||
@ -1515,6 +1537,28 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
|
||||
const reason = `observer ${obsVerdict.verdict}: ${obsVerdict.notes ?? "no notes"} (conf=${obsVerdict.confidence ?? "?"})`;
|
||||
history.push({ n, model: rung.model, status: "thin", chars: r.content.length, error: reason });
|
||||
pathwayAttempts.push({ rung: i + 1, model: rung.model, latency_ms: attemptMs, accepted: false, reject_reason: reason });
|
||||
// Audit-consensus → retire: if a hot-swap influenced THIS
|
||||
// attempt (we're trying its recommended model) and observer
|
||||
// explicitly rejects, the matrix recommendation is wrong for
|
||||
// this context. Retire the trace so future agents don't repeat
|
||||
// it. Cycle verdicts ("needs different angle") don't trigger
|
||||
// retire — only outright rejects do. Confidence ≥0.7 gate
|
||||
// avoids retiring on heuristic-fallback verdicts (which return
|
||||
// no confidence).
|
||||
if (
|
||||
hotSwap &&
|
||||
obsVerdict.verdict === "reject" &&
|
||||
rung.model === hotSwap.recommended_model &&
|
||||
(obsVerdict.confidence ?? 0) >= 0.7
|
||||
) {
|
||||
const retireReason = `observer reject on hot-swap replay: ${obsVerdict.notes ?? "no notes"}`;
|
||||
log(` 🗑 retiring pathway ${hotSwap.trace_uid.slice(0, 8)}… (${retireReason})`);
|
||||
retirePathwayTrace(hotSwap.trace_uid, retireReason);
|
||||
// Null out hotSwap so the post-loop replay bookkeeping doesn't
|
||||
// also try to record success/failure against the now-retired
|
||||
// trace.
|
||||
hotSwap = null;
|
||||
}
|
||||
qualityRetriesOnCurrentModel++;
|
||||
if (qualityRetriesOnCurrentModel > MAX_QUALITY_RETRIES) {
|
||||
log(` ✗ ${reason} — quality retries exhausted on ${rung.model}, advancing fallback`);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user