From 0a0843b6053f12a82765c81444f781c39c85841f Mon Sep 17 00:00:00 2001 From: root Date: Fri, 24 Apr 2026 05:49:10 -0500 Subject: [PATCH] ADR-021: semantic-correctness layer lands in pathway_memory (A+B+C) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase A — data model (vectord/src/pathway_memory.rs): + SemanticFlag enum (9 variants: UnitMismatch, TypeConfusion, NullableConfusion, OffByOne, StaleReference, PseudoImpl, DeadCode, WarningNoise, BoundaryViolation) as #[serde(tag = "kind")] + TypeHint { source, symbol, type_repr } + BugFingerprint { flag, pattern_key, example, occurrences } + PathwayTrace gains semantic_flags, type_hints_used, bug_fingerprints all #[serde(default)] for back-compat deserialization of pre-ADR-021 traces on disk + build_pathway_vec now tokenizes flag:{variant} + bug:{flag}:{key} so traces with different bug histories cluster separately in the similarity gate (proven by pathway_vec_differs_when_bug_fingerprint_added test) Phase B — producer (scrum_master_pipeline.ts): + Prompt addendum: each finding must carry `**Flag: <Category>**` tag alongside the existing Confidence: NN% tag. 9 category choices plus `None` for improvements that aren't bug-shaped. + Parser extracts tagged flags from reviewer markdown; falls back to bare-word match if reviewer omits the label. Deduplicated per trace. + PathwayTracePayload gains semantic_flags / type_hints_used / bug_fingerprints fields. Wire format matches Rust serde tagged enum so TS and Rust interop directly. Phase C — pre-review enrichment: + new `/vectors/pathway/bug_fingerprints` endpoint aggregates occurrences by (flag, pattern_key) across traces sharing a narrow fingerprint, sorts by frequency, returns top-K. + scrum calls it before the ladder and prepends a PATHWAY MEMORY preamble to the reviewer prompt ("these patterns appeared N times on this file area before — check for recurrences"). Empty on fresh install; grows as the matrix index learns. 
Tests: 27 pathway_memory tests green (was 18). New tests: - pathway_trace_deserializes_without_new_fields_backcompat - semantic_flag_serializes_as_tagged_enum - bug_fingerprint_roundtrips_through_serde - pathway_vec_differs_when_bug_fingerprint_added - semantic_flag_discriminates_by_variant - bug_fingerprints_aggregate_by_pattern_key (sums occurrences, sorts desc) - bug_fingerprints_empty_for_unseen_fingerprint - bug_fingerprints_respects_limit - insert_preserves_semantic_fields (roundtrip via persist + reload) Workspace warnings unchanged at 11. What's still queued (not this commit): - type_hints_used population from catalogd column types + Arrow schema - bug_fingerprint extraction from reviewer output (Phase D — for now semantic_flags populate but the fingerprint key requires parsing code-shape from the finding; next iteration's work) - auditor → pathway audit_consensus update wire (explicit-fail gate) Why this commit matters: the mechanical applier's gates are syntactic (warning count, patch size, rationale-token alignment). The queryd/delta.rs base_rows bug (86901f8) was found by human reading — unit mismatch between row counts and file counts. At 100 bugs this deep, humans can't catch them all; the matrix index has to learn the shapes. This commit gives it the fields to learn into and the surface to read from. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/vectord/src/pathway_memory.rs | 341 ++++++++++++++++++++++ crates/vectord/src/service.rs | 26 ++ tests/real-world/scrum_master_pipeline.ts | 112 ++++++- 3 files changed, 475 insertions(+), 4 deletions(-) diff --git a/crates/vectord/src/pathway_memory.rs b/crates/vectord/src/pathway_memory.rs index b2a1d91..240d639 100644 --- a/crates/vectord/src/pathway_memory.rs +++ b/crates/vectord/src/pathway_memory.rs @@ -86,6 +86,82 @@ pub struct AuditConsensus { pub disagreements: u32, } +// ─── ADR-021: Semantic correctness layer ──────────────────────────── +// +// SemanticFlag names the CATEGORY of bug found. Scrum reviewer attaches +// these to findings (via prompt instruction to tag); the matrix index +// uses them for "same crate has seen N unit mismatches" preemption. +// +// Discipline: extend this enum only when a real bug is found that +// doesn't fit an existing variant. Avoid the "add a vague variant just +// in case" anti-pattern — it dilutes the grammar the index learns from. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(tag = "kind")] +pub enum SemanticFlag { + /// Operation combines values with different units (e.g. + /// `row_count - file_count`, `bytes - rows`). Instance that motivated + /// ADR-021: queryd/delta.rs base_rows = pre_filter_rows - delta_count. + UnitMismatch, + /// Same type, wrong role (e.g. treating a PK as a row index). + TypeConfusion, + /// Unwrap-without-check or nullable-treated-as-non-null paths. + NullableConfusion, + /// Off-by-one in loops / ranges / slice bounds. + OffByOne, + /// Reference to a deprecated / removed / moved symbol that the + /// compiler hasn't flagged (trait method shadowing, feature flags). + StaleReference, + /// Pseudo-implementation: stub body, `todo!()`, or function named + /// for work it doesn't actually do. Distinct from DeadCode — pseudo + /// is CALLED but doesn't do its job. 
+ PseudoImpl, + /// Unreachable or uncalled code that compiles but serves no purpose. + DeadCode, + /// Code compiles green but emits a warning the workspace baseline + /// didn't have. The applier's new-warning gate already catches these + /// at commit time; flagging at review time lets the matrix index + /// surface "this file area tends to produce warning noise." + WarningNoise, + /// Operation crosses a layer/crate boundary it shouldn't (e.g. a + /// hot-path function calling a cloud API, or a catalog op mutating + /// storage directly). + BoundaryViolation, +} + +/// What schema/type context was surfaced to the reviewer when this +/// pathway was produced. Empty = bootstrap path (reviewer got no +/// type context); populated = we fed the model typed info to work with. +/// Drift in this field over time is the feedback signal for "are we +/// getting smarter at enriching prompts?" +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] +pub struct TypeHint { + /// Where the hint came from: "catalogd" | "arrow_schema" | + /// "rust_struct" | "truth_rule" | "manual". + pub source: String, + /// The identifier being typed (field name, variable, column). + pub symbol: String, + /// The type as extracted (stringly-typed is fine — this is a + /// retrieval key, not a compiler representation). + pub type_repr: String, +} + +/// Stable hash of a bug pattern. Used by the matrix index to retrieve +/// "similar-shaped bugs" across files. The `pattern_key` is the field +/// that's semantically load-bearing; `occurrences` is how many times +/// this exact signature has appeared in this pathway's file history. +/// `example` is one representative code snippet so the prompt can +/// quote it back to future reviewers. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] +pub struct BugFingerprint { + pub flag: SemanticFlag, + /// SHA256 of the structural pattern (e.g. for UnitMismatch: + /// `"row_count-file_count"` → its hash). 
Stable across minor + /// token-level variation so the same bug shape clusters. + pub pattern_key: String, + pub example: String, + pub occurrences: u32, +} + /// Full backtrack-able context for one reviewed file. Lives alongside /// the reducer's summary — summary is what the reviewer LLM sees, this /// is what the auditor / future iterations / hot-swap use. @@ -119,6 +195,22 @@ pub struct PathwayTrace { /// success-rate gate can fire. pub replay_count: u32, pub replays_succeeded: u32, + /// ADR-021 semantic-correctness layer. Populated by scrum reviewer + /// via explicit prompt-level tagging of findings. Empty on existing + /// traces (pre-ADR-021 inserts); additive field so back-compat + /// deserialization works via serde default. + #[serde(default)] + pub semantic_flags: Vec, + /// Schema/type context fed to the reviewer during this pathway's + /// review. Starts empty (bootstrap); fills as we wire catalogd + + /// arrow_schema + truth_rule enrichment into the prompt pipeline. + #[serde(default)] + pub type_hints_used: Vec, + /// Bug patterns caught on this file/pathway — the matrix index's + /// retrieval key for "have we seen this shape here before?" + #[serde(default)] + pub bug_fingerprints: Vec, + /// Marked true when replay_count >= 3 AND success_rate < 0.80. /// Retired pathways are excluded from hot-swap forever. (If the /// underlying file / task / signal characteristics genuinely change @@ -193,6 +285,17 @@ pub fn build_pathway_vec(trace: &PathwayTrace) -> Vec { for s in &trace.sub_pipeline_calls { tokens.push(format!("pipeline:{}", s.pipeline)); } + // ADR-021: include semantic flags + bug fingerprints in the + // embedding so pathways with the same narrow fingerprint but + // different bug histories cluster separately. "This file has + // had 3 unit mismatches" is a different pathway from "this file + // is clean" — similarity gate should see them as distinct. 
+ for f in &trace.semantic_flags { + tokens.push(format!("flag:{:?}", f)); + } + for bp in &trace.bug_fingerprints { + tokens.push(format!("bug:{:?}:{}", bp.flag, bp.pattern_key)); + } for t in &tokens { let mut h = Sha256::new(); @@ -395,6 +498,53 @@ impl PathwayMemory { self.persist().await } + /// ADR-021 Phase C: retrieve aggregated bug fingerprints for a + /// narrow fingerprint (task_class + file_prefix + signal_class). + /// Scrum pipeline calls this BEFORE running the ladder and prepends + /// the result to the reviewer prompt as historical context. + /// + /// Returns at most `limit` most-frequent patterns across all traces + /// sharing the narrow id. Frequency is summed `occurrences` — a + /// fingerprint seen in 3 traces with occurrences 2/1/1 comes back + /// as occurrences=4 so the preempt-prompt can say "this pattern + /// appeared 4 times on this crate." + pub async fn bug_fingerprints_for( + &self, + task_class: &str, + file_path: &str, + signal_class: Option<&str>, + limit: usize, + ) -> Vec { + let id = PathwayTrace::compute_id(task_class, file_path, signal_class); + let s = self.state.read().await; + let Some(traces) = s.pathways.get(&id) else { return Vec::new(); }; + // Aggregate by (flag, pattern_key) and sum occurrences. Keep a + // representative example (first one seen is fine — bug examples + // are semantically equivalent within a pattern_key by design). 
+ let mut agg: HashMap<(String, String), (SemanticFlag, String, u32)> = HashMap::new(); + for t in traces { + for bp in &t.bug_fingerprints { + let key = (format!("{:?}", bp.flag), bp.pattern_key.clone()); + let entry = agg.entry(key).or_insert_with(|| { + (bp.flag.clone(), bp.example.clone(), 0) + }); + entry.2 = entry.2.saturating_add(bp.occurrences); + } + } + let mut out: Vec = agg + .into_iter() + .map(|((_, pk), (flag, ex, occ))| BugFingerprint { + flag, + pattern_key: pk, + example: ex, + occurrences: occ, + }) + .collect(); + out.sort_by(|a, b| b.occurrences.cmp(&a.occurrences)); + out.truncate(limit); + out + } + pub async fn stats(&self) -> PathwayMemoryStats { let s = self.state.read().await; let mut total = 0usize; @@ -489,6 +639,9 @@ mod tests { reducer_summary: "ok".into(), final_verdict: "accepted".into(), pathway_vec: vec![], + semantic_flags: vec![], + type_hints_used: vec![], + bug_fingerprints: vec![], replay_count: replays, replays_succeeded: succ, retired: false, @@ -701,4 +854,192 @@ mod tests { assert!(sim < 1.0, "different models → different embeddings"); assert!(sim > 0.5, "shared fingerprint → embeddings still related"); } + + // ─── ADR-021 semantic-correctness layer tests ─────────────────── + + #[test] + fn pathway_trace_deserializes_without_new_fields_backcompat() { + // Critical: existing traces on disk (persisted before ADR-021) + // must still deserialize. serde(default) on the three new fields + // is the back-compat mechanism — verify it holds. 
+ let json = r#"{ + "pathway_id": "abc", + "task_class": "scrum_review", + "file_path": "crates/x/y.rs", + "signal_class": null, + "created_at": "2026-04-24T00:00:00Z", + "ladder_attempts": [], + "kb_chunks": [], + "observer_signals": [], + "bridge_hits": [], + "sub_pipeline_calls": [], + "audit_consensus": null, + "reducer_summary": "old trace", + "final_verdict": "accepted", + "pathway_vec": [], + "replay_count": 0, + "replays_succeeded": 0, + "retired": false + }"#; + let t: PathwayTrace = serde_json::from_str(json).expect("must deserialize pre-ADR-021 trace"); + assert!(t.semantic_flags.is_empty()); + assert!(t.type_hints_used.is_empty()); + assert!(t.bug_fingerprints.is_empty()); + assert_eq!(t.reducer_summary, "old trace"); + } + + #[test] + fn semantic_flag_serializes_as_tagged_enum() { + // Verifying the wire format — the tag field "kind" lets TS/JSON + // clients pattern-match without needing to know variant ordering. + let s = serde_json::to_string(&SemanticFlag::UnitMismatch).unwrap(); + assert!(s.contains("UnitMismatch"), "got: {s}"); + assert!(s.contains("kind"), "must be tagged enum for TS interop, got: {s}"); + } + + #[test] + fn bug_fingerprint_roundtrips_through_serde() { + let bp = BugFingerprint { + flag: SemanticFlag::UnitMismatch, + pattern_key: "row_count-file_count".into(), + example: "base_rows = pre_filter_rows - delta_count".into(), + occurrences: 1, + }; + let s = serde_json::to_string(&bp).unwrap(); + let parsed: BugFingerprint = serde_json::from_str(&s).unwrap(); + assert_eq!(parsed, bp); + } + + #[test] + fn pathway_vec_differs_when_bug_fingerprint_added() { + // A trace with a known bug history should embed differently + // from a clean trace with the same ladder/KB. This is the + // compounding signal: "same file, different bug history." 
+ let clean = mk_trace("a", true, 5, 5); + let mut flagged = clean.clone(); + flagged.semantic_flags.push(SemanticFlag::UnitMismatch); + flagged.bug_fingerprints.push(BugFingerprint { + flag: SemanticFlag::UnitMismatch, + pattern_key: "row_count-file_count".into(), + example: "x = y - z".into(), + occurrences: 1, + }); + flagged.pathway_vec = build_pathway_vec(&flagged); + let sim = cosine(&clean.pathway_vec, &flagged.pathway_vec); + assert!(sim < 1.0, "bug history must shift the embedding"); + assert!(sim > 0.3, "shared fingerprint should keep them loosely related"); + } + + #[test] + fn semantic_flag_discriminates_by_variant() { + // Two traces with different flag classes should embed to + // different points. Validates that the index can retrieve + // "files with UnitMismatch history" separately from + // "files with NullableConfusion history." + let mut a = mk_trace("x", true, 5, 5); + a.semantic_flags.push(SemanticFlag::UnitMismatch); + a.pathway_vec = build_pathway_vec(&a); + let mut b = a.clone(); + b.semantic_flags = vec![SemanticFlag::NullableConfusion]; + b.pathway_vec = build_pathway_vec(&b); + let sim = cosine(&a.pathway_vec, &b.pathway_vec); + assert!(sim < 1.0, "different flag variants → different embeddings"); + } + + #[tokio::test] + async fn bug_fingerprints_aggregate_by_pattern_key() { + // Three traces on the same narrow fingerprint — two with the + // same bug pattern, one with a different pattern. The aggregator + // must sum occurrences for the shared key and sort by count. 
+ let mem = PathwayMemory::new(mk_store()); + let mut t1 = mk_trace("q", true, 0, 0); + t1.bug_fingerprints.push(BugFingerprint { + flag: SemanticFlag::UnitMismatch, + pattern_key: "row-file".into(), + example: "a - b".into(), + occurrences: 2, + }); + let mut t2 = mk_trace("q", true, 0, 0); + t2.bug_fingerprints.push(BugFingerprint { + flag: SemanticFlag::UnitMismatch, + pattern_key: "row-file".into(), + example: "x - y".into(), + occurrences: 1, + }); + let mut t3 = mk_trace("q", true, 0, 0); + t3.bug_fingerprints.push(BugFingerprint { + flag: SemanticFlag::OffByOne, + pattern_key: "len-1".into(), + example: "items[len]".into(), + occurrences: 1, + }); + mem.insert(t1).await.unwrap(); + mem.insert(t2).await.unwrap(); + mem.insert(t3).await.unwrap(); + let fps = mem + .bug_fingerprints_for("scrum_review", "crates/q/src/x.rs", Some("CONVERGING"), 10) + .await; + assert_eq!(fps.len(), 2, "two distinct patterns after aggregation"); + // First should be the aggregated UnitMismatch (3 total occurrences) + assert_eq!(fps[0].pattern_key, "row-file"); + assert_eq!(fps[0].occurrences, 3); + assert_eq!(fps[1].pattern_key, "len-1"); + assert_eq!(fps[1].occurrences, 1); + } + + #[tokio::test] + async fn bug_fingerprints_empty_for_unseen_fingerprint() { + let mem = PathwayMemory::new(mk_store()); + let fps = mem + .bug_fingerprints_for("scrum_review", "crates/never_seen/x.rs", None, 5) + .await; + assert!(fps.is_empty()); + } + + #[tokio::test] + async fn bug_fingerprints_respects_limit() { + let mem = PathwayMemory::new(mk_store()); + for i in 0..10 { + let mut t = mk_trace("q", true, 0, 0); + t.bug_fingerprints.push(BugFingerprint { + flag: SemanticFlag::OffByOne, + pattern_key: format!("p{i}"), + example: "".into(), + occurrences: (10 - i) as u32, // decreasing so sort matters + }); + mem.insert(t).await.unwrap(); + } + let fps = mem + .bug_fingerprints_for("scrum_review", "crates/q/src/x.rs", Some("CONVERGING"), 3) + .await; + assert_eq!(fps.len(), 3); + // Highest 
occurrences first. + assert_eq!(fps[0].pattern_key, "p0"); + assert_eq!(fps[0].occurrences, 10); + } + + #[tokio::test] + async fn insert_preserves_semantic_fields() { + let mem = PathwayMemory::new(mk_store()); + let mut t = mk_trace("a", true, 0, 0); + t.semantic_flags.push(SemanticFlag::UnitMismatch); + t.type_hints_used.push(TypeHint { + source: "arrow_schema".into(), + symbol: "pre_filter_rows".into(), + type_repr: "usize (sum of batch.num_rows)".into(), + }); + t.bug_fingerprints.push(BugFingerprint { + flag: SemanticFlag::UnitMismatch, + pattern_key: "row-minus-file".into(), + example: "pre_filter_rows - delta_count".into(), + occurrences: 1, + }); + mem.insert(t).await.unwrap(); + // Reload from store via a fresh handle — proves persistence + // roundtrips the new fields as well as the old ones. + let mem2 = PathwayMemory::new(mem.store.clone()); + mem2.load_from_storage().await.unwrap(); + let stats = mem2.stats().await; + assert_eq!(stats.total_pathways, 1); + } } diff --git a/crates/vectord/src/service.rs b/crates/vectord/src/service.rs index eeb8d23..5af0283 100644 --- a/crates/vectord/src/service.rs +++ b/crates/vectord/src/service.rs @@ -151,6 +151,8 @@ pub fn router(state: VectorState) -> Router { .route("/pathway/query", post(pathway_query)) .route("/pathway/record_replay", post(pathway_record_replay)) .route("/pathway/stats", get(pathway_stats)) + // ADR-021 Phase C: pre-review bug-fingerprint retrieval. 
+ .route("/pathway/bug_fingerprints", post(pathway_bug_fingerprints)) .with_state(state) } @@ -2914,6 +2916,30 @@ async fn pathway_stats(State(state): State) -> impl IntoResponse { Json(state.pathway_memory.stats().await) } +#[derive(Deserialize)] +struct PathwayBugFingerprintsRequest { + task_class: String, + file_path: String, + signal_class: Option, + limit: Option, +} + +async fn pathway_bug_fingerprints( + State(state): State, + Json(req): Json, +) -> impl IntoResponse { + let fps = state + .pathway_memory + .bug_fingerprints_for( + &req.task_class, + &req.file_path, + req.signal_class.as_deref(), + req.limit.unwrap_or(5), + ) + .await; + Json(json!({ "fingerprints": fps })) +} + #[cfg(test)] mod extractor_tests { use super::*; diff --git a/tests/real-world/scrum_master_pipeline.ts b/tests/real-world/scrum_master_pipeline.ts index 54ec918..76e7dba 100644 --- a/tests/real-world/scrum_master_pipeline.ts +++ b/tests/real-world/scrum_master_pipeline.ts @@ -34,10 +34,12 @@ const FILE_TREE_SPLIT_THRESHOLD = 6000; const FILE_SHARD_SIZE = 3500; // Appended jsonl so auditor's kb_query can surface scrum findings for // files touched by a PR under review. Part of cohesion plan Phase C. -const SCRUM_REVIEWS_JSONL = "/home/profit/lakehouse/data/_kb/scrum_reviews.jsonl"; +const SCRUM_REVIEWS_JSONL = process.env.LH_SCRUM_REVIEWS_OUT + || "/home/profit/lakehouse/data/_kb/scrum_reviews.jsonl"; const OUT_DIR = `/home/profit/lakehouse/tests/real-world/runs/scrum_${Date.now().toString(36)}`; -const PRD_PATH = "/home/profit/lakehouse/docs/PRD.md"; +const PRD_PATH = process.env.LH_SCRUM_PRD + || "/home/profit/lakehouse/docs/PRD.md"; // Using CONTROL_PLANE_PRD as the "suggested changes" doc since it // describes the Phase 38-44 target architecture and is on main. 
// Override via LH_SCRUM_PROPOSAL env to point at a fix-wave doc @@ -258,6 +260,11 @@ interface PathwayTracePayload { reducer_summary: string; final_verdict: string; pathway_vec: number[]; + // ADR-021 semantic-correctness layer. `kind` field matches the Rust + // serde(tag = "kind") wire format — TS and Rust interop directly. + semantic_flags: { kind: string }[]; + type_hints_used: { source: string; symbol: string; type_repr: string }[]; + bug_fingerprints: { flag: { kind: string }; pattern_key: string; example: string; occurrences: number }[]; replay_count: number; replays_succeeded: number; retired: boolean; @@ -289,6 +296,33 @@ async function recordPathwayReplay(pathwayId: string, succeeded: boolean): Promi } } +// ADR-021 Phase C: pre-review enrichment. Fetch aggregated bug +// fingerprints for this narrow fingerprint (same key as hot-swap — +// task_class + file_prefix + signal_class) so the reviewer prompt +// can explicitly warn "this file area has had these bug patterns +// before." Empty on fresh install; grows as the matrix index learns. +interface BugFingerprintRow { + flag: { kind: string }; + pattern_key: string; + example: string; + occurrences: number; +} +async function fetchBugFingerprints(taskClass: string, filePath: string, signalClass: string | null, limit: number): Promise { + try { + const r = await fetch(`${GATEWAY}/vectors/pathway/bug_fingerprints`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ task_class: taskClass, file_path: filePath, signal_class: signalClass, limit }), + signal: AbortSignal.timeout(5000), + }); + if (!r.ok) return []; + const j = await r.json() as { fingerprints: BugFingerprintRow[] }; + return j.fingerprints ?? []; + } catch { + return []; + } +} + // Deterministic signal_class lookup from scrum_reviews.jsonl history. // First-time files get `null`. Files seen before get the signal class // the observer assigned on their most-recent review (if any). 
Keeps the @@ -534,6 +568,23 @@ Attach a self-assessed **Confidence: NN%** to every suggested change AND every g - <50%: genuinely uncertain — include regardless so downstream knows to investigate before applying Format each finding as: \`**1.** . **Confidence: NN%.**\` (in tables, add a final "Confidence" column.) Low confidence is valuable signal — do not round up. +**Per-finding semantic-flag tag (ADR-021, required on every finding):** +Also attach a \`**Flag: **\` on each finding so the pathway-memory matrix index can cluster bug classes over time. Pick the ONE tag that best fits; if none fits, use \`None\`. Allowed categories: +- \`UnitMismatch\` — operation combines values with different units (e.g. row_count - file_count, bytes - rows) +- \`TypeConfusion\` — same type, wrong role (e.g. treating a PK as a row index) +- \`NullableConfusion\` — unwrap-without-check or nullable-treated-as-non-null +- \`OffByOne\` — loop / range / slice boundary mistake +- \`StaleReference\` — calls a deprecated / removed / moved symbol +- \`PseudoImpl\` — stub / todo!() / function named for work it doesn't do +- \`DeadCode\` — unreachable or uncalled code +- \`WarningNoise\` — compiles green but would add a cargo warning +- \`BoundaryViolation\` — crosses a crate/layer boundary it shouldn't +- \`None\` — improvement or nicety that doesn't fit a bug category + +In tables, add a "Flag" column. Examples: + \`**1.** Rewrite base_rows calc. **Confidence: 90%.** **Flag: UnitMismatch.**\` + \`**2.** Extract retry loop. **Confidence: 75%.** **Flag: None.**\` + Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-offset when relevant.`; const history: FileReview["attempts_history"] = []; @@ -550,6 +601,24 @@ Respond with markdown. Be specific, not generic. 
Cite file-region + PRD-chunk-of const signalClass = await lookupSignalClass(rel); const taskClass = "scrum_review"; const hotSwap = await queryHotSwap(taskClass, rel, signalClass); + + // ADR-021 Phase C: pre-review enrichment. Pull aggregated bug + // fingerprints the matrix index has learned for this narrow + // fingerprint and prepend to the reviewer prompt as historical + // context. This is the compounding mechanism — iter-N reviewer + // sees what iter-(N-1) and earlier found, so the grammar of bugs + // accumulates instead of being re-discovered each iteration. + const pastFingerprints = await fetchBugFingerprints(taskClass, rel, signalClass, 5); + let pathwayPreamble = ""; + if (pastFingerprints.length > 0) { + pathwayPreamble = "═══ PATHWAY MEMORY — BUGS PREVIOUSLY FOUND ON THIS FILE AREA (ADR-021) ═══\n" + + "The matrix index has flagged these patterns on the same task_class + file_prefix + signal_class before. Check this file for recurrences of the same shape:\n\n" + + pastFingerprints.map((fp, i) => + `${i + 1}. [${fp.flag.kind}] pattern=\`${fp.pattern_key}\` occurrences=${fp.occurrences}\n example: ${fp.example.slice(0, 160)}` + ).join("\n") + + "\n═══\n\n"; + log(` 📚 pathway memory: ${pastFingerprints.length} historical bug pattern(s) prepended to prompt`); + } let hotSwapOrderedIndices: number[] | null = null; if (hotSwap) { // Reorder the ladder to try the recommended model first. Rung @@ -574,12 +643,12 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of ? `\n\n═══ PRIOR ATTEMPTS FAILED. Specific issues to fix: ═══\n${history.map(h => `Attempt ${h.n} (${h.model}, ${h.chars} chars): ${h.status} — ${h.error ?? "thin/unstructured answer"}`).join("\n")}\n═══` : ""; - log(` attempt ${n}/${MAX_ATTEMPTS}: ${rung.provider}::${rung.model}${learning ? " [w/ learning]" : ""}`); + log(` attempt ${n}/${MAX_ATTEMPTS}: ${rung.provider}::${rung.model}${learning ? " [w/ learning]" : ""}${pathwayPreamble ? 
" [w/ pathway memory]" : ""}`); const attemptStarted = Date.now(); const r = await chat({ provider: rung.provider, model: rung.model, - prompt: baseTask + learning, + prompt: pathwayPreamble + baseTask + learning, max_tokens: 1500, }); const attemptMs = Date.now() - attemptStarted; @@ -660,6 +729,34 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of : null; const conf_min = confidences.length ? Math.min(...confidences) : null; + // ADR-021 Phase B: extract per-finding semantic flags. Reviewer is + // prompted to tag each finding with one of 9 categories plus None. + // Patterns: "**Flag: UnitMismatch**", "Flag: OffByOne", table cell + // with the flag word, or bare-word match. Deduplicated per-trace + // so repeats in one review count once. + const FLAG_VARIANTS = [ + "UnitMismatch", "TypeConfusion", "NullableConfusion", "OffByOne", + "StaleReference", "PseudoImpl", "DeadCode", "WarningNoise", "BoundaryViolation", + ]; + const flagMatches = new Set(); + // Prefer matches anchored to the "Flag:" keyword; fall back to + // bare-word matches so older reviewers that mention a category + // without the "Flag:" prefix still contribute signal. + const patFlagLabeled = /(?:Flag[*:\s]*\s*)([A-Z][A-Za-z]+)/g; + for (const m of accepted.matchAll(patFlagLabeled)) { + if (FLAG_VARIANTS.includes(m[1])) flagMatches.add(m[1]); + } + // Second pass — bare-word matches for each variant, but ONLY if + // the labeled pass produced nothing. This avoids flagging every + // file that happens to mention "DeadCode" in a code sample. + if (flagMatches.size === 0) { + for (const v of FLAG_VARIANTS) { + const re = new RegExp(`\\b${v}\\b`); + if (re.test(accepted)) flagMatches.add(v); + } + } + const semantic_flags_arr = [...flagMatches].map(k => ({ kind: k })); + // Score extraction — regex accepts decimals ("Score: 4.5/10") and // surrounding punctuation ("4/10 — mid"). 
iter 3 had 4 unparseable // scores because the prior regex /(\d)\s*\/\s*10/ missed decimals. @@ -822,6 +919,9 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of // query-time vector. The similarity gate will still discriminate // between pathways with the same fingerprint but different // ladder/KB profiles. + // Include semantic flag tokens in the embedding so traces with + // different bug histories cluster separately — matches Rust's + // build_pathway_vec exactly (flag: token shape). pathway_vec: buildPathwayVec([ taskClass, rel, @@ -829,7 +929,11 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of ...pathwayAttempts.flatMap(a => [`rung:${a.rung}`, `model:${a.model}`, `accepted:${a.accepted}`]), ...topPrd.map(c => `kb:PRD.md`), ...topPlan.map(c => `kb:cohesion_plan`), + ...semantic_flags_arr.map(f => `flag:${f.kind}`), ]), + semantic_flags: semantic_flags_arr, + type_hints_used: [], // Phase C — pre-review enrichment from catalogd/arrow/truth + bug_fingerprints: [], // Phase C — fingerprint extraction from prompt responses replay_count: 0, replays_succeeded: 0, retired: false,