From ee31424d0c7ad484ea35049de069670b608aee18 Mon Sep 17 00:00:00 2001
From: root
Date: Fri, 24 Apr 2026 06:02:07 -0500
Subject: [PATCH] ADR-021 Phase D: bug_fingerprint pattern extraction from reviewer output
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fills the gap between Phase B (flags tagged) and Phase C (preamble
quotes past fingerprints): parses each reviewer line that mentions a
Flag variant, collects backtick-quoted identifiers, canonicalizes them
(sorted alphabetically, top 3), and emits a stable pattern_key of shape
`{Flag}:{tok1}-{tok2}-{tok3}`.

Stability by design: canonical sort means "row_count + QueryResponse"
and "QueryResponse + row_count" produce the same key, so variation in
reviewer prose doesn't fragment the index. Top-3 cap keeps keys short
while retaining enough signal to separate different bugs of the same
category.

Dry-run validation on iter-8 delta.rs output (crates/queryd prefix)
extracted 10 semantically meaningful fingerprints including:
- UnitMismatch:base_rows-checked_add-checked_sub
- DeadCode:queryd::delta::write_delta (P9-001 dead-function finding)
- BoundaryViolation:can_access-log_query-masked_columns (P13-001 gap)
- NullableConfusion:CompactResult-DeltaError-IntegerOverflow

Cross-cutting signal: kimi-k2:1t's finding #5 explicitly quoted the
seeded pathway memory preamble ("Pathway memory flags
row_count-file_count unit mismatch") and proposed overflow-checked
arithmetic as the fix. That is the compounding loop in action — prior
bug context shifted the reviewer's attention toward a specific instance
of the same class, which produces a specific pattern_key that will
compound further on the next iter.

Filter: identifier-shaped tokens only (A-Za-z_ / :: paths / snake_case
/ CamelCase). Skips punctuation, prose quotes, and tokens <3 chars so
generic nouns and partial words don't pollute the index.

What's still queued (Phase E):
- type_hints_used population from catalogd column types + Arrow schema
- auditor → pathway audit_consensus update wire (strict-audit gate activation)

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 tests/real-world/scrum_master_pipeline.ts | 103 ++++++++++++++++++++++-
 1 file changed, 101 insertions(+), 2 deletions(-)

diff --git a/tests/real-world/scrum_master_pipeline.ts b/tests/real-world/scrum_master_pipeline.ts
index 76e7dba..f5cfff4 100644
--- a/tests/real-world/scrum_master_pipeline.ts
+++ b/tests/real-world/scrum_master_pipeline.ts
@@ -757,6 +757,105 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
     }
     const semantic_flags_arr = [...flagMatches].map(k => ({ kind: k }));
 
+    // ADR-021 Phase D: bug_fingerprint extraction.
+    //
+    // Walk per-finding rows (either table format with columns
+    // `Change | Flag | Confidence` OR bullet-list with inline
+    // `**Flag: X.**` tag) and pair each flag with the surrounding
+    // finding text. Then derive a stable pattern_key from code
+    // identifiers the finding cites in backticks, so future reviews
+    // of similar bugs cluster under the same key.
+    //
+    // v1 is heuristic (regex + identifier extraction + canonical
+    // sort). It's intentionally NOT a semantic extractor — just a
+    // deterministic "sort the code-shaped tokens, keep the first
+    // three, and join them onto the flag." Nothing is hashed: the
+    // key is readable text shaped `{Flag}:{tok1}-{tok2}-{tok3}`.
+    // Stability comes from sorting tokens before joining, so
+    // "row_count + QueryResponse" and "QueryResponse + row_count"
+    // produce the same key.
+    const bug_fingerprints_arr: Array<{
+      flag: { kind: string };
+      pattern_key: string;
+      example: string;
+      occurrences: number;
+    }> = [];
+    {
+      // One word-boundary matcher per flag variant, compiled once
+      // here rather than once per (line, variant) pair in the loop.
+      const flagMatchers = Array.from(FLAG_VARIANTS, (v) => ({
+        kind: v,
+        re: new RegExp(`\\b${v}\\b`),
+      }));
+      const flagNames = new Set<string>(flagMatchers.map((m) => m.kind));
+
+      // Identifier-shaped: a leading letter or underscore, then word
+      // characters or colons (covers `A::B` paths), optionally
+      // followed by one `.member` suffix. This rejects punctuation,
+      // spaces, and prose quotes; it does not know about keywords.
+      const identifierShape =
+        /^[A-Za-z_][A-Za-z0-9_:]*(?:\.[A-Za-z_][A-Za-z0-9_]*)?$/;
+
+      // Split into candidate finding blocks. Both formats are row-
+      // oriented, so a line split is a reasonable starting point.
+      // Findings tend to be one-line table rows OR multi-line bullets
+      // starting with **N.** — we handle both by looking at any line
+      // that mentions a Flag variant and treating it as a finding.
+      const lines = accepted.split(/\r?\n/);
+      const seenKeys = new Set<string>();
+      for (const line of lines) {
+        // Find the flag variant on this line (if any). When a line
+        // names several, the first one in FLAG_VARIANTS order wins.
+        const variantOnLine: string | null =
+          flagMatchers.find((m) => m.re.test(line))?.kind ?? null;
+        if (!variantOnLine) continue;
+
+        // Extract identifier-shaped backtick-quoted tokens. These are
+        // the names the finding is about — field names, type names,
+        // function names, path expressions (A::B).
+        const codeTokens: string[] = [];
+        for (const m of line.matchAll(/`([^`]+)`/g)) {
+          const raw = m[1].trim();
+          if (raw.length < 3) continue;
+          if (!identifierShape.test(raw)) continue;
+          // The flag is already the key's prefix. A reviewer who
+          // writes it in backticks must not get a different key from
+          // one who writes it plain, so flag names never count as
+          // tokens.
+          if (flagNames.has(raw)) continue;
+          codeTokens.push(raw);
+        }
+        if (codeTokens.length === 0) continue;
+
+        // Canonicalize: dedupe, sort, keep the first 3. The default
+        // sort compares UTF-16 code units (CamelCase lands ahead of
+        // snake_case) and ignores locale, so "A then B" / "B then A"
+        // collapse to one key on every machine. Three tokens keep the
+        // key short while retaining enough signal for different bugs
+        // to separate.
+        const uniqTokens = [...new Set(codeTokens)].sort().slice(0, 3);
+        const pattern_key = `${variantOnLine}:${uniqTokens.join("-")}`;
+        if (seenKeys.has(pattern_key)) continue;
+        seenKeys.add(pattern_key);
+
+        // Example: the finding line, trimmed + truncated. Preserves
+        // just enough context that the pre-review preamble in the
+        // next iter can quote it back to the reviewer meaningfully.
+        let example = line.replace(/\s+/g, " ").trim().slice(0, 200);
+        // slice() counts UTF-16 code units, so the cut can land inside
+        // a surrogate pair. Drop an orphaned high half so the stored
+        // example stays well-formed text.
+        if (/[\uD800-\uDBFF]$/.test(example)) example = example.slice(0, -1);
+
+        bug_fingerprints_arr.push({
+          flag: { kind: variantOnLine },
+          pattern_key,
+          example,
+          occurrences: 1,
+        });
+      }
+    }
+
     // Score extraction — regex accepts decimals ("Score: 4.5/10") and
     // surrounding punctuation ("4/10 — mid"). iter 3 had 4 unparseable
     // scores because the prior regex /(\d)\s*\/\s*10/ missed decimals.
@@ -932,8 +1031,8 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
         ...semantic_flags_arr.map(f => `flag:${f.kind}`),
       ]),
       semantic_flags: semantic_flags_arr,
-      type_hints_used: [], // Phase C — pre-review enrichment from catalogd/arrow/truth
-      bug_fingerprints: [], // Phase C — fingerprint extraction from prompt responses
+      type_hints_used: [], // Phase E — pre-review enrichment from catalogd/arrow/truth
+      bug_fingerprints: bug_fingerprints_arr, // ADR-021 Phase D
       replay_count: 0,
       replays_succeeded: 0,
       retired: false,