From ee31424d0c7ad484ea35049de069670b608aee18 Mon Sep 17 00:00:00 2001
From: root <root@island37.com>
Date: Fri, 24 Apr 2026 06:02:07 -0500
Subject: [PATCH] ADR-021 Phase D: bug_fingerprint pattern extraction from
 reviewer output
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fills the gap between Phase B (flags tagged) and Phase C (preamble
quotes past fingerprints): parses each reviewer line that mentions a
Flag variant, collects backtick-quoted identifiers, canonicalizes them
(sorted alphabetically, top 3), and emits a stable pattern_key of
shape `{Flag}:{tok1}-{tok2}-{tok3}`.

Stability by design: canonical sort means "row_count + QueryResponse"
and "QueryResponse + row_count" produce the same key, so variation in
reviewer prose doesn't fragment the index. Top-3 cap keeps keys short
while retaining enough signal to separate different bugs of the same
category.

Dry-run validation on iter-8 delta.rs output (crates/queryd prefix)
extracted 10 semantically meaningful fingerprints including:
  - UnitMismatch:base_rows-checked_add-checked_sub
  - DeadCode:queryd::delta::write_delta (P9-001 dead-function finding)
  - BoundaryViolation:can_access-log_query-masked_columns (P13-001 gap)
  - NullableConfusion:CompactResult-DeltaError-IntegerOverflow

Cross-cutting signal: kimi-k2:1t's finding #5 explicitly quoted the
seeded pathway memory preamble ("Pathway memory flags row_count-
file_count unit mismatch") and proposed overflow-checked arithmetic as
the fix. That is the compounding loop in action — prior bug context
shifted the reviewer's attention toward a specific instance of the
same class, which produces a specific pattern_key that will compound
further on the next iter.

Filter: identifier-shaped backtick tokens only (letters, digits, `_`,
`::` paths, one optional `.member` suffix; snake_case / CamelCase).
Skips tokens containing spaces or other punctuation, prose quotes, and
tokens <3 chars so prose fragments and partial words don't pollute the
index.

What's still queued (Phase E):
  - type_hints_used population from catalogd column types + Arrow schema
  - auditor → pathway audit_consensus update wire (strict-audit gate
    activation)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 tests/real-world/scrum_master_pipeline.ts | 80 ++++++++++++++++++++++-
 1 file changed, 78 insertions(+), 2 deletions(-)

diff --git a/tests/real-world/scrum_master_pipeline.ts b/tests/real-world/scrum_master_pipeline.ts
index 76e7dba..f5cfff4 100644
--- a/tests/real-world/scrum_master_pipeline.ts
+++ b/tests/real-world/scrum_master_pipeline.ts
@@ -757,6 +757,82 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
     }
     const semantic_flags_arr = [...flagMatches].map(k => ({ kind: k }));
 
+    // ADR-021 Phase D: bug_fingerprint extraction.
+    //
+    // Walk per-finding rows (either table format with columns
+    // `Change | Flag | Confidence` OR bullet-list with inline
+    // `**Flag: X.**` tag) and pair each flag with the text of the
+    // line it appears on. Then derive a stable pattern_key from code
+    // identifiers that line cites in backticks, so future reviews
+    // of similar bugs cluster under the same key.
+    //
+    // v1 is heuristic (regex + identifier extraction + canonical
+    // sort). It's intentionally NOT a semantic extractor — just a
+    // deterministic "take the top code-shaped tokens and join them
+    // with the flag." Stability comes from sorting tokens alphabetically
+    // before joining so "row_count + QueryResponse" and "QueryResponse
+    // + row_count" produce the same key.
+    const bug_fingerprints_arr: Array<{
+      flag: { kind: string };
+      pattern_key: string;
+      example: string;
+      occurrences: number;
+    }> = [];
+    {
+      // Split into candidate finding blocks. Both formats are row-
+      // oriented, so a line split is a reasonable starting point.
+      // Findings tend to be one-line table rows OR multi-line bullets
+      // starting with **N.** — we handle both by looking at any line
+      // that mentions a Flag variant and treating it as a finding.
+      const lines = accepted.split(/\r?\n/);
+      const seenKeys = new Set<string>();
+      for (const line of lines) {
+        // Find the flag variant on this line (if any).
+        let variantOnLine: string | null = null;
+        for (const v of FLAG_VARIANTS) {
+          const re = new RegExp(`\\b${v}\\b`);
+          if (re.test(line)) { variantOnLine = v; break; }
+        }
+        if (!variantOnLine) continue;
+
+        // Extract identifier-shaped backtick-quoted tokens. These are
+        // the names the finding is about — field names, type names,
+        // function names, path expressions (A::B).
+        const codeTokens: string[] = [];
+        for (const m of line.matchAll(/`([^`]+)`/g)) {
+          const raw = m[1].trim();
+          // Filter to things that look like identifiers or paths. Skip
+          // anything with spaces or other punctuation (SQL snippets,
+          // prose quotes); a bare one-word keyword still passes.
+          if (!/^[A-Za-z_][A-Za-z0-9_:]*(?:\.[A-Za-z_][A-Za-z0-9_]*)?$/.test(raw)) continue;
+          if (raw.length < 3) continue;
+          codeTokens.push(raw);
+        }
+        if (codeTokens.length === 0) continue;
+
+        // Canonicalize: dedupe, sort alphabetically, take top 3.
+        // Alphabetical sort gives stability across "A then B" / "B then A"
+        // variants. Top 3 keeps the key short while retaining enough
+        // signal for different bugs to separate.
+        const uniqTokens = [...new Set(codeTokens)].sort().slice(0, 3);
+        const pattern_key = `${variantOnLine}:${uniqTokens.join("-")}`;
+        if (seenKeys.has(pattern_key)) continue;
+        seenKeys.add(pattern_key);
+
+        // Example: the finding line, trimmed + truncated. Preserves
+        // just enough context that the pre-review preamble in the
+        // next iter can quote it back to the reviewer meaningfully.
+        const example = line.replace(/\s+/g, " ").trim().slice(0, 200);
+
+        bug_fingerprints_arr.push({
+          flag: { kind: variantOnLine },
+          pattern_key,
+          example,
+          occurrences: 1,
+        });
+      }
+    }
+
     // Score extraction — regex accepts decimals ("Score: 4.5/10") and
     // surrounding punctuation ("4/10 — mid"). iter 3 had 4 unparseable
     // scores because the prior regex /(\d)\s*\/\s*10/ missed decimals.
@@ -932,8 +1008,8 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
           ...semantic_flags_arr.map(f => `flag:${f.kind}`),
         ]),
         semantic_flags: semantic_flags_arr,
-        type_hints_used: [], // Phase C — pre-review enrichment from catalogd/arrow/truth
-        bug_fingerprints: [], // Phase C — fingerprint extraction from prompt responses
+        type_hints_used: [], // Phase E — pre-review enrichment from catalogd/arrow/truth
+        bug_fingerprints: bug_fingerprints_arr, // ADR-021 Phase D
         replay_count: 0,
         replays_succeeded: 0,
         retired: false,