2026-04-27 15:55:24 +00:00
1 changed files with 78 additions and 2 deletions
--- a/tests/real-world/scrum_master_pipeline.ts
+++ b/tests/real-world/scrum_master_pipeline.ts
@ -757,6 +757,82 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
    }
    const semantic_flags_arr = [...flagMatches].map(k => ({ kind: k }));
    // ADR-021 Phase D: bug_fingerprint extraction.
    //
    // Walk per-finding rows (either table format with columns
    // `Change | Flag | Confidence` OR bullet-list with inline
    // `**Flag: X.**` tag) and pair each flag with the surrounding
    // finding text. Then derive a stable pattern_key from code
    // identifiers the finding cites in backticks, so future reviews
    // of similar bugs cluster under the same key.
    //
    // v1 is heuristic (regex + identifier extraction + canonical
    // sort). It's intentionally NOT a semantic extractor — just a
    // deterministic "take the first few code-shaped tokens and join
    // them with the flag." Stability comes from sorting tokens
    // alphabetically before joining, so "row_count + QueryResponse"
    // and "QueryResponse + row_count" produce the same key.
    // Fingerprints extracted from this review: one entry per distinct
    // pattern_key. `occurrences` is always emitted as 1 here — a key
    // that repeats within the same review is dropped, not counted.
    const bug_fingerprints_arr: Array<{
      flag: { kind: string };
      pattern_key: string;
      example: string;
      occurrences: number;
    }> = [];
    {
      // Both finding formats (table rows and **N.** bullets) are row-
      // oriented, so we scan line by line and treat any line that
      // mentions a flag variant as a finding.

      // Compile one word-boundary matcher per variant up front instead
      // of once per (line, variant) pair. These regexes carry no `g`/`y`
      // flag, so `test` keeps no state and reuse across lines is safe.
      // Variant names are interpolated verbatim — presumably plain
      // identifiers; confirm against FLAG_VARIANTS.
      const variantMatchers = [...FLAG_VARIANTS].map(v => ({
        variant: v,
        re: new RegExp(`\\b${v}\\b`),
      }));
      // A backtick-quoted span; capture group 1 is the quoted text.
      const backtickSpan = /`([^`]+)`/g;
      // Identifier or `A::B` path, optionally followed by one `.member`.
      const identifierShape = /^[A-Za-z_][A-Za-z0-9_:]*(?:\.[A-Za-z_][A-Za-z0-9_]*)?$/;

      const seenKeys = new Set<string>();
      for (const line of accepted.split(/\r?\n/)) {
        // First matching variant wins, in FLAG_VARIANTS iteration order.
        let variantOnLine: string | null = null;
        for (const { variant, re } of variantMatchers) {
          if (re.test(line)) { variantOnLine = variant; break; }
        }
        if (!variantOnLine) continue;

        // Collect the backtick-quoted tokens that look like code names:
        // field names, type names, function names, path expressions.
        // The filter is purely shape-based: spans containing spaces or
        // punctuation (prose quotes, expressions) are rejected, as are
        // tokens shorter than 3 characters. Keyword-like words such as
        // `SELECT` are identifier-shaped and are NOT filtered out.
        const codeTokens: string[] = [];
        for (const m of line.matchAll(backtickSpan)) {
          const raw = (m[1] ?? "").trim();
          if (!identifierShape.test(raw)) continue;
          if (raw.length < 3) continue;
          codeTokens.push(raw);
        }
        // A flagged line with no code-shaped tokens yields no fingerprint.
        if (codeTokens.length === 0) continue;

        // Canonicalize: dedupe, sort (default code-unit order), keep the
        // first 3. Sorting makes "A then B" and "B then A" findings
        // produce the same key; the cap keeps the key short.
        const uniqTokens = [...new Set(codeTokens)].sort().slice(0, 3);
        const pattern_key = `${variantOnLine}:${uniqTokens.join("-")}`;
        // Only the first line producing a given key is recorded.
        if (seenKeys.has(pattern_key)) continue;
        seenKeys.add(pattern_key);

        // Example: the finding line with whitespace runs collapsed,
        // trimmed, and truncated to 200 characters.
        const example = line.replace(/\s+/g, " ").trim().slice(0, 200);
        bug_fingerprints_arr.push({
          flag: { kind: variantOnLine },
          pattern_key,
          example,
          occurrences: 1,
        });
      }
    }
    // Score extraction — regex accepts decimals ("Score: 4.5/10") and
    // surrounding punctuation ("4/10 — mid"). iter 3 had 4 unparseable
    // scores because the prior regex /(\d)\s*\/\s*10/ missed decimals.
@ -932,8 +1008,8 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of
          ...semantic_flags_arr.map(f => `flag:${f.kind}`),
        ]),
        semantic_flags: semantic_flags_arr,
-        type_hints_used: [], // Phase C — pre-review enrichment from catalogd/arrow/truth
+        type_hints_used: [], // Phase E — pre-review enrichment from catalogd/arrow/truth
-        bug_fingerprints: [], // Phase C — fingerprint extraction from prompt responses
+        bug_fingerprints: bug_fingerprints_arr, // ADR-021 Phase D
        replay_count: 0,
        replays_succeeded: 0,
        retired: false,