diff --git a/tests/real-world/scrum_master_pipeline.ts b/tests/real-world/scrum_master_pipeline.ts index f5cfff4..d61f9b2 100644 --- a/tests/real-world/scrum_master_pipeline.ts +++ b/tests/real-world/scrum_master_pipeline.ts @@ -795,26 +795,57 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of } if (!variantOnLine) continue; - // Extract identifier-shaped backtick-quoted tokens. These are - // the names the finding is about — field names, type names, - // function names, path expressions (A::B). + // Extract identifier-shaped tokens from backticks. We try two + // levels: (a) whole-backtick match if it's a clean identifier + // or path, (b) for complex content like function signatures + // (`Foo::bar(&self) -> u64`) pull out the longest identifier + // substrings so we still capture the callable. const codeTokens: string[] = []; + const idRe = /[A-Za-z_][A-Za-z0-9_]*(?:::[A-Za-z_][A-Za-z0-9_]*)*/g; for (const m of line.matchAll(/`([^`]+)`/g)) { const raw = m[1].trim(); - // Filter to things that look like identifiers or paths. Skip - // punctuation, spaces, SQL keywords, and things that look - // like prose quotes. - if (!/^[A-Za-z_][A-Za-z0-9_:]*(?:\.[A-Za-z_][A-Za-z0-9_]*)?$/.test(raw)) continue; - if (raw.length < 3) continue; - codeTokens.push(raw); + // Whole-backtick identifier or dotted path? (`row_count`, + // `AccessControl::can_access`, `foo.bar`). + if (/^[A-Za-z_][A-Za-z0-9_:]*(?:\.[A-Za-z_][A-Za-z0-9_]*)?$/.test(raw)) { + if (raw.length >= 3) codeTokens.push(raw); + continue; + } + // Fallback: scan for identifier substrings, take the longest + // meaningful ones (usually the function or type name comes + // first in a signature like `Foo::bar(&self)`). + const ids = [...raw.matchAll(idRe)] + .map(x => x[0]) + .filter(id => id.length >= 3); + // Prefer ::-qualified paths first (they're more specific), + // then the top-2 longest; keeps the key stable under + // signature variation. + const ranked = ids + .map(id => ({ id, score: (id.includes("::") ? 1000 : 0) + id.length })) + .sort((a, b) => b.score - a.score) + .slice(0, 2) + .map(x => x.id); + codeTokens.push(...ranked); } - if (codeTokens.length === 0) continue; + // Remove the flag variant name itself if it got captured (kimi + // and other reviewers often wrap the flag column in backticks). + // Also drop Rust + common keywords that slip through the + // identifier regex — "self", "mut", "async", "await", "pub" + // aren't bug-shape signal, they're grammar. + const FLAG_SET = new Set(FLAG_VARIANTS); + const KEYWORDS = new Set([ + "self", "Self", "mut", "async", "await", "pub", "fn", "let", + "const", "static", "impl", "trait", "struct", "enum", "use", + "mod", "crate", "super", "match", "return", "Some", "None", + "Ok", "Err", "true", "false", + ]); + const filtered = codeTokens.filter(t => !FLAG_SET.has(t) && !KEYWORDS.has(t)); + if (filtered.length === 0) continue; // Canonicalize: dedupe, sort alphabetically, take top 3. // Alphabetical sort gives stability across "A then B" / "B then A" // variants. Top 3 keeps the key short while retaining enough // signal for different bugs to separate. - const uniqTokens = [...new Set(codeTokens)].sort().slice(0, 3); + const uniqTokens = [...new Set(filtered)].sort().slice(0, 3); const pattern_key = `${variantOnLine}:${uniqTokens.join("-")}`; if (seenKeys.has(pattern_key)) continue; seenKeys.add(pattern_key);