From ed856205582faea05a86c81bcf75d6a31822e2bd Mon Sep 17 00:00:00 2001 From: root Date: Fri, 24 Apr 2026 13:22:50 -0500 Subject: [PATCH] scrum: filter table-header words from bug_fingerprint extraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Iter 11 surfaced "DeadCode:Flag" in the matrix — a noisy pattern_key where "Flag" is the table column HEADER kimi produces for structured review output, not an actual Rust identifier. Kimi's standard format on recent iters: | # | Change | Flag | Confidence | | 1 | Wire AgentIdentity into.. | Boundary.. | 92% | The extractor's KEYWORDS set already filtered Rust grammar words (self, mut, async, etc.) and the FLAG_VARIANTS themselves. Adding markdown-layout words (Flag, Change, Confidence, PRD, Plan) closes the last common noise class. Single-entry-line addition (plus a four-line explanatory comment) — empirically validated against the iter 11 vectord trace that produced DeadCode:Flag. Future iters won't reproduce that specific noise. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/real-world/scrum_master_pipeline.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/real-world/scrum_master_pipeline.ts b/tests/real-world/scrum_master_pipeline.ts index 4239c55..3b877db 100644 --- a/tests/real-world/scrum_master_pipeline.ts +++ b/tests/real-world/scrum_master_pipeline.ts @@ -842,6 +842,11 @@ Respond with markdown. Be specific, not generic. Cite file-region + PRD-chunk-of "const", "static", "impl", "trait", "struct", "enum", "use", "mod", "crate", "super", "match", "return", "Some", "None", "Ok", "Err", "true", "false", + // Markdown table column headers kimi outputs for structured + // reviews — "Flag" / "Change" / "Confidence" are layout words, + // not identifiers. Seen as noise in iter 11 vectord extraction + // ("DeadCode:Flag" pattern_key). + "Flag", "Change", "Confidence", "PRD", "Plan", ]); const filtered = codeTokens.filter(t => !FLAG_SET.has(t) && !KEYWORDS.has(t)); if (filtered.length === 0) continue;