diff --git a/crates/queryd/src/service.rs b/crates/queryd/src/service.rs index ae3c3eb..7f0028c 100644 --- a/crates/queryd/src/service.rs +++ b/crates/queryd/src/service.rs @@ -12,8 +12,6 @@ use serde::{Deserialize, Serialize}; use crate::context::QueryEngine; use crate::delta; use crate::paged::ResultStore; -use tracing; -use std::collections::HashSet; #[derive(Clone)] pub struct QueryState { diff --git a/tests/real-world/scrum_applier.ts b/tests/real-world/scrum_applier.ts index 9df3c6d..0c048af 100644 --- a/tests/real-world/scrum_applier.ts +++ b/tests/real-world/scrum_applier.ts @@ -184,9 +184,15 @@ HARD CONSTRAINTS (violations → patch rejected): • function signature changes • new modules, new traits, new dependencies • any change that requires editing another file to keep it compiling + • adding a 'use' import whose symbol is NOT referenced in the same patch + or already referenced in the surrounding file (no "import and hope") + • rationale that describes functionality not present in the diff + (if rationale says "destructive SQL filter", the diff must contain SQL/filter tokens) 7. Each "new_string" MUST compile in isolation with the same surrounding code. 8. Max 2 patches per file — quality over quantity. - 9. If you cannot produce at least one high-confidence mechanical patch under these constraints, output {"patches": []}. Don't guess. + 9. The "rationale" field MUST use vocabulary that appears in the new_string. + Generic rationales like "improve code" or "add missing feature" are rejected. + 10. If you cannot produce at least one high-confidence mechanical patch under these constraints, output {"patches": []}. Don't guess. ─── SOURCE (${source.length} bytes) ─── ${source.slice(0, 14000)} @@ -254,9 +260,13 @@ async function applyPatches(file: string, patches: Patch[]): Promise<{ applied: return { applied, rejected }; } -async function cargoCheck(): Promise { +async function cargoCheck(): Promise<{ green: boolean; warnings: number }> { const r = await sh(["cargo", "check", "--workspace"]); - return r.code === 0; + // Count warnings: cargo emits "warning:" lines on stderr (and sometimes stdout). + // We count unique "warning: " opening lines, not continuation context. + const combined = r.stdout + r.stderr; + const warnings = (combined.match(/^warning: /gm) || []).length; + return { green: r.code === 0, warnings }; } async function gitCommit(file: string, patches: Patch[]): Promise { @@ -274,6 +284,36 @@ async function revertFile(file: string): Promise { await sh(["git", "checkout", "--", file]); } +// Cheap, conservative check that the rationale actually describes the +// diff. Tokenizes the new_string (what's actually being added), drops +// Rust keywords + common filler, and requires the rationale to share +// at least one meaningful token. Catches rationale-diff divergence like +// "Add destructive SQL filter" for a diff that only adds `use tracing;`. +const STOPWORDS = new Set([ + "use", "let", "pub", "mut", "fn", "self", "the", "a", "and", "or", "in", + "to", "for", "of", "on", "at", "add", "added", "adds", "new", "is", + "as", "if", "else", "match", "return", "crate", "mod", "struct", + "trait", "impl", "this", "that", "with", "from", "into", "by", "be", + "it", "its", "not", "one", "two", "line", "lines", +]); +function tokenize(s: string): Set { + const out = new Set(); + for (const raw of s.toLowerCase().split(/[^a-z0-9_]+/)) { + if (raw.length >= 4 && !STOPWORDS.has(raw)) out.add(raw); + } + return out; +} +function rationaleMatchesDiff(p: Patch): boolean { + // Only check tokens introduced by the patch (what's in new but not old). + const newToks = tokenize(p.new_string); + const oldToks = tokenize(p.old_string); + const added = new Set([...newToks].filter((t) => !oldToks.has(t))); + if (added.size === 0) return true; // pure deletion — rationale unconstrained + const rationaleToks = tokenize(p.rationale); + for (const t of added) if (rationaleToks.has(t)) return true; + return false; +} + async function main() { log(`starting · min_conf=${MIN_CONF} max_files=${MAX_FILES} model=${MODEL} commit=${COMMIT}`); @@ -300,8 +340,18 @@ async function main() { log(`${eligible.length} pass confidence gate + deny-list`); log(`taking top ${Math.min(MAX_FILES, eligible.length)} by confidence`); + // Establish pre-run warning baseline so post-patch cargo check can + // reject patches that keep the build green but add new warnings. + // This gate exists because 96b46cd landed unused imports through the + // green-only gate — compiled fine, left a permanent dead-code warning. + log(`baseline cargo check (warning count)...`); + let baselineWarnings = (await cargoCheck()).warnings; + log(`baseline warnings = ${baselineWarnings}`); + let committedFiles = 0; let revertedFiles = 0; + let warningReverts = 0; + let rationaleReverts = 0; for (const r of eligible.slice(0, MAX_FILES)) { log(`${r.file} (conf_avg=${r.confidence_avg} tier=${r.gradient_tier})`); @@ -328,7 +378,7 @@ async function main() { } log(` running cargo check...`); - const green = await cargoCheck(); + const { green, warnings } = await cargoCheck(); if (!green) { log(` ✗ build red — reverting ${r.file}`); await revertFile(r.file); @@ -337,12 +387,54 @@ async function main() { continue; } - log(` ✓ build green`); - const ok = await gitCommit(r.file, patches.slice(0, applied)); - if (ok) { - committedFiles++; + // New-warning gate. If a patch keeps the build green but adds a + // warning (unused import, dead_code after removed allow, etc), treat + // it the same as build red: revert. The cargo-green gate alone let + // 96b46cd land unused imports; this closes that door. + if (warnings > baselineWarnings) { + log(` ✗ warnings ${baselineWarnings} → ${warnings} (+${warnings - baselineWarnings}) — reverting ${r.file}`); + await revertFile(r.file); + warningReverts++; await auditLog({ - action: COMMIT ? "committed" : "dry_run_committed", + action: "warnings_increased_reverted", + file: r.file, + patches_applied: applied, + warnings_before: baselineWarnings, + warnings_after: warnings, + }); + continue; + } + + // Rationale-diff alignment heuristic. At least one non-stopword + // token from the applied patches' combined new_string must appear + // in the rationale, OR the rationale must name the file's module. + // This catches cases like "Add destructive SQL filter" rationale + // with a diff that only adds `use tracing`. + const applied_patches = patches.slice(0, applied); + const rationale_ok = applied_patches.some((p) => rationaleMatchesDiff(p)); + if (!rationale_ok) { + log(` ✗ rationale doesn't name anything in diff — reverting ${r.file}`); + await revertFile(r.file); + rationaleReverts++; + await auditLog({ + action: "rationale_mismatch_reverted", + file: r.file, + patches_applied: applied, + rationales: applied_patches.map((p) => p.rationale), + }); + continue; + } + + log(` ✓ build green (warnings ${warnings}/${baselineWarnings})`); + + // Dry-run MUST revert the file after a successful check so the + // workspace doesn't accumulate unpushed changes across dry-runs. + // Previously this left modified files, silently polluting state. + if (!COMMIT) { + log(` (dry-run) would commit ${r.file} — reverting workspace change`); + await revertFile(r.file); + await auditLog({ + action: "dry_run_would_commit", file: r.file, patches_applied: applied, patches_rejected: rejected.length, @@ -350,10 +442,30 @@ async function main() { gradient_tier: r.gradient_tier, reviewer_model: r.accepted_model, }); + continue; + } + + const ok = await gitCommit(r.file, applied_patches); + if (ok) { + committedFiles++; + // Advance the warning baseline so a later patch on a different + // file is compared against the new (committed) state, not the + // pre-run state — otherwise every subsequent patch trips the gate. + baselineWarnings = warnings; + await auditLog({ + action: "committed", + file: r.file, + patches_applied: applied, + patches_rejected: rejected.length, + confidence_avg: r.confidence_avg, + gradient_tier: r.gradient_tier, + reviewer_model: r.accepted_model, + warnings_after: warnings, + }); } } - log(`DONE · committed=${committedFiles} reverted=${revertedFiles}`); + log(`DONE · committed=${committedFiles} build_red=${revertedFiles} warning_revert=${warningReverts} rationale_revert=${rationaleReverts}`); } await main();