cleanup: remove unused HashSet import from 96b46cd + tighten applier gates

96b46cd ("first auto-applied commit") added `use tracing;` and `use std::collections::HashSet;` to queryd/service.rs under a commit message claiming to add a destructive SQL filter. HashSet was unused — cargo check passed (warnings aren't errors) but the workspace now carries a permanent `unused_imports` warning. `use tracing;` is redundant but not flagged by the compiler, leave it. This is an honest postmortem of the rationale-diff divergence problem: emitter claimed one thing, diffed another. The cargo-green gate alone can't catch that. Applier hardening in this commit addresses all three failure modes: - new-warning gate: reject patches that keep build green but add warnings (baseline → post-patch diff) - rationale-diff token alignment heuristic: reject patches whose rationale shares no vocabulary with the actual new_string - dry-run workspace revert: COMMIT=0 was silently leaving files modified between runs; now reverts after each cargo check - prompt additions: forbid unused-symbol imports; require rationale vocabulary to appear in the diff Next-iter applier runs should produce cleaner commits or none at all. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 04:25:53 -05:00 · 2026-04-24 04:25:53 -05:00 · 5e8d87bf34
commit 5e8d87bf34
parent 25ea3de836
2 changed files with 122 additions and 12 deletions
--- a/crates/queryd/src/service.rs
+++ b/crates/queryd/src/service.rs
@ -12,8 +12,6 @@ use serde::{Deserialize, Serialize};
 use crate::context::QueryEngine;
 use crate::delta;
 use crate::paged::ResultStore;
 use tracing;
 use std::collections::HashSet;
 #[derive(Clone)]
 pub struct QueryState {
--- a/tests/real-world/scrum_applier.ts
+++ b/tests/real-world/scrum_applier.ts
@ -184,9 +184,15 @@ HARD CONSTRAINTS (violations → patch rejected):
       • function signature changes
       • new modules, new traits, new dependencies
       • any change that requires editing another file to keep it compiling
       • adding a 'use' import whose symbol is NOT referenced in the same patch
         or already referenced in the surrounding file (no "import and hope")
       • rationale that describes functionality not present in the diff
         (if rationale says "destructive SQL filter", the diff must contain SQL/filter tokens)
  7. Each "new_string" MUST compile in isolation with the same surrounding code.
  8. Max 2 patches per file — quality over quantity.
-  9. If you cannot produce at least one high-confidence mechanical patch under these constraints, output {"patches": []}. Don't guess.
+  9. The "rationale" field MUST use vocabulary that appears in the new_string.
     Generic rationales like "improve code" or "add missing feature" are rejected.
  10. If you cannot produce at least one high-confidence mechanical patch under these constraints, output {"patches": []}. Don't guess.
 ─── SOURCE (${source.length} bytes) ───
 ${source.slice(0, 14000)}
@ -254,9 +260,13 @@ async function applyPatches(file: string, patches: Patch[]): Promise<{ applied:
  return { applied, rejected };
 }
-async function cargoCheck(): Promise<boolean> {
+async function cargoCheck(): Promise<{ green: boolean; warnings: number }> {
  const r = await sh(["cargo", "check", "--workspace"]);
-  return r.code === 0;
+  // Count warnings: cargo emits "warning:" lines on stderr (and sometimes stdout).
  // We count unique "warning: <msg>" opening lines, not continuation context.
  const combined = r.stdout + r.stderr;
  const warnings = (combined.match(/^warning: /gm) || []).length;
  return { green: r.code === 0, warnings };
 }
 async function gitCommit(file: string, patches: Patch[]): Promise<boolean> {
@ -274,6 +284,36 @@ async function revertFile(file: string): Promise<void> {
  await sh(["git", "checkout", "--", file]);
 }
 // Cheap, conservative check that the rationale actually describes the
 // diff. Tokenizes the new_string (what's actually being added), drops
 // Rust keywords + common filler, and requires the rationale to share
 // at least one meaningful token. Catches rationale-diff divergence like
 // "Add destructive SQL filter" for a diff that only adds `use tracing;`.
 const STOPWORDS = new Set([
  "use", "let", "pub", "mut", "fn", "self", "the", "a", "and", "or", "in",
  "to", "for", "of", "on", "at", "add", "added", "adds", "new", "is",
  "as", "if", "else", "match", "return", "crate", "mod", "struct",
  "trait", "impl", "this", "that", "with", "from", "into", "by", "be",
  "it", "its", "not", "one", "two", "line", "lines",
 ]);
 function tokenize(s: string): Set<string> {
  const out = new Set<string>();
  for (const raw of s.toLowerCase().split(/[^a-z0-9_]+/)) {
    if (raw.length >= 4 && !STOPWORDS.has(raw)) out.add(raw);
  }
  return out;
 }
 function rationaleMatchesDiff(p: Patch): boolean {
  // Only check tokens introduced by the patch (what's in new but not old).
  const newToks = tokenize(p.new_string);
  const oldToks = tokenize(p.old_string);
  const added = new Set([...newToks].filter((t) => !oldToks.has(t)));
  if (added.size === 0) return true;  // pure deletion — rationale unconstrained
  const rationaleToks = tokenize(p.rationale);
  for (const t of added) if (rationaleToks.has(t)) return true;
  return false;
 }
 async function main() {
  log(`starting · min_conf=${MIN_CONF} max_files=${MAX_FILES} model=${MODEL} commit=${COMMIT}`);
@ -300,8 +340,18 @@ async function main() {
  log(`${eligible.length} pass confidence gate + deny-list`);
  log(`taking top ${Math.min(MAX_FILES, eligible.length)} by confidence`);
  // Establish pre-run warning baseline so post-patch cargo check can
  // reject patches that keep the build green but add new warnings.
  // This gate exists because 96b46cd landed unused imports through the
  // green-only gate — compiled fine, left a permanent dead-code warning.
  log(`baseline cargo check (warning count)...`);
  let baselineWarnings = (await cargoCheck()).warnings;
  log(`baseline warnings = ${baselineWarnings}`);
  let committedFiles = 0;
  let revertedFiles = 0;
  let warningReverts = 0;
  let rationaleReverts = 0;
  for (const r of eligible.slice(0, MAX_FILES)) {
    log(`${r.file} (conf_avg=${r.confidence_avg} tier=${r.gradient_tier})`);
@ -328,7 +378,7 @@ async function main() {
    }
    log(`  running cargo check...`);
-    const green = await cargoCheck();
+    const { green, warnings } = await cargoCheck();
    if (!green) {
      log(`  ✗ build red — reverting ${r.file}`);
      await revertFile(r.file);
@ -337,12 +387,54 @@ async function main() {
      continue;
    }
-    log(`  ✓ build green`);
+    // New-warning gate. If a patch keeps the build green but adds a
-    const ok = await gitCommit(r.file, patches.slice(0, applied));
+    // warning (unused import, dead_code after removed allow, etc), treat
-    if (ok) {
+    // it the same as build red: revert. The cargo-green gate alone let
-      committedFiles++;
+    // 96b46cd land unused imports; this closes that door.
    if (warnings > baselineWarnings) {
      log(`  ✗ warnings ${baselineWarnings} → ${warnings} (+${warnings - baselineWarnings}) — reverting ${r.file}`);
      await revertFile(r.file);
      warningReverts++;
      await auditLog({
-        action: COMMIT ? "committed" : "dry_run_committed",
+        action: "warnings_increased_reverted",
        file: r.file,
        patches_applied: applied,
        warnings_before: baselineWarnings,
        warnings_after: warnings,
      });
      continue;
    }
    // Rationale-diff alignment heuristic. At least one non-stopword
    // token from the applied patches' combined new_string must appear
    // in the rationale, OR the rationale must name the file's module.
    // This catches cases like "Add destructive SQL filter" rationale
    // with a diff that only adds `use tracing`.
    const applied_patches = patches.slice(0, applied);
    const rationale_ok = applied_patches.some((p) => rationaleMatchesDiff(p));
    if (!rationale_ok) {
      log(`  ✗ rationale doesn't name anything in diff — reverting ${r.file}`);
      await revertFile(r.file);
      rationaleReverts++;
      await auditLog({
        action: "rationale_mismatch_reverted",
        file: r.file,
        patches_applied: applied,
        rationales: applied_patches.map((p) => p.rationale),
      });
      continue;
    }
    log(`  ✓ build green (warnings ${warnings}/${baselineWarnings})`);
    // Dry-run MUST revert the file after a successful check so the
    // workspace doesn't accumulate unpushed changes across dry-runs.
    // Previously this left modified files, silently polluting state.
    if (!COMMIT) {
      log(`  (dry-run) would commit ${r.file} — reverting workspace change`);
      await revertFile(r.file);
      await auditLog({
        action: "dry_run_would_commit",
        file: r.file,
        patches_applied: applied,
        patches_rejected: rejected.length,
@ -350,10 +442,30 @@ async function main() {
        gradient_tier: r.gradient_tier,
        reviewer_model: r.accepted_model,
      });
      continue;
    }
    const ok = await gitCommit(r.file, applied_patches);
    if (ok) {
      committedFiles++;
      // Advance the warning baseline so a later patch on a different
      // file is compared against the new (committed) state, not the
      // pre-run state — otherwise every subsequent patch trips the gate.
      baselineWarnings = warnings;
      await auditLog({
        action: "committed",
        file: r.file,
        patches_applied: applied,
        patches_rejected: rejected.length,
        confidence_avg: r.confidence_avg,
        gradient_tier: r.gradient_tier,
        reviewer_model: r.accepted_model,
        warnings_after: warnings,
      });
    }
  }
-  log(`DONE · committed=${committedFiles} reverted=${revertedFiles}`);
+  log(`DONE · committed=${committedFiles} build_red=${revertedFiles} warning_revert=${warningReverts} rationale_revert=${rationaleReverts}`);
 }
 await main();