cleanup: remove unused HashSet import from 96b46cd + tighten applier gates
Some checks failed
lakehouse/auditor 1 blocking issue: cloud: claim not backed — "journal event verified live (total_events_created 0→1 after probe)."
Some checks failed
lakehouse/auditor 1 blocking issue: cloud: claim not backed — "journal event verified live (total_events_created 0→1 after probe)."
96b46cd ("first auto-applied commit") added `use tracing;` and
`use std::collections::HashSet;` to queryd/service.rs under a commit
message claiming to add a destructive SQL filter. HashSet was unused —
cargo check passed (warnings aren't errors) but the workspace now
carries a permanent `unused_imports` warning. `use tracing;` is
redundant but not flagged by the compiler, leave it.
This is an honest postmortem of the rationale-diff divergence problem:
emitter claimed one thing, diffed another. The cargo-green gate alone
can't catch that.
Applier hardening in this commit addresses all three failure modes:
- new-warning gate: reject patches that keep build green but add
warnings (baseline → post-patch diff)
- rationale-diff token alignment heuristic: reject patches whose
rationale shares no vocabulary with the actual new_string
- dry-run workspace revert: COMMIT=0 was silently leaving files
modified between runs; now reverts after each cargo check
- prompt additions: forbid unused-symbol imports; require rationale
vocabulary to appear in the diff
Next-iter applier runs should produce cleaner commits or none at all.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
25ea3de836
commit
5e8d87bf34
@ -12,8 +12,6 @@ use serde::{Deserialize, Serialize};
|
|||||||
use crate::context::QueryEngine;
|
use crate::context::QueryEngine;
|
||||||
use crate::delta;
|
use crate::delta;
|
||||||
use crate::paged::ResultStore;
|
use crate::paged::ResultStore;
|
||||||
use tracing;
|
|
||||||
use std::collections::HashSet;
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct QueryState {
|
pub struct QueryState {
|
||||||
|
|||||||
@ -184,9 +184,15 @@ HARD CONSTRAINTS (violations → patch rejected):
|
|||||||
• function signature changes
|
• function signature changes
|
||||||
• new modules, new traits, new dependencies
|
• new modules, new traits, new dependencies
|
||||||
• any change that requires editing another file to keep it compiling
|
• any change that requires editing another file to keep it compiling
|
||||||
|
• adding a 'use' import whose symbol is NOT referenced in the same patch
|
||||||
|
or already referenced in the surrounding file (no "import and hope")
|
||||||
|
• rationale that describes functionality not present in the diff
|
||||||
|
(if rationale says "destructive SQL filter", the diff must contain SQL/filter tokens)
|
||||||
7. Each "new_string" MUST compile in isolation with the same surrounding code.
|
7. Each "new_string" MUST compile in isolation with the same surrounding code.
|
||||||
8. Max 2 patches per file — quality over quantity.
|
8. Max 2 patches per file — quality over quantity.
|
||||||
9. If you cannot produce at least one high-confidence mechanical patch under these constraints, output {"patches": []}. Don't guess.
|
9. The "rationale" field MUST use vocabulary that appears in the new_string.
|
||||||
|
Generic rationales like "improve code" or "add missing feature" are rejected.
|
||||||
|
10. If you cannot produce at least one high-confidence mechanical patch under these constraints, output {"patches": []}. Don't guess.
|
||||||
|
|
||||||
─── SOURCE (${source.length} bytes) ───
|
─── SOURCE (${source.length} bytes) ───
|
||||||
${source.slice(0, 14000)}
|
${source.slice(0, 14000)}
|
||||||
@ -254,9 +260,13 @@ async function applyPatches(file: string, patches: Patch[]): Promise<{ applied:
|
|||||||
return { applied, rejected };
|
return { applied, rejected };
|
||||||
}
|
}
|
||||||
|
|
||||||
async function cargoCheck(): Promise<boolean> {
|
async function cargoCheck(): Promise<{ green: boolean; warnings: number }> {
|
||||||
const r = await sh(["cargo", "check", "--workspace"]);
|
const r = await sh(["cargo", "check", "--workspace"]);
|
||||||
return r.code === 0;
|
// Count warnings: cargo emits "warning:" lines on stderr (and sometimes stdout).
|
||||||
|
// We count unique "warning: <msg>" opening lines, not continuation context.
|
||||||
|
const combined = r.stdout + r.stderr;
|
||||||
|
const warnings = (combined.match(/^warning: /gm) || []).length;
|
||||||
|
return { green: r.code === 0, warnings };
|
||||||
}
|
}
|
||||||
|
|
||||||
async function gitCommit(file: string, patches: Patch[]): Promise<boolean> {
|
async function gitCommit(file: string, patches: Patch[]): Promise<boolean> {
|
||||||
@ -274,6 +284,36 @@ async function revertFile(file: string): Promise<void> {
|
|||||||
await sh(["git", "checkout", "--", file]);
|
await sh(["git", "checkout", "--", file]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Cheap, conservative check that the rationale actually describes the
|
||||||
|
// diff. Tokenizes the new_string (what's actually being added), drops
|
||||||
|
// Rust keywords + common filler, and requires the rationale to share
|
||||||
|
// at least one meaningful token. Catches rationale-diff divergence like
|
||||||
|
// "Add destructive SQL filter" for a diff that only adds `use tracing;`.
|
||||||
|
const STOPWORDS = new Set([
|
||||||
|
"use", "let", "pub", "mut", "fn", "self", "the", "a", "and", "or", "in",
|
||||||
|
"to", "for", "of", "on", "at", "add", "added", "adds", "new", "is",
|
||||||
|
"as", "if", "else", "match", "return", "crate", "mod", "struct",
|
||||||
|
"trait", "impl", "this", "that", "with", "from", "into", "by", "be",
|
||||||
|
"it", "its", "not", "one", "two", "line", "lines",
|
||||||
|
]);
|
||||||
|
function tokenize(s: string): Set<string> {
|
||||||
|
const out = new Set<string>();
|
||||||
|
for (const raw of s.toLowerCase().split(/[^a-z0-9_]+/)) {
|
||||||
|
if (raw.length >= 4 && !STOPWORDS.has(raw)) out.add(raw);
|
||||||
|
}
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
function rationaleMatchesDiff(p: Patch): boolean {
|
||||||
|
// Only check tokens introduced by the patch (what's in new but not old).
|
||||||
|
const newToks = tokenize(p.new_string);
|
||||||
|
const oldToks = tokenize(p.old_string);
|
||||||
|
const added = new Set([...newToks].filter((t) => !oldToks.has(t)));
|
||||||
|
if (added.size === 0) return true; // pure deletion — rationale unconstrained
|
||||||
|
const rationaleToks = tokenize(p.rationale);
|
||||||
|
for (const t of added) if (rationaleToks.has(t)) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
log(`starting · min_conf=${MIN_CONF} max_files=${MAX_FILES} model=${MODEL} commit=${COMMIT}`);
|
log(`starting · min_conf=${MIN_CONF} max_files=${MAX_FILES} model=${MODEL} commit=${COMMIT}`);
|
||||||
|
|
||||||
@ -300,8 +340,18 @@ async function main() {
|
|||||||
log(`${eligible.length} pass confidence gate + deny-list`);
|
log(`${eligible.length} pass confidence gate + deny-list`);
|
||||||
log(`taking top ${Math.min(MAX_FILES, eligible.length)} by confidence`);
|
log(`taking top ${Math.min(MAX_FILES, eligible.length)} by confidence`);
|
||||||
|
|
||||||
|
// Establish pre-run warning baseline so post-patch cargo check can
|
||||||
|
// reject patches that keep the build green but add new warnings.
|
||||||
|
// This gate exists because 96b46cd landed unused imports through the
|
||||||
|
// green-only gate — compiled fine, left a permanent dead-code warning.
|
||||||
|
log(`baseline cargo check (warning count)...`);
|
||||||
|
let baselineWarnings = (await cargoCheck()).warnings;
|
||||||
|
log(`baseline warnings = ${baselineWarnings}`);
|
||||||
|
|
||||||
let committedFiles = 0;
|
let committedFiles = 0;
|
||||||
let revertedFiles = 0;
|
let revertedFiles = 0;
|
||||||
|
let warningReverts = 0;
|
||||||
|
let rationaleReverts = 0;
|
||||||
|
|
||||||
for (const r of eligible.slice(0, MAX_FILES)) {
|
for (const r of eligible.slice(0, MAX_FILES)) {
|
||||||
log(`${r.file} (conf_avg=${r.confidence_avg} tier=${r.gradient_tier})`);
|
log(`${r.file} (conf_avg=${r.confidence_avg} tier=${r.gradient_tier})`);
|
||||||
@ -328,7 +378,7 @@ async function main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
log(` running cargo check...`);
|
log(` running cargo check...`);
|
||||||
const green = await cargoCheck();
|
const { green, warnings } = await cargoCheck();
|
||||||
if (!green) {
|
if (!green) {
|
||||||
log(` ✗ build red — reverting ${r.file}`);
|
log(` ✗ build red — reverting ${r.file}`);
|
||||||
await revertFile(r.file);
|
await revertFile(r.file);
|
||||||
@ -337,12 +387,54 @@ async function main() {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
log(` ✓ build green`);
|
// New-warning gate. If a patch keeps the build green but adds a
|
||||||
const ok = await gitCommit(r.file, patches.slice(0, applied));
|
// warning (unused import, dead_code after removed allow, etc), treat
|
||||||
if (ok) {
|
// it the same as build red: revert. The cargo-green gate alone let
|
||||||
committedFiles++;
|
// 96b46cd land unused imports; this closes that door.
|
||||||
|
if (warnings > baselineWarnings) {
|
||||||
|
log(` ✗ warnings ${baselineWarnings} → ${warnings} (+${warnings - baselineWarnings}) — reverting ${r.file}`);
|
||||||
|
await revertFile(r.file);
|
||||||
|
warningReverts++;
|
||||||
await auditLog({
|
await auditLog({
|
||||||
action: COMMIT ? "committed" : "dry_run_committed",
|
action: "warnings_increased_reverted",
|
||||||
|
file: r.file,
|
||||||
|
patches_applied: applied,
|
||||||
|
warnings_before: baselineWarnings,
|
||||||
|
warnings_after: warnings,
|
||||||
|
});
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rationale-diff alignment heuristic. At least one non-stopword
|
||||||
|
// token from the applied patches' combined new_string must appear
|
||||||
|
// in the rationale, OR the rationale must name the file's module.
|
||||||
|
// This catches cases like "Add destructive SQL filter" rationale
|
||||||
|
// with a diff that only adds `use tracing`.
|
||||||
|
const applied_patches = patches.slice(0, applied);
|
||||||
|
const rationale_ok = applied_patches.some((p) => rationaleMatchesDiff(p));
|
||||||
|
if (!rationale_ok) {
|
||||||
|
log(` ✗ rationale doesn't name anything in diff — reverting ${r.file}`);
|
||||||
|
await revertFile(r.file);
|
||||||
|
rationaleReverts++;
|
||||||
|
await auditLog({
|
||||||
|
action: "rationale_mismatch_reverted",
|
||||||
|
file: r.file,
|
||||||
|
patches_applied: applied,
|
||||||
|
rationales: applied_patches.map((p) => p.rationale),
|
||||||
|
});
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
log(` ✓ build green (warnings ${warnings}/${baselineWarnings})`);
|
||||||
|
|
||||||
|
// Dry-run MUST revert the file after a successful check so the
|
||||||
|
// workspace doesn't accumulate unpushed changes across dry-runs.
|
||||||
|
// Previously this left modified files, silently polluting state.
|
||||||
|
if (!COMMIT) {
|
||||||
|
log(` (dry-run) would commit ${r.file} — reverting workspace change`);
|
||||||
|
await revertFile(r.file);
|
||||||
|
await auditLog({
|
||||||
|
action: "dry_run_would_commit",
|
||||||
file: r.file,
|
file: r.file,
|
||||||
patches_applied: applied,
|
patches_applied: applied,
|
||||||
patches_rejected: rejected.length,
|
patches_rejected: rejected.length,
|
||||||
@ -350,10 +442,30 @@ async function main() {
|
|||||||
gradient_tier: r.gradient_tier,
|
gradient_tier: r.gradient_tier,
|
||||||
reviewer_model: r.accepted_model,
|
reviewer_model: r.accepted_model,
|
||||||
});
|
});
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const ok = await gitCommit(r.file, applied_patches);
|
||||||
|
if (ok) {
|
||||||
|
committedFiles++;
|
||||||
|
// Advance the warning baseline so a later patch on a different
|
||||||
|
// file is compared against the new (committed) state, not the
|
||||||
|
// pre-run state — otherwise every subsequent patch trips the gate.
|
||||||
|
baselineWarnings = warnings;
|
||||||
|
await auditLog({
|
||||||
|
action: "committed",
|
||||||
|
file: r.file,
|
||||||
|
patches_applied: applied,
|
||||||
|
patches_rejected: rejected.length,
|
||||||
|
confidence_avg: r.confidence_avg,
|
||||||
|
gradient_tier: r.gradient_tier,
|
||||||
|
reviewer_model: r.accepted_model,
|
||||||
|
warnings_after: warnings,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
log(`DONE · committed=${committedFiles} reverted=${revertedFiles}`);
|
log(`DONE · committed=${committedFiles} build_red=${revertedFiles} warning_revert=${warningReverts} rationale_revert=${rationaleReverts}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
await main();
|
await main();
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user