root d77622fc6b distillation: fix 7 grounding bugs found by Kimi audit
Kimi For Coding (api.kimi.com, kimi-for-coding) ran a forensic audit on
distillation v1.0.0 with full file content. 7/7 flags verified real on
grep. Substrate now matches what v1.0.0 claimed: deterministic, no
schema bypasses, Rust tests compile.

Fixes:
- mode.rs:1035,1042  matrix_corpus Some/None -> vec![..]/vec![]; cargo
                     check --tests now compiles (was silently broken;
                     only bun tests were running)
- scorer.ts:30       SCORER_VERSION env override removed - identical
                     input now produces identical version stamp, not
                     env-dependent drift
- transforms.ts:181  auto_apply wall-clock fallback (new Date()) ->
                     deterministic recorded_at fallback
- replay.ts:378      recorded_run_id Date.now() -> sha256(recorded_at);
                     replay rows now reproducible given recorded_at
- receipts.ts:454,495  input_hash_match hardcoded true was misleading
                       telemetry; bumped DRIFT_REPORT_SCHEMA_VERSION 1->2,
                       field is now boolean|null with honest null when
                       not computed at this layer
- score_runs.ts:89-100,159  dedup keyed only on sig_hash made
                            scorer-version bumps invisible. Composite
                            sig_hash:scorer_version forces re-scoring
- export_sft.ts:126  (ev as any).contractor bypass emitted "<contractor>"
                     placeholder for every contract_analyses SFT row.
                     Added typed EvidenceRecord.metadata bucket;
                     transforms.ts populates metadata.contractor;
                     exporter reads typed value

Verification (all green):
  cargo check -p gateway --tests   compiles
  bun test tests/distillation/     145 pass / 0 fail
  bun acceptance                   22/22 invariants
  bun audit-full                   16/16 required checks

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-27 05:34:31 -05:00

308 lines
13 KiB
TypeScript

// scorer.ts — pure deterministic Success Scorer.
//
// Takes one EvidenceRecord, returns category + reasons + sub_scores.
// NO I/O, NO LLM, NO clock reads, NO mutable state. The only randomness
// allowed is none. Identical input → identical output forever.
//
// Three-class strategy (see docs/recon/local-distillation-recon.md +
// data/_kb/evidence_health.md for the source taxonomy):
//
// CLASS A — verdict-bearing
// scrum_reviews, observer_reviews, audits, contract_analyses
// Direct scoring from existing markers/observer_verdict
//
// CLASS B — telemetry-rich
// auto_apply, outcomes, mode_experiments
// Markers exist but partial; needs_human_review fills the gap
//
// CLASS C — pure-extraction (no native scoring signal)
// distilled_*, audit_facts, observer_escalations
// Default needs_human_review; v2 will JOIN to parent verdict
//
// scorer_version is stamped on every output. Bumping it lets a
// downstream re-scoring detect drift between historical runs.
import type { EvidenceRecord } from "../../auditor/schemas/distillation/evidence_record";
import type { ScoreCategory, ScoredRun } from "../../auditor/schemas/distillation/scored_run";
import { SCORED_RUN_SCHEMA_VERSION } from "../../auditor/schemas/distillation/scored_run";
import { canonicalSha256 } from "../../auditor/schemas/distillation/types";
// Hardcoded — the deterministic-output contract requires this. Bump the
// literal in the same commit as any scoring-rule change so the version
// stamp moves atomically with logic. Env override removed 2026-04-27
// after Kimi audit flagged identical-input-different-version drift.
export const SCORER_VERSION = "v1.0.0";
// Result of scoring one EvidenceRecord: the category decision, a
// human-readable reason trail explaining how it was reached, and the
// structured per-dimension signals carried into ScoredRun.sub_scores.
export interface ScoreOutput {
category: ScoreCategory;
reasons: string[];
sub_scores: ScoredRun["sub_scores"];
}
// Map source_file (from provenance) → source class. Centralized so
// adding a new source is one-line.
type SourceClass = "verdict" | "telemetry" | "extraction";
// Classify a provenance source_file into one of the three scoring
// classes. Comparison is by stem: the data/_kb/ prefix and .jsonl
// suffix are stripped first so callers may pass either form.
function sourceClassFor(source_file: string): SourceClass {
  const stem = source_file.replace(/^data\/_kb\//, "").replace(/\.jsonl$/, "");
  const verdictStems = ["scrum_reviews", "observer_reviews", "audits", "contract_analyses"];
  const telemetryStems = ["auto_apply", "outcomes", "mode_experiments"];
  if (verdictStems.includes(stem)) {
    return "verdict";
  }
  if (telemetryStems.includes(stem)) {
    return "telemetry";
  }
  // distilled_*, audit_facts, observer_escalations — and any unknown
  // source — route to extraction, the most conservative class (forces
  // needs_human_review until a dedicated transform is added).
  return "extraction";
}
// ─── Class A: verdict-bearing ─────────────────────────────────────
// Score a scrum_reviews row from its accepted_on_attempt_N success
// marker: attempt 1 → accepted, attempts 2+ → partially_accepted.
// Missing or malformed markers defer to human review.
function scoreScrumReview(r: EvidenceRecord): ScoreOutput {
  const reasons: string[] = [];
  const subs: ScoredRun["sub_scores"] = {};
  const successMarker = (r.success_markers ?? []).find(m => m.startsWith("accepted_on_attempt_"));
  if (!successMarker) {
    reasons.push("scrum_review missing accepted_on_attempt_* success marker");
    return { category: "needs_human_review", reasons, sub_scores: subs };
  }
  const attempt = Number(successMarker.replace("accepted_on_attempt_", ""));
  // Guard against malformed markers (e.g. "accepted_on_attempt_oops"):
  // NaN previously flowed through to "accepted only after NaN attempts"
  // with a NaN sub_score and a partially_accepted category. Route such
  // rows to human review instead.
  if (!Number.isInteger(attempt) || attempt < 1) {
    reasons.push(`scrum_review malformed accepted_on_attempt_* marker: ${successMarker}`);
    return { category: "needs_human_review", reasons, sub_scores: subs };
  }
  subs.accepted_on_attempt = attempt;
  if (attempt === 1) {
    reasons.push("scrum: accepted on first attempt");
    return { category: "accepted", reasons, sub_scores: subs };
  }
  if (attempt <= 3) {
    reasons.push(`scrum: accepted after ${attempt} attempts`);
    return { category: "partially_accepted", reasons, sub_scores: subs };
  }
  reasons.push(`scrum: accepted only after ${attempt} attempts (high-cost path)`);
  return { category: "partially_accepted", reasons, sub_scores: subs };
}
// Score an observer_reviews row directly from observer_verdict:
// accept → accepted, reject → rejected, cycle → partially_accepted,
// anything else → needs_human_review.
function scoreObserverReview(r: EvidenceRecord): ScoreOutput {
  const reasons: string[] = [];
  const subs: ScoredRun["sub_scores"] = {};
  const verdict = r.observer_verdict;
  switch (verdict) {
    case "accept":
      subs.observer_verdict = "accept";
      reasons.push("observer accepted the reviewed attempt");
      return { category: "accepted", reasons, sub_scores: subs };
    case "reject":
      subs.observer_verdict = "reject";
      reasons.push("observer rejected the reviewed attempt");
      return { category: "rejected", reasons, sub_scores: subs };
    case "cycle":
      subs.observer_verdict = "cycle";
      reasons.push("observer flagged the attempt as cycling — partial signal");
      return { category: "partially_accepted", reasons, sub_scores: subs };
    default:
      reasons.push(`observer_verdict missing or unrecognized: ${JSON.stringify(verdict ?? null)}`);
      return { category: "needs_human_review", reasons, sub_scores: subs };
  }
}
// Score an audits.jsonl row — the auditor's per-finding stream, not PR
// verdicts. Legacy overall markers ("approved"/"blocked"/
// "request_changes") are checked first for back-compat with pre-fix
// materializations on disk; otherwise the Phase 2 severity markers
// decide:
//   audit_severity_{info,low}       → accepted (minor finding)
//   audit_severity_medium           → partially_accepted
//   audit_severity_{high,critical}  → rejected (real problem)
function scoreAudit(r: EvidenceRecord): ScoreOutput {
  const reasons: string[] = [];
  const subs: ScoredRun["sub_scores"] = {};
  const successMarkers = r.success_markers ?? [];
  const failureMarkers = r.failure_markers ?? [];
  // Legacy overall markers (checked first — they predate severity markers).
  if (successMarkers.includes("approved")) {
    reasons.push("audit overall=approved (legacy marker)");
    return { category: "accepted", reasons, sub_scores: subs };
  }
  if (failureMarkers.includes("blocked")) {
    reasons.push("audit overall=block (legacy marker)");
    return { category: "rejected", reasons, sub_scores: subs };
  }
  if (failureMarkers.includes("request_changes")) {
    reasons.push("audit overall=request_changes (legacy marker)");
    return { category: "partially_accepted", reasons, sub_scores: subs };
  }
  // Severity-derived markers (current Phase 2 transform). A severity
  // marker in success_markers means the transform judged it minor.
  const severityInSuccess = successMarkers.find(m => m.startsWith("audit_severity_"));
  if (severityInSuccess) {
    reasons.push(`${severityInSuccess} → minor finding`);
    return { category: "accepted", reasons, sub_scores: subs };
  }
  const severityInFailure = failureMarkers.find(m => m.startsWith("audit_severity_"));
  if (severityInFailure === "audit_severity_medium") {
    reasons.push("audit_severity_medium → finding warrants review");
    return { category: "partially_accepted", reasons, sub_scores: subs };
  }
  const blocking = severityInFailure === "audit_severity_high" || severityInFailure === "audit_severity_critical";
  if (blocking) {
    reasons.push(`${severityInFailure} → blocking finding`);
    return { category: "rejected", reasons, sub_scores: subs };
  }
  reasons.push("audit row has no severity or overall marker");
  return { category: "needs_human_review", reasons, sub_scores: subs };
}
// Score a contract_analyses row from its observer verdict. An explicit
// observer_rejected failure marker takes precedence over (and also
// covers) verdict === "reject", so rejection beats an absent verdict.
function scoreContractAnalysis(r: EvidenceRecord): ScoreOutput {
  const reasons: string[] = [];
  const subs: ScoredRun["sub_scores"] = {};
  const verdict = r.observer_verdict;
  const explicitlyRejected = (r.failure_markers ?? []).includes("observer_rejected");
  if (explicitlyRejected || verdict === "reject") {
    subs.observer_verdict = "reject";
    reasons.push("contract analysis: observer rejected");
    return { category: "rejected", reasons, sub_scores: subs };
  }
  if (verdict === "accept") {
    subs.observer_verdict = "accept";
    reasons.push("contract analysis: observer accepted");
    return { category: "accepted", reasons, sub_scores: subs };
  }
  if (verdict === "cycle") {
    subs.observer_verdict = "cycle";
    reasons.push("contract analysis: observer cycled (partial)");
    return { category: "partially_accepted", reasons, sub_scores: subs };
  }
  reasons.push("contract analysis: no observer verdict signal");
  return { category: "needs_human_review", reasons, sub_scores: subs };
}
// ─── Class B: telemetry-rich ──────────────────────────────────────
// Score an auto_apply row: "committed" success marker → accepted; any
// failure marker containing "reverted" → rejected (with cargo_green
// false when the revert was build_red); neither → needs_human_review
// (likely no_patches or dry_run — not a failure, but no commit either).
function scoreAutoApply(r: EvidenceRecord): ScoreOutput {
  const reasons: string[] = [];
  const subs: ScoredRun["sub_scores"] = {};
  const committed = (r.success_markers ?? []).includes("committed");
  if (committed) {
    subs.cargo_green = true;
    reasons.push("auto_apply: patch committed (cargo green + warning baseline + rationale alignment passed)");
    return { category: "accepted", reasons, sub_scores: subs };
  }
  const revertMarker = (r.failure_markers ?? []).find(f => f.includes("reverted"));
  if (revertMarker !== undefined) {
    if (revertMarker.includes("build_red")) {
      subs.cargo_green = false;
    }
    reasons.push(`auto_apply: ${revertMarker}`);
    return { category: "rejected", reasons, sub_scores: subs };
  }
  reasons.push("auto_apply: no commit + no revert (likely no_patches or dry_run)");
  return { category: "needs_human_review", reasons, sub_scores: subs };
}
// Score an outcomes row: "all_events_ok" → accepted; a positive
// gap_signals count in validation_results → partially_accepted;
// otherwise needs_human_review.
function scoreOutcomes(r: EvidenceRecord): ScoreOutput {
  const reasons: string[] = [];
  const subs: ScoredRun["sub_scores"] = {};
  const allOk = (r.success_markers ?? []).includes("all_events_ok");
  if (allOk) {
    reasons.push("outcomes: all events ok");
    return { category: "accepted", reasons, sub_scores: subs };
  }
  // Validation results may carry partial signal.
  const gapSignals = r.validation_results?.gap_signals as number | undefined;
  const hasGaps = typeof gapSignals === "number" && gapSignals > 0;
  if (hasGaps) {
    reasons.push(`outcomes: ${gapSignals} gap signal(s) detected`);
    return { category: "partially_accepted", reasons, sub_scores: subs };
  }
  reasons.push("outcomes: no decisive marker — defer to human");
  return { category: "needs_human_review", reasons, sub_scores: subs };
}
// Score a mode_experiments row. Phase 2 doesn't derive markers for this
// source yet, so the v1 heuristic is: empty/missing response text →
// rejected; response present but latency over the 2-minute soft cap →
// partially_accepted; otherwise needs_human_review until the
// grounding-from-mode_compare hook lands (Phase 4 / re-scoring).
function scoreModeExperiment(r: EvidenceRecord): ScoreOutput {
  const reasons: string[] = [];
  const subs: ScoredRun["sub_scores"] = {};
  const hasResponseText = typeof r.text === "string" && r.text.trim().length > 0;
  if (!hasResponseText) {
    reasons.push("mode_experiment: empty response text");
    return { category: "rejected", reasons, sub_scores: subs };
  }
  const latency = r.latency_ms;
  const overCap = typeof latency === "number" && latency > 120_000;
  if (overCap) {
    reasons.push(`mode_experiment: latency ${latency}ms exceeds 2-minute soft cap`);
    return { category: "partially_accepted", reasons, sub_scores: subs };
  }
  reasons.push("mode_experiment: response present, latency within bounds; verdict not yet wired");
  return { category: "needs_human_review", reasons, sub_scores: subs };
}
// ─── Class C: pure-extraction ────────────────────────────────────
// Phase 3 v1: extraction-class records have no native scoring signal,
// so every row defers to a human with one explicit reason. Phase 3 v2
// will JOIN to a parent verdict-bearing record instead.
function scoreExtraction(_r: EvidenceRecord): ScoreOutput {
  return {
    category: "needs_human_review",
    reasons: ["extraction-class source has no native scoring signal — JOIN to parent verdict pending Phase 3 v2"],
    sub_scores: {},
  };
}
// ─── Dispatch ─────────────────────────────────────────────────────
// Dispatch one EvidenceRecord to its class-specific scorer based on
// provenance.source_file. Anything without a dedicated scorer falls
// through to the extraction default (needs_human_review).
export function scoreRecord(record: EvidenceRecord): ScoreOutput {
  const sourceFile = record.provenance.source_file;
  const stem = sourceFile.replace(/^data\/_kb\//, "").replace(/\.jsonl$/, "");
  const cls = sourceClassFor(sourceFile);
  const verdictScorers: Record<string, (r: EvidenceRecord) => ScoreOutput> = {
    scrum_reviews: scoreScrumReview,
    observer_reviews: scoreObserverReview,
    audits: scoreAudit,
    contract_analyses: scoreContractAnalysis,
  };
  const telemetryScorers: Record<string, (r: EvidenceRecord) => ScoreOutput> = {
    auto_apply: scoreAutoApply,
    outcomes: scoreOutcomes,
    mode_experiments: scoreModeExperiment,
  };
  const scorer =
    cls === "verdict" ? verdictScorers[stem]
    : cls === "telemetry" ? telemetryScorers[stem]
    : undefined;
  return scorer ? scorer(record) : scoreExtraction(record);
}
// Build a complete ScoredRun from one EvidenceRecord. The caller
// supplies recorded_at plus the source file / line offset used to
// populate provenance; scored_at is stamped with recorded_at (no clock
// reads — determinism contract).
export async function buildScoredRun(
record: EvidenceRecord,
source_file_relpath: string,
line_offset: number,
recorded_at: string,
): Promise<ScoredRun> {
  const scored = scoreRecord(record);
  // sig_hash is computed over the materialized EvidenceRecord, not the
  // raw source stream: ScoredRun traces to the evidence row.
  const sig_hash = await canonicalSha256(record);
  const provenance = {
    source_file: source_file_relpath,
    line_offset,
    sig_hash,
    recorded_at,
  };
  return {
    schema_version: SCORED_RUN_SCHEMA_VERSION,
    evidence_run_id: record.run_id,
    evidence_task_id: record.task_id,
    category: scored.category,
    reasons: scored.reasons,
    scored_at: recorded_at,
    scorer_version: SCORER_VERSION,
    sub_scores: scored.sub_scores,
    provenance,
  };
}