root d77622fc6b distillation: fix 7 grounding bugs found by Kimi audit
Kimi For Coding (api.kimi.com, kimi-for-coding) ran a forensic audit on
distillation v1.0.0 with full file content. 7/7 flags verified real on
grep. Substrate now matches what v1.0.0 claimed: deterministic, no
schema bypasses, Rust tests compile.

Fixes:
- mode.rs:1035,1042  matrix_corpus Some/None -> vec![..]/vec![]; cargo
                     check --tests now compiles (was silently broken;
                     only bun tests were running)
- scorer.ts:30       SCORER_VERSION env override removed - identical
                     input now produces identical version stamp, not
                     env-dependent drift
- transforms.ts:181  auto_apply wall-clock fallback (new Date()) ->
                     deterministic recorded_at fallback
- replay.ts:378      recorded_run_id Date.now() -> sha256(recorded_at);
                     replay rows now reproducible given recorded_at
- receipts.ts:454,495  input_hash_match hardcoded true was misleading
                       telemetry; bumped DRIFT_REPORT_SCHEMA_VERSION 1->2,
                       field is now boolean|null with honest null when
                       not computed at this layer
- score_runs.ts:89-100,159  dedup keyed only on sig_hash made
                            scorer-version bumps invisible. Composite
                            sig_hash:scorer_version forces re-scoring
- export_sft.ts:126  (ev as any).contractor bypass emitted "<contractor>"
                     placeholder for every contract_analyses SFT row.
                     Added typed EvidenceRecord.metadata bucket;
                     transforms.ts populates metadata.contractor;
                     exporter reads typed value

Verification (all green):
  cargo check -p gateway --tests   compiles
  bun test tests/distillation/     145 pass / 0 fail
  bun acceptance                   22/22 invariants
  bun audit-full                   16/16 required checks

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-27 05:34:31 -05:00

86 lines
3.1 KiB
TypeScript

// drift_report.ts — comparison of a current run summary vs the
// previous run summary on disk. Spec calls this "drift detection";
// concretely it answers: did the pipeline behave the same way as
// last time, and if not, was the change explained by an input change
// or did it appear out of nowhere (silent drift)?
//
// Severity:
// ok — within 20% on every metric, no hash surprises
// warn — record-count or category swing > 20%, OR new error class
// alert — output_hash differs while input_hash is identical
// (deterministic violation — same input → different output)
import {
ValidationResult, requireString, requireIsoTimestamp,
} from "./types";
import type { StageName } from "./stage_receipt";
export const DRIFT_REPORT_SCHEMA_VERSION = 2;
export const DRIFT_THRESHOLD_PCT = 0.20;
export type DriftSeverity = "ok" | "warn" | "alert";
export interface StageDrift {
stage: StageName;
delta_records_in: number; // current - prior
delta_records_out: number;
delta_accepted: number;
delta_quarantined: number;
pct_change_out: number | null; // null when prior had 0 records
// null when input_hash isn't materialized into the stage summary —
// schema v1 lied and reported `true` here. v2 is honest: callers
// that want determinism enforcement must read the full StageReceipt
// off disk and compute input_hash equality there.
input_hash_match: boolean | null;
output_hash_match: boolean;
// alert if input_hash matches but output_hash diverges
deterministic_violation: boolean;
notes: string[];
}
export interface DriftReport {
schema_version: number;
run_id: string;
prior_run_id: string | null; // null when no prior run on disk
generated_at: string;
severity: DriftSeverity;
stages: StageDrift[];
// Top-level swings the human reader should see immediately.
flags: string[];
}
export function validateDriftReport(input: unknown): ValidationResult<DriftReport> {
const errors: string[] = [];
if (typeof input !== "object" || input === null) {
return { valid: false, errors: ["expected object"] };
}
const r = input as Record<string, unknown>;
let ok = true;
if (r.schema_version !== DRIFT_REPORT_SCHEMA_VERSION) {
errors.push(`schema_version: expected ${DRIFT_REPORT_SCHEMA_VERSION}, got ${JSON.stringify(r.schema_version)}`);
ok = false;
}
ok = requireString(r.run_id, "run_id", errors) && ok;
if (r.prior_run_id !== null && typeof r.prior_run_id !== "string") {
errors.push("prior_run_id: must be string or null");
ok = false;
}
ok = requireIsoTimestamp(r.generated_at, "generated_at", errors) && ok;
if (!["ok", "warn", "alert"].includes(r.severity as string)) {
errors.push(`severity: must be ok|warn|alert, got ${JSON.stringify(r.severity)}`);
ok = false;
}
if (!Array.isArray(r.stages)) {
errors.push("stages: expected array");
ok = false;
}
if (!Array.isArray(r.flags)) {
errors.push("flags: expected array");
ok = false;
}
if (!ok) return { valid: false, errors };
return { valid: true, value: r as unknown as DriftReport };
}