// matrix-agent-validated/auditor/types.ts

// Shared types for the claim-auditor. Every field exists for a reason;
// if something can't be verified from a check, it goes into `evidence`
// so the verdict is inspectable, not a black box.

/**
 * Identifies which kind of check a `Finding` is attributed to
 * (the type of `Finding.check`).
 */
export type CheckKind =
  | "static"
  | "dynamic"
  | "inference"
  | "kb_query";

/**
 * How serious a `Finding` is (the type of `Finding.severity`).
 */
export type Severity =
  | "info"
  | "warn"
  | "block";

/**
 * One assertion made by the author, together with where it was found and
 * how strongly it was worded.
 */
export interface Claim {
  /**
   * The exact phrase that triggered the claim, quoted verbatim — for
   * example "Phase 38 shipped", "verified end-to-end", or "works after
   * restart". Downstream checks take this as the statement of what the
   * author asserts.
   */
  text: string;

  /** Short hash of the commit the claim originates from. */
  commit_sha: string;

  /**
   * Origin within that commit: a `file:line` reference for claims found
   * in the diff, otherwise "pr_body" or "commit_message".
   */
  location: string;

  /**
   * Heuristic estimate of how forceful the claim is. "green+tested" rates
   * as strong, "should work" as weak. This sets sensitivity: the stronger
   * the claim, the harder it is blocked when the evidence is weak.
   *
   * "empirical" stands apart from the weak/moderate/strong scale. It marks
   * runtime measurements such as "N cloud calls", "306s end-to-end", or
   * "accepted on attempt N". A static diff cannot confirm these; only the
   * test output that produced them can. Inference therefore skips
   * diff-verification for empirical claims, and they are kept as
   * info-level context unless a future runtime_evidence check
   * contradicts them.
   */
  strength:
    | "weak"
    | "moderate"
    | "strong"
    | "empirical";
}

/**
 * A single result reported by a check, optionally tied to the claim it
 * concerns.
 */
export interface Finding {
  /** Kind of check this finding is attributed to. */
  check: CheckKind;

  /** Severity assigned to this finding. */
  severity: Severity;

  /** Text of the related claim; optional, so it may be left out. */
  claim_text?: string;

  /**
   * Brief free-form description, e.g. "field added but never read",
   * "no test covers this code path", "cloud model says placeholder".
   */
  summary: string;

  /**
   * The concrete backing for the summary: file paths, line numbers, log
   * excerpts, test output, cloud-model verdict. No handwaving.
   */
  evidence: string[];
}

/**
 * The outcome of auditing one PR head: an overall decision plus the
 * findings and metrics that accompany it.
 */
export interface Verdict {
  /** Number of the audited PR. */
  pr_number: number;

  /** Head SHA the verdict applies to. */
  head_sha: string;

  /** When the audit ran (string; the format is not specified here). */
  audited_at: string;

  /** The overall decision. */
  overall:
    | "approve"
    | "request_changes"
    | "block";

  /** Findings attached to this verdict. */
  findings: Finding[];

  /**
   * Real numbers for downstream policy to gate on. For example, latency
   * numbers or token counts produced by the hybrid test are surfaced
   * here, which keeps /auditor/history queryable.
   */
  metrics: Record<string, number>;

  /**
   * One-line justification for `overall`. It must be short enough to be
   * posted as the commit-status description in Gitea (max 140 chars).
   */
  one_liner: string;
}

/**
 * Metadata captured for a pull request: identifying SHAs, descriptive
 * text, state, and summaries of its commits and touched files.
 */
export interface PrSnapshot {
  /** PR number. */
  number: number;

  /** SHA of the PR head. */
  head_sha: string;

  /** SHA of the PR base. */
  base_sha: string;

  /** PR title. */
  title: string;

  /** PR body text. */
  body: string;

  /** State of the PR. */
  state:
    | "open"
    | "closed"
    | "merged";

  /** PR author. */
  author: string;

  /**
   * The commit messages in the PR. Diffs are deliberately not included;
   * they are fetched on-demand per-check.
   */
  commits: {
    sha: string;
    message: string;
    author: string;
  }[];

  /**
   * Paths of the files the PR touches, each with its lines-added and
   * lines-removed counts.
   */
  files: {
    path: string;
    additions: number;
    deletions: number;
  }[];
}