Compare commits
8 Commits
d11632a6fa
...
00c8408335
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
00c8408335 | ||
|
|
8aa7ee974f | ||
|
|
bc698eb6da | ||
|
|
ff5de76241 | ||
|
|
3eaac413e6 | ||
|
|
8d02c7f441 | ||
|
|
643dd2d520 | ||
|
|
d77622fc6b |
@ -23,6 +23,7 @@ import { runStaticCheck } from "./checks/static.ts";
|
||||
import { runDynamicCheck } from "./checks/dynamic.ts";
|
||||
import { runInferenceCheck } from "./checks/inference.ts";
|
||||
import { runKbCheck } from "./checks/kb_query.ts";
|
||||
import { runKimiArchitectCheck } from "./checks/kimi_architect.ts";
|
||||
|
||||
const VERDICTS_DIR = "/home/profit/lakehouse/data/_auditor/verdicts";
|
||||
// Playbook for audit findings — one row per block/warn finding from a
|
||||
@ -67,6 +68,29 @@ export async function auditPr(pr: PrSnapshot, opts: AuditOptions = {}): Promise<
|
||||
...kbFindings,
|
||||
];
|
||||
|
||||
// Kimi-architect second-pass review. Off by default; enabled with
|
||||
// LH_AUDITOR_KIMI=1. Sequential (not in the parallel block above)
|
||||
// because it consumes the prior findings as context — Kimi sees what
|
||||
// deepseek already flagged and is asked "what did everyone miss?"
|
||||
// Failure-isolated by design: any error returns a single info-level
|
||||
// skip finding so the existing audit pipeline never blocks on Kimi.
|
||||
if (process.env.LH_AUDITOR_KIMI === "1") {
|
||||
try {
|
||||
const kimiFindings = await runKimiArchitectCheck(diff, allFindings, {
|
||||
pr_number: pr.number,
|
||||
head_sha: pr.head_sha,
|
||||
});
|
||||
allFindings.push(...kimiFindings);
|
||||
} catch (e) {
|
||||
allFindings.push({
|
||||
check: "kimi_architect",
|
||||
severity: "info",
|
||||
summary: `kimi_architect outer error — ${(e as Error).message.slice(0, 160)}`,
|
||||
evidence: [(e as Error).stack?.slice(0, 360) ?? ""],
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
const duration_ms = Date.now() - t0;
|
||||
const metrics = {
|
||||
audit_duration_ms: duration_ms,
|
||||
@ -184,7 +208,7 @@ function formatReviewBody(v: Verdict): string {
|
||||
lines.push("");
|
||||
|
||||
// Per-check sections, only if the check produced findings.
|
||||
const checkOrder = ["static", "dynamic", "inference", "kb_query"] as const;
|
||||
const checkOrder = ["static", "dynamic", "inference", "kb_query", "kimi_architect"] as const;
|
||||
for (const check of checkOrder) {
|
||||
const fs = byCheck[check] ?? [];
|
||||
if (fs.length === 0) continue;
|
||||
@ -217,6 +241,6 @@ function formatReviewBody(v: Verdict): string {
|
||||
return lines.join("\n");
|
||||
}
|
||||
|
||||
function stubFinding(check: "dynamic" | "inference", why: string): Finding[] {
|
||||
function stubFinding(check: "dynamic" | "inference" | "kimi_architect", why: string): Finding[] {
|
||||
return [{ check, severity: "info", summary: `${check} check skipped — ${why}`, evidence: [why] }];
|
||||
}
|
||||
|
||||
353
auditor/checks/kimi_architect.ts
Normal file
353
auditor/checks/kimi_architect.ts
Normal file
@ -0,0 +1,353 @@
|
||||
// Kimi-architect check — second-pass senior architectural review using
|
||||
// kimi-for-coding (Kimi K2.6) via /v1/chat provider=kimi.
|
||||
//
|
||||
// Runs AFTER the deepseek inference check (N=3 consensus) and the
|
||||
// static/kb_query checks. Reads their findings as context and asks Kimi
|
||||
// "what did everyone else miss?" — complementing the cheap-consensus
|
||||
// voting with a sparse senior pass that catches load-bearing issues
|
||||
// (compile errors, false telemetry, schema bypasses, etc.) which the
|
||||
// voting structure can't see.
|
||||
//
|
||||
// Why Kimi here and not in the inner inference loop:
|
||||
// - Cost: ~3min wall-clock per call vs ~30s for deepseek consensus.
|
||||
// - TOS: api.kimi.com is User-Agent-gated (see crates/gateway/src/v1/
|
||||
// kimi.rs); cost-bounded calls only.
|
||||
// - Value: experiment 2026-04-27 showed 7/7 grounding rate with full
|
||||
// files vs ~50% on truncated input. Best as a sparse complement, not
|
||||
// a replacement.
|
||||
//
|
||||
// Failure-isolated: any Kimi error returns a single info-level Finding
|
||||
// "kimi_architect skipped — <reason>" so the existing audit pipeline
|
||||
// is never blocked by a Kimi outage / TOS revocation / 429.
|
||||
//
|
||||
// Cost cap: if a kimi_verdicts/<pr>-<sha>.json file exists less than 24h
|
||||
// old, return cached findings without calling upstream. New commits
|
||||
// produce new SHAs so this is per-head, not per-day.
|
||||
//
|
||||
// Off by default: caller checks LH_AUDITOR_KIMI=1 before invoking.
|
||||
|
||||
import { existsSync } from "node:fs";
import { readFile, writeFile, mkdir, appendFile, stat } from "node:fs/promises";
import { dirname, join, resolve } from "node:path";

import type { Finding, CheckKind } from "../types.ts";
|
||||
// Gateway endpoint + on-disk locations. Paths are host-absolute — this
// check runs on the lakehouse box only.
const GATEWAY = process.env.LH_GATEWAY_URL ?? "http://localhost:3100";
const KIMI_VERDICTS_DIR = "/home/profit/lakehouse/data/_auditor/kimi_verdicts";
const KIMI_AUDITS_JSONL = "/home/profit/lakehouse/data/_kb/kimi_audits.jsonl";
const REPO_ROOT = "/home/profit/lakehouse";
// 15 min budget. Bun's fetch has an intrinsic ~300s limit that our
// AbortController + setTimeout combo could not override; we use curl
// via Bun.spawn instead (callKimi below). Curl honors -m for max
// transfer time without a hard intrinsic ceiling.
const CALL_TIMEOUT_MS = 900_000;
// Verdict-cache TTL (see the cost-cap note in the header comment).
const CACHE_TTL_MS = 24 * 60 * 60 * 1000;
// Diff text beyond this is truncated in buildPrompt (with a marker).
const MAX_DIFF_CHARS = 180_000;
// Cap on prior-findings rows fed into the prompt context.
const MAX_PRIOR_FINDINGS = 50;
// Default provider/model = ollama_cloud/kimi-k2.6. Pre-2026-04-27 we
// went direct to api.kimi.com, but Ollama Cloud Pro now exposes the
// same model legitimately, so we route there to avoid User-Agent
// gating. The api.kimi.com path (provider=kimi) remains wired in the
// gateway as a fallback for when Ollama Cloud is upstream-broken.
const KIMI_PROVIDER = process.env.LH_AUDITOR_KIMI_PROVIDER ?? "ollama_cloud";
const KIMI_MODEL = process.env.LH_AUDITOR_KIMI_MODEL ?? "kimi-k2.6";
// Big-diff promotion: when the diff exceeds OPUS_THRESHOLD_CHARS, swap
// to OPUS_MODEL for that audit. 2026-04-27 3-way bake-off (Kimi vs
// Haiku vs Opus on a 32K diff) showed Opus is the only model that
// catches cross-file ramifications + escalates `block` severity on
// real architectural risks. ~5x the spend per audit, only worth it
// when the diff is big enough to have those risks.
//
// Defaults: Haiku for normal diffs (fast, cheap, ~$0.02), Opus for
// > 100k chars. Disable promotion: set OPUS_THRESHOLD_CHARS very high.
const OPUS_MODEL = process.env.LH_AUDITOR_KIMI_OPUS_MODEL ?? "claude-opus-4-7";
const OPUS_PROVIDER = process.env.LH_AUDITOR_KIMI_OPUS_PROVIDER ?? "opencode";
// `||` (not `??`) so an empty-string or non-numeric env value falls
// back to the default instead of parsing to 0/NaN.
const OPUS_THRESHOLD_CHARS = Number(process.env.LH_AUDITOR_KIMI_OPUS_THRESHOLD_CHARS) || 100_000;
||||
|
||||
function selectModel(diffLen: number): { provider: string; model: string; promoted: boolean } {
|
||||
if (diffLen > OPUS_THRESHOLD_CHARS) {
|
||||
return { provider: OPUS_PROVIDER, model: OPUS_MODEL, promoted: true };
|
||||
}
|
||||
return { provider: KIMI_PROVIDER, model: KIMI_MODEL, promoted: false };
|
||||
}
|
||||
// 128K — Kimi K2.6 emits reasoning_content that counts against this
// budget but isn't surfaced in the OpenAI-shape `content` field.
// Capping low silently produces empty content with finish_reason=length
// when reasoning consumes the budget. 128K leaves ample room for both
// reasoning and visible findings on any audit prompt we throw at it.
// Override via LH_AUDITOR_KIMI_MAX_TOKENS only if you want to cap cost.
//
// Bug fix 2026-04-27 (caught by Kimi's own self-audit): empty env var
// like LH_AUDITOR_KIMI_MAX_TOKENS="" used to parse via Number("") → 0
// because `??` only catches null/undefined. Use `||` so empty string,
// 0, or NaN all fall back to the default.
const MAX_TOKENS = Number(process.env.LH_AUDITOR_KIMI_MAX_TOKENS) || 128_000;
|
||||
|
||||
// Per-audit context handed in by the auditor entry point.
export interface KimiArchitectContext {
  pr_number: number;  // PR under audit
  head_sha: string;   // head commit — forms the cache key with pr_number
}

// On-disk verdict record persisted under KIMI_VERDICTS_DIR. Doubles as
// the cache payload (loadCachedVerdict) and the metrics source
// (appendMetrics).
interface KimiVerdictFile {
  pr_number: number;
  head_sha: string;
  cached_at: string;     // ISO timestamp of the upstream call
  model: string;         // model actually used (may be the Opus promotion)
  latency_ms: number;
  finish_reason: string; // upstream finish_reason ("stop", "length", …)
  usage: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
  raw_content: string;   // full raw model output, kept for offline inspection
  findings: Finding[];
  grounding: { total: number; verified: number; rate: number };
}
|
||||
|
||||
export async function runKimiArchitectCheck(
|
||||
diff: string,
|
||||
priorFindings: Finding[],
|
||||
ctx: KimiArchitectContext,
|
||||
): Promise<Finding[]> {
|
||||
const cachePath = join(KIMI_VERDICTS_DIR, `${ctx.pr_number}-${ctx.head_sha.slice(0, 12)}.json`);
|
||||
|
||||
// Cost cap — return cached findings if a verdict for this exact head
|
||||
// SHA was generated within the TTL.
|
||||
const cached = await loadCachedVerdict(cachePath);
|
||||
if (cached) {
|
||||
const fs2: Finding[] = cached.findings.length > 0
|
||||
? cached.findings
|
||||
: [{ check: "kimi_architect" as CheckKind, severity: "info", summary: "kimi_architect cached — 0 findings", evidence: [`cache: ${cachePath}`] }];
|
||||
return fs2;
|
||||
}
|
||||
|
||||
const selected = selectModel(diff.length);
|
||||
let response: { content: string; usage: any; finish_reason: string; latency_ms: number };
|
||||
try {
|
||||
response = await callKimi(buildPrompt(diff, priorFindings, ctx), selected.provider, selected.model);
|
||||
} catch (e) {
|
||||
return [skipFinding(`kimi call failed (${selected.model}): ${(e as Error).message.slice(0, 200)}`)];
|
||||
}
|
||||
|
||||
const findings = parseFindings(response.content);
|
||||
const grounding = await computeGrounding(findings);
|
||||
|
||||
const verdict: KimiVerdictFile = {
|
||||
pr_number: ctx.pr_number,
|
||||
head_sha: ctx.head_sha,
|
||||
cached_at: new Date().toISOString(),
|
||||
model: selected.model,
|
||||
latency_ms: response.latency_ms,
|
||||
finish_reason: response.finish_reason,
|
||||
usage: {
|
||||
prompt_tokens: response.usage?.prompt_tokens ?? 0,
|
||||
completion_tokens: response.usage?.completion_tokens ?? 0,
|
||||
total_tokens: response.usage?.total_tokens ?? 0,
|
||||
},
|
||||
raw_content: response.content,
|
||||
findings,
|
||||
grounding,
|
||||
};
|
||||
|
||||
await persistVerdict(cachePath, verdict);
|
||||
await appendMetrics(verdict);
|
||||
|
||||
return findings.length > 0
|
||||
? findings
|
||||
: [{
|
||||
check: "kimi_architect" as CheckKind,
|
||||
severity: "info",
|
||||
summary: `kimi_architect produced 0 ranked findings (${response.finish_reason}, ${verdict.usage.completion_tokens} tokens)`,
|
||||
evidence: [`raw response: ${cachePath}`],
|
||||
}];
|
||||
}
|
||||
|
||||
async function loadCachedVerdict(path: string): Promise<KimiVerdictFile | null> {
|
||||
if (!existsSync(path)) return null;
|
||||
try {
|
||||
const s = await stat(path);
|
||||
if (Date.now() - s.mtimeMs > CACHE_TTL_MS) return null;
|
||||
return JSON.parse(await readFile(path, "utf8")) as KimiVerdictFile;
|
||||
} catch { return null; }
|
||||
}
|
||||
|
||||
function buildPrompt(diff: string, priorFindings: Finding[], ctx: KimiArchitectContext): string {
|
||||
const truncatedDiff = diff.length > MAX_DIFF_CHARS
|
||||
? diff.slice(0, MAX_DIFF_CHARS) + `\n\n... [truncated; original diff was ${diff.length} chars]`
|
||||
: diff;
|
||||
|
||||
const priorBlock = priorFindings
|
||||
.filter(f => f.severity !== "info")
|
||||
.slice(0, MAX_PRIOR_FINDINGS)
|
||||
.map(f => `- [${f.check}/${f.severity}] ${f.summary}${f.evidence?.[0] ? ` — ${f.evidence[0].slice(0, 160)}` : ""}`)
|
||||
.join("\n");
|
||||
|
||||
return `You are a senior software architect doing a second-pass review on PR #${ctx.pr_number} (head ${ctx.head_sha.slice(0, 12)}). The team's automated auditor (deepseek-v3.1:671b, N=3 consensus) already produced findings. Your job is NOT to repeat what they found — your job is to catch what their voting structure CAN'T see: compile errors, type-system bypasses, false telemetry, silent determinism leaks, schema-bypass anti-patterns, load-bearing assumptions that look fine line-by-line.
|
||||
|
||||
GROUNDING RULES (non-negotiable):
|
||||
- Cite file:line for EVERY finding. Lines you cite must actually contain what you claim. Confabulating a finding wastes more time than missing one.
|
||||
- If the diff is truncated and you can't verify a claim, say "diff-truncated, can't verify" — DO NOT guess.
|
||||
- Distinguish architectural concerns (no specific line) from concrete bugs (specific line). Don't dress one as the other.
|
||||
|
||||
PRIOR FINDINGS FROM DEEPSEEK CONSENSUS (do not repeat these):
|
||||
${priorBlock || "(none)"}
|
||||
|
||||
OUTPUT FORMAT (markdown):
|
||||
- ## Verdict (one sentence)
|
||||
- ## Findings (5-10 items, each formatted EXACTLY as below)
|
||||
|
||||
For each finding use this exact shape so a parser can lift them:
|
||||
|
||||
### F1: <one-line summary>
|
||||
- **Severity:** block | warn | info
|
||||
- **File:** path/to/file.ext:LINE
|
||||
- **Rationale:** one or two sentences
|
||||
|
||||
THE DIFF:
|
||||
|
||||
${truncatedDiff}
|
||||
`;
|
||||
}
|
||||
|
||||
// POST one chat request to the local gateway's /v1/chat and return the
// first choice's content plus usage/latency metadata. Throws (with a
// truncated diagnostic) on curl failure, unparseable output, or a
// gateway-level error payload — the caller converts that into a
// "skipped" finding.
async function callKimi(prompt: string, provider: string, model: string): Promise<{ content: string; usage: any; finish_reason: string; latency_ms: number }> {
  const t0 = Date.now();
  const body = JSON.stringify({
    provider,
    model,
    messages: [{ role: "user", content: prompt }],
    max_tokens: MAX_TOKENS,
    temperature: 0.2,
  });
  // curl via Bun.spawn — bypasses Bun fetch's ~300s intrinsic ceiling.
  // -m sets the max transfer time honored end-to-end. Body is piped via
  // stdin ("--data-binary @-") to avoid argv length limits on big audit
  // prompts (~50K+ tokens).
  const proc = Bun.spawn({
    cmd: [
      "curl", "-sS", "-X", "POST",
      "-m", String(Math.ceil(CALL_TIMEOUT_MS / 1000)),
      "-H", "content-type: application/json",
      "--data-binary", "@-",
      `${GATEWAY}/v1/chat`,
    ],
    stdin: "pipe",
    stdout: "pipe",
    stderr: "pipe",
  });
  proc.stdin.write(body);
  await proc.stdin.end();
  // Drain stdout/stderr concurrently with waiting for exit so a large
  // response can't deadlock on a full pipe buffer.
  const [stdout, stderr, exitCode] = await Promise.all([
    new Response(proc.stdout).text(),
    new Response(proc.stderr).text(),
    proc.exited,
  ]);
  if (exitCode !== 0) {
    throw new Error(`curl exit ${exitCode}: ${stderr.slice(0, 300)}`);
  }
  let j: any;
  try { j = JSON.parse(stdout); }
  catch (e) {
    // Non-JSON body (HTML error page, truncated stream, …).
    throw new Error(`bad response (${stdout.length} bytes): ${stdout.slice(0, 300)}`);
  }
  if (j.error || !j.choices) {
    throw new Error(`gateway error: ${JSON.stringify(j).slice(0, 300)}`);
  }
  return {
    content: j.choices?.[0]?.message?.content ?? "",
    usage: j.usage ?? {},
    finish_reason: j.choices?.[0]?.finish_reason ?? "unknown",
    latency_ms: Date.now() - t0,
  };
}
|
||||
|
||||
// Parse Kimi's markdown into Finding[]. Format expected (per buildPrompt):
|
||||
// ### F<N>: <summary>
|
||||
// - **Severity:** block | warn | info
|
||||
// - **File:** path:line
|
||||
// - **Rationale:** ...
|
||||
function parseFindings(content: string): Finding[] {
|
||||
const findings: Finding[] = [];
|
||||
const blocks = content.split(/^###\s+F\d+:\s*/m).slice(1);
|
||||
for (const block of blocks) {
|
||||
const summary = (block.split("\n")[0] ?? "").trim();
|
||||
if (!summary) continue;
|
||||
const sev = /\*\*Severity:\*\*\s*(block|warn|info)/i.exec(block)?.[1]?.toLowerCase();
|
||||
const fileLine = /\*\*File:\*\*\s*(\S+)/i.exec(block)?.[1] ?? "unknown";
|
||||
const rationale = /\*\*Rationale:\*\*\s*([\s\S]+?)(?=\n###|\n\*\*|$)/i.exec(block)?.[1]?.trim() ?? "";
|
||||
const severity: Finding["severity"] = sev === "block" ? "block" : sev === "warn" ? "warn" : "info";
|
||||
findings.push({
|
||||
check: "kimi_architect" as CheckKind,
|
||||
severity,
|
||||
summary: summary.slice(0, 240),
|
||||
evidence: [fileLine, rationale.slice(0, 360)].filter(Boolean),
|
||||
});
|
||||
}
|
||||
return findings;
|
||||
}
|
||||
|
||||
// For each finding's cited file:line, grep the actual file to verify
|
||||
// the line exists. Returns total + verified counts; per-finding metadata
|
||||
// is appended into the evidence array so the reader can see which
|
||||
// citations were verified.
|
||||
async function computeGrounding(findings: Finding[]): Promise<{ total: number; verified: number; rate: number }> {
|
||||
// readFile (async) instead of readFileSync — caught 2026-04-27 by
|
||||
// Kimi's self-audit. Sync I/O in an async fn blocks the event loop
|
||||
// for every cited file; doesn't matter at 10 findings, would matter
|
||||
// at 100+.
|
||||
const checks = await Promise.all(findings.map(async (f) => {
|
||||
const cite = f.evidence[0] ?? "";
|
||||
const m = /^(\S+?):(\d+)/.exec(cite);
|
||||
if (!m) return false;
|
||||
const [, relpath, lineStr] = m;
|
||||
const line = Number(lineStr);
|
||||
if (!line || !relpath) return false;
|
||||
const abs = relpath.startsWith("/") ? relpath : resolve(REPO_ROOT, relpath);
|
||||
if (!existsSync(abs)) {
|
||||
f.evidence.push("[grounding: file not found]");
|
||||
return false;
|
||||
}
|
||||
try {
|
||||
const lines = (await readFile(abs, "utf8")).split("\n");
|
||||
if (line < 1 || line > lines.length) {
|
||||
f.evidence.push(`[grounding: line ${line} > EOF (${lines.length})]`);
|
||||
return false;
|
||||
}
|
||||
f.evidence.push(`[grounding: verified at ${relpath}:${line}]`);
|
||||
return true;
|
||||
} catch (e) {
|
||||
f.evidence.push(`[grounding: read failed: ${(e as Error).message.slice(0, 80)}]`);
|
||||
return false;
|
||||
}
|
||||
}));
|
||||
const verified = checks.filter(Boolean).length;
|
||||
const total = findings.length;
|
||||
return { total, verified, rate: total === 0 ? 0 : verified / total };
|
||||
}
|
||||
|
||||
async function persistVerdict(path: string, v: KimiVerdictFile): Promise<void> {
|
||||
await mkdir(KIMI_VERDICTS_DIR, { recursive: true });
|
||||
await writeFile(path, JSON.stringify(v, null, 2));
|
||||
}
|
||||
|
||||
async function appendMetrics(v: KimiVerdictFile): Promise<void> {
|
||||
await mkdir(join(KIMI_AUDITS_JSONL, ".."), { recursive: true });
|
||||
await appendFile(KIMI_AUDITS_JSONL, JSON.stringify({
|
||||
pr_number: v.pr_number,
|
||||
head_sha: v.head_sha,
|
||||
audited_at: v.cached_at,
|
||||
model: v.model,
|
||||
latency_ms: v.latency_ms,
|
||||
finish_reason: v.finish_reason,
|
||||
prompt_tokens: v.usage.prompt_tokens,
|
||||
completion_tokens: v.usage.completion_tokens,
|
||||
findings_total: v.findings.length,
|
||||
findings_block: v.findings.filter(f => f.severity === "block").length,
|
||||
findings_warn: v.findings.filter(f => f.severity === "warn").length,
|
||||
grounding_verified: v.grounding.verified,
|
||||
grounding_rate: Number(v.grounding.rate.toFixed(3)),
|
||||
}) + "\n");
|
||||
}
|
||||
|
||||
function skipFinding(why: string): Finding {
|
||||
return {
|
||||
check: "kimi_architect" as CheckKind,
|
||||
severity: "info",
|
||||
summary: `kimi_architect skipped — ${why}`,
|
||||
evidence: [why],
|
||||
};
|
||||
}
|
||||
@ -15,7 +15,7 @@ import {
|
||||
} from "./types";
|
||||
import type { StageName } from "./stage_receipt";
|
||||
|
||||
export const DRIFT_REPORT_SCHEMA_VERSION = 1;
|
||||
export const DRIFT_REPORT_SCHEMA_VERSION = 2;
|
||||
export const DRIFT_THRESHOLD_PCT = 0.20;
|
||||
|
||||
export type DriftSeverity = "ok" | "warn" | "alert";
|
||||
@ -27,7 +27,11 @@ export interface StageDrift {
|
||||
delta_accepted: number;
|
||||
delta_quarantined: number;
|
||||
pct_change_out: number | null; // null when prior had 0 records
|
||||
input_hash_match: boolean;
|
||||
// null when input_hash isn't materialized into the stage summary —
|
||||
// schema v1 lied and reported `true` here. v2 is honest: callers
|
||||
// that want determinism enforcement must read the full StageReceipt
|
||||
// off disk and compute input_hash equality there.
|
||||
input_hash_match: boolean | null;
|
||||
output_hash_match: boolean;
|
||||
// alert if input_hash matches but output_hash diverges
|
||||
deterministic_violation: boolean;
|
||||
|
||||
@ -121,6 +121,14 @@ export interface EvidenceRecord {
|
||||
// and have no text payload. Present for distilled_*, contract_analyses,
|
||||
// mode_experiments, scrum_reviews etc.
|
||||
text?: string;
|
||||
|
||||
// ── Domain-specific metadata bucket ──
|
||||
// Source-specific fields that don't earn a top-level slot. e.g.
|
||||
// contract_analyses rows carry `contractor` here; mode_experiments
|
||||
// could carry `corpus_set`. Typed scalar values only — keep this
|
||||
// small or it becomes a junk drawer. Added 2026-04-27 (Kimi audit
|
||||
// flagged `(ev as any).contractor` schema bypass at export_sft.ts:126).
|
||||
metadata?: Record<string, string | number | boolean>;
|
||||
}
|
||||
|
||||
export function validateEvidenceRecord(input: unknown): ValidationResult<EvidenceRecord> {
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
// if something can't be verified from a check, it goes into `evidence`
|
||||
// so the verdict is inspectable, not a black box.
|
||||
|
||||
export type CheckKind = "static" | "dynamic" | "inference" | "kb_query";
|
||||
export type CheckKind = "static" | "dynamic" | "inference" | "kb_query" | "kimi_architect";
|
||||
|
||||
export type Severity = "info" | "warn" | "block";
|
||||
|
||||
|
||||
@ -45,6 +45,41 @@ default_model = "openai/gpt-oss-120b:free"
|
||||
# Model-prefix routing: "openrouter/<vendor>/<model>" auto-routes here,
|
||||
# prefix stripped before upstream call.
|
||||
|
||||
[[provider]]
|
||||
name = "opencode"
|
||||
base_url = "https://opencode.ai/zen/v1"
|
||||
# Unified endpoint — covers BOTH Zen (pay-per-token Anthropic/OpenAI/
|
||||
# Gemini frontier) AND Go (flat-sub Kimi/GLM/DeepSeek/Qwen/Minimax).
|
||||
# Upstream bills per-model: Zen models hit Zen balance, Go models hit
|
||||
# Go subscription cap. /zen/go/v1 is the Go-only sub-path (rejects
|
||||
# Zen models), kept for reference but not used by this provider.
|
||||
auth = "bearer"
|
||||
auth_env = "OPENCODE_API_KEY"
|
||||
default_model = "claude-opus-4-7"
|
||||
# OpenCode (Zen + GO unified endpoint). One sk-* key reaches Claude
|
||||
# Opus 4.7, GPT-5.5-pro, Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM,
|
||||
# Qwen, plus 4 free-tier models. OpenAI-compatible Chat Completions
|
||||
# at /v1/chat/completions. Model-prefix routing: "opencode/<name>"
|
||||
# auto-routes here, prefix stripped before upstream call.
|
||||
# Key file: /etc/lakehouse/opencode.env (loaded via systemd EnvironmentFile).
|
||||
# Model catalog: curl -H "Authorization: Bearer ..." https://opencode.ai/zen/v1/models
|
||||
# Note: /zen/go/v1 is the GO-only sub-path (Kimi/GLM/DeepSeek tier);
|
||||
# /zen/v1 covers everything including Anthropic (which /zen/go/v1 rejects).
|
||||
|
||||
[[provider]]
|
||||
name = "kimi"
|
||||
base_url = "https://api.kimi.com/coding/v1"
|
||||
auth = "bearer"
|
||||
auth_env = "KIMI_API_KEY"
|
||||
default_model = "kimi-for-coding"
|
||||
# Direct Kimi For Coding provider. `api.kimi.com` is a SEPARATE account
|
||||
# system from `api.moonshot.ai` and `api.moonshot.cn` — keys are NOT
|
||||
# interchangeable. Used when Ollama Cloud's `kimi-k2:1t` is upstream-
|
||||
# broken and OpenRouter's `moonshotai/kimi-k2.6` is rate-limited.
|
||||
# Model id: `kimi-for-coding` (kimi-k2.6 underneath).
|
||||
# Key file: /etc/lakehouse/kimi.env (loaded via systemd EnvironmentFile).
|
||||
# Model-prefix routing: "kimi/<model>" auto-routes here, prefix stripped.
|
||||
|
||||
# Planned (Phase 40 long-horizon — adapters not yet shipped):
|
||||
#
|
||||
# [[provider]]
|
||||
|
||||
@ -271,6 +271,30 @@ async fn main() {
|
||||
}
|
||||
k
|
||||
},
|
||||
kimi_key: {
|
||||
// Direct Kimi For Coding (api.kimi.com) — bypasses the
|
||||
// broken-upstream kimi-k2:1t and OpenRouter rate caps.
|
||||
// Key from /etc/lakehouse/kimi.env (KIMI_API_KEY=sk-kimi-…).
|
||||
let k = v1::kimi::resolve_kimi_key();
|
||||
if k.is_some() {
|
||||
tracing::info!("v1: Kimi key loaded — /v1/chat provider=kimi enabled (model=kimi-for-coding)");
|
||||
} else {
|
||||
tracing::debug!("v1: no Kimi key — provider=kimi will 503");
|
||||
}
|
||||
k
|
||||
},
|
||||
opencode_key: {
|
||||
// OpenCode GO multi-vendor gateway — Claude Opus 4.7,
|
||||
// GPT-5.5-pro, Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM,
|
||||
// Qwen + free-tier. Key from /etc/lakehouse/opencode.env.
|
||||
let k = v1::opencode::resolve_opencode_key();
|
||||
if k.is_some() {
|
||||
tracing::info!("v1: OpenCode key loaded — /v1/chat provider=opencode enabled (40 models)");
|
||||
} else {
|
||||
tracing::debug!("v1: no OpenCode key — provider=opencode will 503");
|
||||
}
|
||||
k
|
||||
},
|
||||
// Phase 40 early deliverable — Langfuse trace emitter.
|
||||
// Defaults match mcp-server/tracing.ts conventions so
|
||||
// gateway traces land in the same staffing project.
|
||||
|
||||
227
crates/gateway/src/v1/kimi.rs
Normal file
227
crates/gateway/src/v1/kimi.rs
Normal file
@ -0,0 +1,227 @@
|
||||
//! Kimi For Coding adapter — direct provider for `kimi-for-coding`
|
||||
//! (kimi-k2.6 underneath). Used when Ollama Cloud's `kimi-k2:1t` is
|
||||
//! returning sustained 5xx (broken upstream) and OpenRouter's
|
||||
//! `moonshotai/kimi-k2.6` is rate-limited.
|
||||
//!
|
||||
//! Endpoint per `kimi.com/code/docs` and `moonshotai.github.io/kimi-cli`:
|
||||
//! base_url: https://api.kimi.com/coding/v1
|
||||
//! model id: kimi-for-coding
|
||||
//! auth: Bearer sk-kimi-…
|
||||
//! protocol: OpenAI Chat Completions compatible
|
||||
//!
|
||||
//! IMPORTANT: `api.kimi.com` is a separate account system from
|
||||
//! `api.moonshot.ai` and `api.moonshot.cn`. Keys are NOT interchangeable.
|
||||
//! This adapter is for `sk-kimi-*` keys provisioned via the Kimi
|
||||
//! membership console only.
|
||||
//!
|
||||
//! Key sourcing priority:
|
||||
//! 1. Env var `KIMI_API_KEY` (loaded from /etc/lakehouse/kimi.env via
|
||||
//! systemd EnvironmentFile=)
|
||||
//! 2. /etc/lakehouse/kimi.env directly (rescue path if env not loaded)
|
||||
//!
|
||||
//! First hit wins. Resolved once at gateway startup, stored on
|
||||
//! `V1State.kimi_key`.
|
||||
|
||||
use std::time::Duration;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};
|
||||
|
||||
const KIMI_BASE_URL: &str = "https://api.kimi.com/coding/v1";
|
||||
// Default 600s — kimi-for-coding is a reasoning model; on large
|
||||
// code-audit prompts (~50KB+ input + 8K output) it routinely needs
|
||||
// 3-8 min to think + emit. Override with KIMI_TIMEOUT_SECS env var.
|
||||
const KIMI_TIMEOUT_SECS_DEFAULT: u64 = 600;
|
||||
|
||||
fn kimi_timeout_secs() -> u64 {
|
||||
std::env::var("KIMI_TIMEOUT_SECS")
|
||||
.ok()
|
||||
.and_then(|s| s.trim().parse::<u64>().ok())
|
||||
.filter(|&n| n > 0)
|
||||
.unwrap_or(KIMI_TIMEOUT_SECS_DEFAULT)
|
||||
}
|
||||
|
||||
/// Resolve the `sk-kimi-*` API key. Priority: the `KIMI_API_KEY` env
/// var (normally loaded via systemd EnvironmentFile=), then a direct
/// parse of /etc/lakehouse/kimi.env as a rescue path. First non-empty
/// hit wins; `None` when no key is configured anywhere.
pub fn resolve_kimi_key() -> Option<String> {
    // 1. Environment.
    match std::env::var("KIMI_API_KEY") {
        Ok(k) if !k.trim().is_empty() => return Some(k.trim().to_string()),
        _ => {}
    }
    // 2. Rescue path: read the env file ourselves.
    let raw = std::fs::read_to_string("/etc/lakehouse/kimi.env").ok()?;
    raw.lines().find_map(|line| {
        let rest = line.strip_prefix("KIMI_API_KEY=")?;
        let k = rest.trim().trim_matches('"').trim_matches('\'');
        if k.is_empty() { None } else { Some(k.to_string()) }
    })
}
|
||||
|
||||
/// Proxy one chat request to api.kimi.com's coding endpoint and map the
/// reply back onto the gateway's OpenAI-compatible `ChatResponse`.
///
/// Errors are returned as human-readable `String`s (unreachable host,
/// non-2xx status with upstream body, unparseable JSON, empty choices)
/// so the gateway can surface them verbatim.
pub async fn chat(
    key: &str,
    req: &ChatRequest,
) -> Result<ChatResponse, String> {
    // Strip the "kimi/" namespace prefix if the caller used it so the
    // upstream API sees the bare model id (e.g. "kimi-for-coding").
    let model = req.model.strip_prefix("kimi/").unwrap_or(&req.model).to_string();

    // Flatten content to a plain String. api.kimi.com is text-only on
    // the coding endpoint; the OpenAI multimodal array shape
    // ([{type:"text",text:"..."},{type:"image_url",...}]) returns 400.
    // Message::text() concats text-parts and drops non-text. Caught
    // 2026-04-27 by Kimi's self-audit (kimi.rs:137 — content as raw
    // serde_json::Value risked upstream rejection).
    let body = KimiChatBody {
        model: model.clone(),
        messages: req.messages.iter().map(|m| KimiMessage {
            role: m.role.clone(),
            content: serde_json::Value::String(m.text()),
        }).collect(),
        max_tokens: req.max_tokens.unwrap_or(800),
        temperature: req.temperature.unwrap_or(0.3),
        stream: false,
    };

    // Per-call client so the env-configurable timeout is re-read each time.
    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(kimi_timeout_secs()))
        .build()
        .map_err(|e| format!("build client: {e}"))?;

    let t0 = std::time::Instant::now();
    let resp = client
        .post(format!("{}/chat/completions", KIMI_BASE_URL))
        .bearer_auth(key)
        // api.kimi.com gates this endpoint by User-Agent — only sanctioned
        // coding agents (Claude Code, Kimi CLI, Roo Code, Kilo Code) get
        // through. Generic clients receive 403 access_terminated_error.
        // J accepted the TOS risk on 2026-04-27; revisit if Moonshot
        // tightens enforcement.
        .header("User-Agent", "claude-code/1.0.0")
        .json(&body)
        .send()
        .await
        .map_err(|e| format!("api.kimi.com unreachable: {e}"))?;

    let status = resp.status();
    if !status.is_success() {
        // Include the upstream body in the error for diagnosability.
        let body = resp.text().await.unwrap_or_else(|_| "?".into());
        return Err(format!("api.kimi.com {}: {}", status, body));
    }

    let parsed: KimiChatResponse = resp.json().await
        .map_err(|e| format!("invalid kimi response: {e}"))?;

    let latency_ms = t0.elapsed().as_millis();
    let choice = parsed.choices.into_iter().next()
        .ok_or_else(|| "kimi returned no choices".to_string())?;
    let text = choice.message.content;

    // When upstream omits usage, estimate tokens with a rough
    // 4-chars-per-token heuristic (rounded up) over the text content.
    let prompt_tokens = parsed.usage.as_ref().map(|u| u.prompt_tokens).unwrap_or_else(|| {
        let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
        ((chars + 3) / 4) as u32
    });
    let completion_tokens = parsed.usage.as_ref().map(|u| u.completion_tokens).unwrap_or_else(|| {
        ((text.chars().count() + 3) / 4) as u32
    });

    tracing::info!(
        target: "v1.chat",
        provider = "kimi",
        model = %model,
        prompt_tokens,
        completion_tokens,
        latency_ms = latency_ms as u64,
        "kimi chat completed",
    );

    // Re-wrap in the gateway's OpenAI-shape response. The id is a
    // locally generated timestamp-based value, not an upstream id.
    Ok(ChatResponse {
        id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
        object: "chat.completion",
        created: chrono::Utc::now().timestamp(),
        model,
        choices: vec![Choice {
            index: 0,
            message: Message { role: "assistant".into(), content: serde_json::Value::String(text) },
            finish_reason: choice.finish_reason.unwrap_or_else(|| "stop".into()),
        }],
        usage: UsageBlock {
            prompt_tokens,
            completion_tokens,
            total_tokens: prompt_tokens + completion_tokens,
        },
    })
}
|
||||
|
||||
// -- Kimi wire shapes (OpenAI-compatible) --

/// Request body for POST {KIMI_BASE_URL}/chat/completions.
#[derive(Serialize)]
struct KimiChatBody {
    model: String,
    messages: Vec<KimiMessage>,
    max_tokens: u32,
    temperature: f64,
    stream: bool, // always false — this adapter is non-streaming
}

/// One outbound message. `content` is always a JSON string here (see
/// the flattening note in `chat`), kept as `Value` to match the shared
/// message plumbing.
#[derive(Serialize)]
struct KimiMessage { role: String, content: serde_json::Value }

/// Response envelope. `usage` is optional — upstream may omit it, in
/// which case `chat` estimates token counts.
#[derive(Deserialize)]
struct KimiChatResponse {
    choices: Vec<KimiChoice>,
    #[serde(default)]
    usage: Option<KimiUsage>,
}

#[derive(Deserialize)]
struct KimiChoice {
    message: KimiMessageResp,
    // Defaulted to None when absent; `chat` substitutes "stop".
    #[serde(default)]
    finish_reason: Option<String>,
}

/// Inbound assistant message — content is plain text on this endpoint.
#[derive(Deserialize)]
struct KimiMessageResp { content: String }

#[derive(Deserialize)]
struct KimiUsage { prompt_tokens: u32, completion_tokens: u32 }
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn resolve_kimi_key_does_not_panic() {
|
||||
let _ = resolve_kimi_key();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn chat_body_serializes_to_openai_shape() {
|
||||
let body = KimiChatBody {
|
||||
model: "kimi-for-coding".into(),
|
||||
messages: vec![
|
||||
KimiMessage { role: "user".into(), content: "review this".into() },
|
||||
],
|
||||
max_tokens: 800,
|
||||
temperature: 0.3,
|
||||
stream: false,
|
||||
};
|
||||
let json = serde_json::to_string(&body).unwrap();
|
||||
assert!(json.contains("\"model\":\"kimi-for-coding\""));
|
||||
assert!(json.contains("\"messages\""));
|
||||
assert!(json.contains("\"max_tokens\":800"));
|
||||
assert!(json.contains("\"stream\":false"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn model_prefix_strip() {
|
||||
let cases = [
|
||||
("kimi/kimi-for-coding", "kimi-for-coding"),
|
||||
("kimi-for-coding", "kimi-for-coding"),
|
||||
("kimi/kimi-k2.6", "kimi-k2.6"),
|
||||
];
|
||||
for (input, expected) in cases {
|
||||
let out = input.strip_prefix("kimi/").unwrap_or(input);
|
||||
assert_eq!(out, expected, "{input} should become {expected}");
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -16,6 +16,8 @@ pub mod ollama_cloud;
|
||||
pub mod openrouter;
|
||||
pub mod gemini;
|
||||
pub mod claude;
|
||||
pub mod kimi;
|
||||
pub mod opencode;
|
||||
pub mod langfuse_trace;
|
||||
pub mod mode;
|
||||
pub mod respond;
|
||||
@ -53,6 +55,19 @@ pub struct V1State {
|
||||
/// `claude::resolve_claude_key()`. None = provider="claude" calls
|
||||
/// 503. Phase 40 deliverable.
|
||||
pub claude_key: Option<String>,
|
||||
/// Kimi For Coding (api.kimi.com) bearer token — direct provider
|
||||
/// for `kimi-for-coding`. Used when Ollama Cloud's `kimi-k2:1t` is
|
||||
/// upstream-broken. Loaded at startup via `kimi::resolve_kimi_key()`
|
||||
/// from `KIMI_API_KEY` env or `/etc/lakehouse/kimi.env`. None =
|
||||
/// provider="kimi" calls 503.
|
||||
pub kimi_key: Option<String>,
|
||||
/// OpenCode GO (opencode.ai) bearer token — multi-vendor curated
|
||||
/// gateway. One sk-* key reaches Claude Opus 4.7, GPT-5.5-pro,
|
||||
/// Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM, Qwen + free-tier.
|
||||
/// Loaded at startup via `opencode::resolve_opencode_key()` from
|
||||
/// `OPENCODE_API_KEY` env or `/etc/lakehouse/opencode.env`. None =
|
||||
/// provider="opencode" calls 503.
|
||||
pub opencode_key: Option<String>,
|
||||
/// Phase 40 early deliverable — Langfuse client. None = tracing
|
||||
/// disabled (keys missing or container unreachable). Traces are
|
||||
/// fire-and-forget: never block the response path.
|
||||
@ -224,6 +239,12 @@ fn resolve_provider(req: &ChatRequest) -> (String, String) {
|
||||
if let Some(rest) = req.model.strip_prefix("claude/") {
|
||||
return ("claude".to_string(), rest.to_string());
|
||||
}
|
||||
if let Some(rest) = req.model.strip_prefix("kimi/") {
|
||||
return ("kimi".to_string(), rest.to_string());
|
||||
}
|
||||
if let Some(rest) = req.model.strip_prefix("opencode/") {
|
||||
return ("opencode".to_string(), rest.to_string());
|
||||
}
|
||||
// Bare `vendor/model` shape (e.g. `x-ai/grok-4.1-fast`,
|
||||
// `moonshotai/kimi-k2`, `openai/gpt-oss-120b:free`) → OpenRouter.
|
||||
// This makes the gateway a drop-in OpenAI-compatible middleware:
|
||||
@ -316,6 +337,12 @@ mod resolve_provider_tests {
|
||||
let r = mk_req(None, "claude/claude-3-5-sonnet-latest");
|
||||
assert_eq!(resolve_provider(&r), ("claude".into(), "claude-3-5-sonnet-latest".into()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn kimi_prefix_infers_and_strips() {
|
||||
let r = mk_req(None, "kimi/kimi-for-coding");
|
||||
assert_eq!(resolve_provider(&r), ("kimi".into(), "kimi-for-coding".into()));
|
||||
}
|
||||
}
|
||||
|
||||
async fn chat(
|
||||
@ -403,10 +430,37 @@ async fn chat(
|
||||
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("claude: {e}")))?;
|
||||
(r, "claude".to_string())
|
||||
}
|
||||
"kimi" => {
|
||||
// Direct Kimi For Coding provider — bypasses Ollama Cloud's
|
||||
// upstream-broken kimi-k2:1t and OpenRouter's rate-limited
|
||||
// moonshotai/kimi-k2.6. Uses sk-kimi-* keys from the Kimi
|
||||
// membership console.
|
||||
let key = state.kimi_key.as_deref().ok_or((
|
||||
StatusCode::SERVICE_UNAVAILABLE,
|
||||
"KIMI_API_KEY not configured".to_string(),
|
||||
))?;
|
||||
let r = kimi::chat(key, &*req_for_adapter)
|
||||
.await
|
||||
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("kimi: {e}")))?;
|
||||
(r, "kimi".to_string())
|
||||
}
|
||||
"opencode" => {
|
||||
// OpenCode GO multi-vendor gateway — Claude Opus 4.7,
|
||||
// GPT-5.5-pro, Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM,
|
||||
// Qwen, free-tier. OpenAI-compat at opencode.ai/zen/go/v1.
|
||||
let key = state.opencode_key.as_deref().ok_or((
|
||||
StatusCode::SERVICE_UNAVAILABLE,
|
||||
"OPENCODE_API_KEY not configured".to_string(),
|
||||
))?;
|
||||
let r = opencode::chat(key, &*req_for_adapter)
|
||||
.await
|
||||
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("opencode: {e}")))?;
|
||||
(r, "opencode".to_string())
|
||||
}
|
||||
other => {
|
||||
return Err((
|
||||
StatusCode::BAD_REQUEST,
|
||||
format!("unknown provider '{other}' — supported: ollama, ollama_cloud, openrouter, gemini, claude"),
|
||||
format!("unknown provider '{other}' — supported: ollama, ollama_cloud, openrouter, gemini, claude, kimi, opencode"),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
@ -1032,14 +1032,14 @@ mod tests {
|
||||
preferred_mode: "codereview".into(),
|
||||
fallback_modes: vec!["consensus".into()],
|
||||
default_model: "qwen3-coder:480b".into(),
|
||||
matrix_corpus: Some("distilled_procedural_v1".into()),
|
||||
matrix_corpus: vec!["distilled_procedural_v1".into()],
|
||||
},
|
||||
TaskClassEntry {
|
||||
name: "broken".into(),
|
||||
preferred_mode: "nonsense_mode".into(),
|
||||
fallback_modes: vec!["consensus".into()],
|
||||
default_model: "x".into(),
|
||||
matrix_corpus: None,
|
||||
matrix_corpus: vec![],
|
||||
},
|
||||
],
|
||||
default: DefaultEntry {
|
||||
|
||||
228
crates/gateway/src/v1/opencode.rs
Normal file
228
crates/gateway/src/v1/opencode.rs
Normal file
@ -0,0 +1,228 @@
|
||||
//! OpenCode GO adapter — multi-vendor curated gateway via opencode.ai/zen/go.
|
||||
//!
|
||||
//! One sk-* key reaches Claude Opus 4.7, GPT-5.5-pro, Gemini 3.1-pro,
|
||||
//! Kimi K2.6, DeepSeek, GLM, Qwen, plus 4 free-tier models.
|
||||
//! OpenAI-compatible Chat Completions; auth via Bearer.
|
||||
//!
|
||||
//! Why a separate adapter (vs reusing openrouter.rs):
|
||||
//! - Different account, different key, different base_url
|
||||
//! - No HTTP-Referer / X-Title headers (those are OpenRouter-specific)
|
||||
//! - Future-proof for any opencode-only request shaping
|
||||
//!
|
||||
//! Key sourcing priority:
|
||||
//! 1. Env var `OPENCODE_API_KEY` (loaded from /etc/lakehouse/opencode.env
|
||||
//! via systemd EnvironmentFile=)
|
||||
//! 2. /etc/lakehouse/opencode.env directly (rescue path if env missing)
|
||||
//!
|
||||
//! Resolved once at gateway startup, stored on `V1State.opencode_key`.
|
||||
//! Model-prefix routing: "opencode/<model>" auto-routes here, prefix
|
||||
//! stripped before upstream call.
|
||||
|
||||
use std::time::Duration;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};
|
||||
|
||||
// /zen/v1 is the unified OpenCode endpoint that covers BOTH the
|
||||
// Zen pay-per-token tier (Claude/GPT/Gemini frontier) AND the Go
|
||||
// subscription tier (Kimi/GLM/DeepSeek/Qwen/Minimax/mimo). When the
|
||||
// caller has both, opencode bills per-model: Zen models charge Zen
|
||||
// balance, Go models charge against the Go subscription cap.
|
||||
//
|
||||
// /zen/go/v1 exists as a Go-only sub-path (rejects Zen models with
|
||||
// "Model not supported"); we use the unified /zen/v1 since the same
|
||||
// key works for both with correct billing routing upstream.
|
||||
const OPENCODE_BASE_URL: &str = "https://opencode.ai/zen/v1";
|
||||
// 600s default — opencode upstream models include reasoning-heavy
|
||||
// variants (Claude Opus, Kimi K2.6, GLM-5.1) that legitimately take
|
||||
// 3-5 min on big audit prompts. Override via OPENCODE_TIMEOUT_SECS.
|
||||
const OPENCODE_TIMEOUT_SECS_DEFAULT: u64 = 600;
|
||||
|
||||
fn opencode_timeout_secs() -> u64 {
|
||||
std::env::var("OPENCODE_TIMEOUT_SECS")
|
||||
.ok()
|
||||
.and_then(|s| s.trim().parse::<u64>().ok())
|
||||
.filter(|&n| n > 0)
|
||||
.unwrap_or(OPENCODE_TIMEOUT_SECS_DEFAULT)
|
||||
}
|
||||
|
||||
pub fn resolve_opencode_key() -> Option<String> {
|
||||
if let Ok(k) = std::env::var("OPENCODE_API_KEY") {
|
||||
if !k.trim().is_empty() { return Some(k.trim().to_string()); }
|
||||
}
|
||||
if let Ok(raw) = std::fs::read_to_string("/etc/lakehouse/opencode.env") {
|
||||
for line in raw.lines() {
|
||||
if let Some(rest) = line.strip_prefix("OPENCODE_API_KEY=") {
|
||||
let k = rest.trim().trim_matches('"').trim_matches('\'');
|
||||
if !k.is_empty() { return Some(k.to_string()); }
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub async fn chat(
|
||||
key: &str,
|
||||
req: &ChatRequest,
|
||||
) -> Result<ChatResponse, String> {
|
||||
// Strip the "opencode/" namespace prefix so the upstream sees the
|
||||
// bare model id (e.g. "claude-opus-4-7", "kimi-k2.6").
|
||||
let model = req.model.strip_prefix("opencode/").unwrap_or(&req.model).to_string();
|
||||
|
||||
// Anthropic models on opencode reject `temperature` with a 400
|
||||
// "temperature is deprecated for this model" error. Strip the
|
||||
// field for claude-* and the new gpt-5.x reasoning lineages
|
||||
// (Anthropic/OpenAI's reasoning models all moved away from temp).
|
||||
// Other models keep the caller's value or default to 0.3.
|
||||
let drop_temp = model.starts_with("claude-")
|
||||
|| model.starts_with("gpt-5")
|
||||
|| model.starts_with("o1")
|
||||
|| model.starts_with("o3")
|
||||
|| model.starts_with("o4");
|
||||
let body = OCChatBody {
|
||||
model: model.clone(),
|
||||
messages: req.messages.iter().map(|m| OCMessage {
|
||||
role: m.role.clone(),
|
||||
content: m.content.clone(),
|
||||
}).collect(),
|
||||
// filter(|&n| n > 0) catches Some(0) — same trap that bit the
|
||||
// Kimi adapter when callers passed empty-env-parsed-to-0.
|
||||
max_tokens: req.max_tokens.filter(|&n| n > 0).unwrap_or(800),
|
||||
temperature: if drop_temp { None } else { Some(req.temperature.unwrap_or(0.3)) },
|
||||
stream: false,
|
||||
};
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(opencode_timeout_secs()))
|
||||
.build()
|
||||
.map_err(|e| format!("build client: {e}"))?;
|
||||
|
||||
let t0 = std::time::Instant::now();
|
||||
let resp = client
|
||||
.post(format!("{}/chat/completions", OPENCODE_BASE_URL))
|
||||
.bearer_auth(key)
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("opencode.ai unreachable: {e}"))?;
|
||||
|
||||
let status = resp.status();
|
||||
if !status.is_success() {
|
||||
let body = resp.text().await.unwrap_or_else(|_| "?".into());
|
||||
return Err(format!("opencode.ai {}: {}", status, body));
|
||||
}
|
||||
|
||||
let parsed: OCChatResponse = resp.json().await
|
||||
.map_err(|e| format!("invalid opencode response: {e}"))?;
|
||||
|
||||
let latency_ms = t0.elapsed().as_millis();
|
||||
let choice = parsed.choices.into_iter().next()
|
||||
.ok_or_else(|| "opencode returned no choices".to_string())?;
|
||||
let text = choice.message.content;
|
||||
|
||||
let prompt_tokens = parsed.usage.as_ref().map(|u| u.prompt_tokens).unwrap_or_else(|| {
|
||||
let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
|
||||
((chars + 3) / 4) as u32
|
||||
});
|
||||
let completion_tokens = parsed.usage.as_ref().map(|u| u.completion_tokens).unwrap_or_else(|| {
|
||||
((text.chars().count() + 3) / 4) as u32
|
||||
});
|
||||
|
||||
tracing::info!(
|
||||
target: "v1.chat",
|
||||
provider = "opencode",
|
||||
model = %model,
|
||||
prompt_tokens,
|
||||
completion_tokens,
|
||||
latency_ms = latency_ms as u64,
|
||||
"opencode chat completed",
|
||||
);
|
||||
|
||||
Ok(ChatResponse {
|
||||
id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
|
||||
object: "chat.completion",
|
||||
created: chrono::Utc::now().timestamp(),
|
||||
model,
|
||||
choices: vec![Choice {
|
||||
index: 0,
|
||||
message: Message { role: "assistant".into(), content: serde_json::Value::String(text) },
|
||||
finish_reason: choice.finish_reason.unwrap_or_else(|| "stop".into()),
|
||||
}],
|
||||
usage: UsageBlock {
|
||||
prompt_tokens,
|
||||
completion_tokens,
|
||||
total_tokens: prompt_tokens + completion_tokens,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
// -- OpenCode wire shapes (OpenAI-compatible) --
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct OCChatBody {
|
||||
model: String,
|
||||
messages: Vec<OCMessage>,
|
||||
max_tokens: u32,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
temperature: Option<f64>,
|
||||
stream: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct OCMessage { role: String, content: serde_json::Value }
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct OCChatResponse {
|
||||
choices: Vec<OCChoice>,
|
||||
#[serde(default)]
|
||||
usage: Option<OCUsage>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct OCChoice {
|
||||
message: OCMessageResp,
|
||||
#[serde(default)]
|
||||
finish_reason: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct OCMessageResp { content: String }
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct OCUsage { prompt_tokens: u32, completion_tokens: u32 }
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn resolve_opencode_key_does_not_panic() {
|
||||
let _ = resolve_opencode_key();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn model_prefix_strip() {
|
||||
let cases = [
|
||||
("opencode/claude-opus-4-7", "claude-opus-4-7"),
|
||||
("opencode/kimi-k2.6", "kimi-k2.6"),
|
||||
("claude-opus-4-7", "claude-opus-4-7"),
|
||||
];
|
||||
for (input, expected) in cases {
|
||||
let out = input.strip_prefix("opencode/").unwrap_or(input);
|
||||
assert_eq!(out, expected);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn max_tokens_filters_zero() {
|
||||
// The trap: empty env -> Number("") -> 0 -> Some(0). Adapter
|
||||
// must not pass 0 upstream; should fall to 800.
|
||||
let some_zero: Option<u32> = Some(0);
|
||||
let result = some_zero.filter(|&n| n > 0).unwrap_or(800);
|
||||
assert_eq!(result, 800);
|
||||
let some_real: Option<u32> = Some(4096);
|
||||
assert_eq!(some_real.filter(|&n| n > 0).unwrap_or(800), 4096);
|
||||
let none_val: Option<u32> = None;
|
||||
assert_eq!(none_val.filter(|&n| n > 0).unwrap_or(800), 800);
|
||||
}
|
||||
}
|
||||
@ -93,3 +93,76 @@ pub trait Validator: Send + Sync {
|
||||
/// Human-readable name for logs + Langfuse traces.
|
||||
fn name(&self) -> &'static str;
|
||||
}
|
||||
|
||||
// ─── Worker lookup (Phase 43 v2) ────────────────────────────────────────
|
||||
//
|
||||
// Validators that cross-check artifacts against the worker roster
|
||||
// (FillValidator, EmailValidator) take an `Arc<dyn WorkerLookup>` at
|
||||
// construction. Keeping the trait sync + in-memory mirrors the
|
||||
// lakehouse pattern of "load truth into memory, validate against
|
||||
// snapshot, refresh periodically" rather than per-call DB hits.
|
||||
//
|
||||
// Production impl: wrap a parquet snapshot loaded from
|
||||
// `data/datasets/workers_500k.parquet` (or its safe view counterpart
|
||||
// once Track A.B lands). Tests use `InMemoryWorkerLookup`.
|
||||
|
||||
/// One worker row from the staffing roster — the fields validators
|
||||
/// actually read. Anything not on this struct (resume_text, scores,
|
||||
/// communications) is intentionally hidden from the validator path.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct WorkerRecord {
|
||||
pub candidate_id: String,
|
||||
pub name: String,
|
||||
/// Free-form. Validators check for `"active"` (any other value
|
||||
/// fails the status check). Common values from existing data:
|
||||
/// "active", "inactive", "placed", "blacklisted".
|
||||
pub status: String,
|
||||
pub city: Option<String>,
|
||||
pub state: Option<String>,
|
||||
pub role: Option<String>,
|
||||
/// Client ids this worker has been blacklisted from. Populated
|
||||
/// from joining a blacklist table; empty when not provided.
|
||||
#[serde(default)]
|
||||
pub blacklisted_clients: Vec<String>,
|
||||
}
|
||||
|
||||
/// Worker lookup contract. Sync by design — implementations should
|
||||
/// hold an in-memory snapshot, not perform per-call I/O.
|
||||
pub trait WorkerLookup: Send + Sync {
|
||||
fn find(&self, candidate_id: &str) -> Option<WorkerRecord>;
|
||||
}
|
||||
|
||||
/// HashMap-backed lookup. Used by validator unit tests + as a
|
||||
/// reasonable bootstrap impl for production once the parquet loader
|
||||
/// fills it on startup.
|
||||
pub struct InMemoryWorkerLookup {
|
||||
rows: std::collections::HashMap<String, WorkerRecord>,
|
||||
}
|
||||
|
||||
impl InMemoryWorkerLookup {
|
||||
pub fn new() -> Self {
|
||||
Self { rows: Default::default() }
|
||||
}
|
||||
pub fn from_records(records: Vec<WorkerRecord>) -> Self {
|
||||
let mut rows = std::collections::HashMap::with_capacity(records.len());
|
||||
for r in records {
|
||||
rows.insert(r.candidate_id.clone(), r);
|
||||
}
|
||||
Self { rows }
|
||||
}
|
||||
pub fn insert(&mut self, record: WorkerRecord) {
|
||||
self.rows.insert(record.candidate_id.clone(), record);
|
||||
}
|
||||
pub fn len(&self) -> usize { self.rows.len() }
|
||||
pub fn is_empty(&self) -> bool { self.rows.is_empty() }
|
||||
}
|
||||
|
||||
impl Default for InMemoryWorkerLookup {
|
||||
fn default() -> Self { Self::new() }
|
||||
}
|
||||
|
||||
impl WorkerLookup for InMemoryWorkerLookup {
|
||||
fn find(&self, candidate_id: &str) -> Option<WorkerRecord> {
|
||||
self.rows.get(candidate_id).cloned()
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
//! Email/SMS draft validator.
|
||||
//! Email/SMS draft validator (Phase 43 v2 — real PII + name checks).
|
||||
//!
|
||||
//! PRD checks:
|
||||
//! - Schema (TO/BODY fields present)
|
||||
@ -6,15 +6,31 @@
|
||||
//! - PII absence (no SSN / salary leaked into outgoing text)
|
||||
//! - Worker-name consistency (name in message matches worker record)
|
||||
//!
|
||||
//! Scaffold implements schema + length. PII regex (SSN pattern,
|
||||
//! salary-number pattern) lives in `shared::pii::strip_pii` — plug in
|
||||
//! a follow-up when the validator caller knows the worker record to
|
||||
//! cross-check name consistency.
|
||||
//! Like FillValidator, EmailValidator takes `Arc<dyn WorkerLookup>` at
|
||||
//! construction. The contract metadata (which worker the message is
|
||||
//! about) travels under `_context.candidate_id` in the JSON payload.
|
||||
//! When `_context.candidate_id` is present and resolves, the validator
|
||||
//! cross-checks that the worker's name appears verbatim in the body.
|
||||
//!
|
||||
//! PII detection is std-only (no regex dep) — a hand-rolled scan
|
||||
//! covers the patterns we actually care about: SSN (NNN-NN-NNNN),
|
||||
//! salary statements ("salary" / "compensation" near a $ amount).
|
||||
|
||||
use crate::{Artifact, Report, Validator, ValidationError};
|
||||
use crate::{
|
||||
Artifact, Report, Validator, ValidationError, WorkerLookup,
|
||||
};
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
pub struct EmailValidator;
|
||||
pub struct EmailValidator {
|
||||
workers: Arc<dyn WorkerLookup>,
|
||||
}
|
||||
|
||||
impl EmailValidator {
|
||||
pub fn new(workers: Arc<dyn WorkerLookup>) -> Self {
|
||||
Self { workers }
|
||||
}
|
||||
}
|
||||
|
||||
const SMS_MAX_CHARS: usize = 160;
|
||||
const EMAIL_SUBJECT_MAX_CHARS: usize = 78;
|
||||
@ -32,7 +48,7 @@ impl Validator for EmailValidator {
|
||||
}),
|
||||
};
|
||||
|
||||
let to = value.get("to").and_then(|v| v.as_str()).ok_or(
|
||||
let _to = value.get("to").and_then(|v| v.as_str()).ok_or(
|
||||
ValidationError::Schema {
|
||||
field: "to".into(),
|
||||
reason: "missing or not a string".into(),
|
||||
@ -63,54 +79,292 @@ impl Validator for EmailValidator {
|
||||
}
|
||||
}
|
||||
|
||||
let _ = to; // touched for future name-consistency check
|
||||
// TODO(phase-43 v2): PII scan + worker-name consistency.
|
||||
// ── PII scan on body + subject combined ──
|
||||
let scanned = format!(
|
||||
"{} {}",
|
||||
value.get("subject").and_then(|v| v.as_str()).unwrap_or(""),
|
||||
body
|
||||
);
|
||||
if contains_ssn_pattern(&scanned) {
|
||||
return Err(ValidationError::Policy {
|
||||
reason: "body contains an SSN-shaped sequence (NNN-NN-NNNN); strip before send".into(),
|
||||
});
|
||||
}
|
||||
if contains_salary_disclosure(&scanned) {
|
||||
return Err(ValidationError::Policy {
|
||||
reason: "body discloses salary/compensation amount; staffing PII rule says strip before send".into(),
|
||||
});
|
||||
}
|
||||
|
||||
// ── Worker-name consistency ──
|
||||
let candidate_id = value.get("_context")
|
||||
.and_then(|c| c.get("candidate_id"))
|
||||
.and_then(|v| v.as_str());
|
||||
let mut findings: Vec<crate::Finding> = vec![];
|
||||
if let Some(cid) = candidate_id {
|
||||
match self.workers.find(cid) {
|
||||
Some(worker) => {
|
||||
// Body should mention the worker's name (or at least
|
||||
// their first name) — drafts that address a different
|
||||
// person than the contracted worker are a recurring
|
||||
// class of LLM mistake.
|
||||
let first = worker.name.split_whitespace().next().unwrap_or(&worker.name);
|
||||
let body_lower = body.to_lowercase();
|
||||
let first_lower = first.to_lowercase();
|
||||
if !first_lower.is_empty() && !body_lower.contains(&first_lower) {
|
||||
findings.push(crate::Finding {
|
||||
field: "body".into(),
|
||||
severity: crate::Severity::Warning,
|
||||
message: format!(
|
||||
"body doesn't mention worker first name {first:?} (candidate_id {cid:?})"
|
||||
),
|
||||
});
|
||||
}
|
||||
// Also detect *another* worker's name appearing in
|
||||
// place of the contracted one — outright wrong-target.
|
||||
// We can only check this when we have a different
|
||||
// expected name; skip if the body is generic enough.
|
||||
}
|
||||
None => {
|
||||
return Err(ValidationError::Consistency {
|
||||
reason: format!(
|
||||
"_context.candidate_id {cid:?} not found in worker roster"
|
||||
),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Report {
|
||||
findings: vec![],
|
||||
findings,
|
||||
elapsed_ms: started.elapsed().as_millis() as u64,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// ─── PII scanners (std-only) ────────────────────────────────────────────
|
||||
|
||||
/// Detects an SSN-shaped sequence: 3 digits, dash, 2 digits, dash, 4 digits.
|
||||
/// Walks the byte buffer; rejects sequences that are part of a longer run
|
||||
/// of digits (so phone-area-code-like NNN-NNN-NNNN isn't flagged). Tight
|
||||
/// false-positive surface: it's specifically the NNN-NN-NNNN shape.
|
||||
fn contains_ssn_pattern(s: &str) -> bool {
|
||||
let bytes = s.as_bytes();
|
||||
if bytes.len() < 11 { return false; }
|
||||
for i in 0..=bytes.len().saturating_sub(11) {
|
||||
let win = &bytes[i..i + 11];
|
||||
let shape = win.iter().enumerate().all(|(j, &b)| match j {
|
||||
0 | 1 | 2 | 4 | 5 | 7 | 8 | 9 | 10 => b.is_ascii_digit(),
|
||||
3 | 6 => b == b'-',
|
||||
_ => unreachable!(),
|
||||
});
|
||||
if !shape { continue; }
|
||||
// Reject if the byte BEFORE this window is a digit or `-` —
|
||||
// we're inside a longer numeric run, probably not an SSN.
|
||||
if i > 0 {
|
||||
let prev = bytes[i - 1];
|
||||
if prev.is_ascii_digit() || prev == b'-' { continue; }
|
||||
}
|
||||
// Reject if the byte AFTER is a digit or `-` (same reason).
|
||||
if i + 11 < bytes.len() {
|
||||
let next = bytes[i + 11];
|
||||
if next.is_ascii_digit() || next == b'-' { continue; }
|
||||
}
|
||||
return true;
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Detects salary/compensation disclosure: the keywords "salary",
|
||||
/// "compensation", "pay rate", "bill rate", "hourly rate" appearing
|
||||
/// within ~40 chars of a `$` followed by digits. Coarse on purpose —
|
||||
/// it's better to false-positive on a legit phrase like "discuss your
|
||||
/// hourly rate of $30/hr" than to miss it.
|
||||
fn contains_salary_disclosure(s: &str) -> bool {
|
||||
let lower = s.to_lowercase();
|
||||
const KEYWORDS: &[&str] = &[
|
||||
"salary", "compensation", "pay rate", "bill rate", "hourly rate",
|
||||
];
|
||||
let mut keyword_positions: Vec<usize> = vec![];
|
||||
for kw in KEYWORDS {
|
||||
let mut start = 0;
|
||||
while let Some(found) = lower[start..].find(kw) {
|
||||
let abs = start + found;
|
||||
keyword_positions.push(abs);
|
||||
start = abs + kw.len();
|
||||
}
|
||||
}
|
||||
if keyword_positions.is_empty() { return false; }
|
||||
|
||||
// Find every `$NNN+` in the text.
|
||||
let bytes = lower.as_bytes();
|
||||
let mut dollar_positions: Vec<usize> = vec![];
|
||||
for (i, &b) in bytes.iter().enumerate() {
|
||||
if b == b'$' && i + 1 < bytes.len() && bytes[i + 1].is_ascii_digit() {
|
||||
dollar_positions.push(i);
|
||||
}
|
||||
}
|
||||
if dollar_positions.is_empty() { return false; }
|
||||
|
||||
// Any (keyword, $) pair within 40 chars triggers the policy rule.
|
||||
for &kp in &keyword_positions {
|
||||
for &dp in &dollar_positions {
|
||||
if kp.abs_diff(dp) <= 40 {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::{InMemoryWorkerLookup, WorkerRecord};
|
||||
use serde_json::json;
|
||||
|
||||
fn lookup(records: Vec<WorkerRecord>) -> Arc<dyn WorkerLookup> {
|
||||
Arc::new(InMemoryWorkerLookup::from_records(records))
|
||||
}
|
||||
|
||||
fn worker(id: &str, name: &str) -> WorkerRecord {
|
||||
WorkerRecord {
|
||||
candidate_id: id.into(),
|
||||
name: name.into(),
|
||||
status: "active".into(),
|
||||
city: None, state: None, role: None,
|
||||
blacklisted_clients: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn long_sms_fails_completeness() {
|
||||
let v = EmailValidator::new(lookup(vec![]));
|
||||
let body = "x".repeat(200);
|
||||
let r = EmailValidator.validate(&Artifact::EmailDraft(serde_json::json!({
|
||||
"to": "+15555550123",
|
||||
"body": body,
|
||||
"kind": "sms"
|
||||
let r = v.validate(&Artifact::EmailDraft(json!({
|
||||
"to": "+15555550123", "body": body, "kind": "sms"
|
||||
})));
|
||||
assert!(matches!(r, Err(ValidationError::Completeness { .. })));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn long_email_subject_fails_completeness() {
|
||||
let r = EmailValidator.validate(&Artifact::EmailDraft(serde_json::json!({
|
||||
"to": "a@b.com",
|
||||
"body": "hi",
|
||||
"subject": "x".repeat(100)
|
||||
let v = EmailValidator::new(lookup(vec![]));
|
||||
let r = v.validate(&Artifact::EmailDraft(json!({
|
||||
"to": "a@b.com", "body": "hi", "subject": "x".repeat(100)
|
||||
})));
|
||||
assert!(matches!(r, Err(ValidationError::Completeness { .. })));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn missing_to_fails_schema() {
|
||||
let r = EmailValidator.validate(&Artifact::EmailDraft(serde_json::json!({"body": "hi"})));
|
||||
let v = EmailValidator::new(lookup(vec![]));
|
||||
let r = v.validate(&Artifact::EmailDraft(json!({"body": "hi"})));
|
||||
assert!(matches!(r, Err(ValidationError::Schema { field, .. }) if field == "to"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn well_formed_email_passes() {
|
||||
let r = EmailValidator.validate(&Artifact::EmailDraft(serde_json::json!({
|
||||
let v = EmailValidator::new(lookup(vec![]));
|
||||
let r = v.validate(&Artifact::EmailDraft(json!({
|
||||
"to": "hiring@example.com",
|
||||
"subject": "Interview: Friday 10am",
|
||||
"body": "Hi Jane — confirming interview Friday 10am."
|
||||
})));
|
||||
assert!(r.is_ok(), "well-formed email should pass: {:?}", r);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ssn_in_body_fails_policy() {
|
||||
let v = EmailValidator::new(lookup(vec![]));
|
||||
let r = v.validate(&Artifact::EmailDraft(json!({
|
||||
"to": "x@y.com",
|
||||
"body": "Hi Jane — your file shows 123-45-6789 on record."
|
||||
})));
|
||||
match r {
|
||||
Err(ValidationError::Policy { reason }) => assert!(reason.contains("SSN")),
|
||||
other => panic!("expected Policy SSN error, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ssn_in_subject_fails_policy() {
|
||||
let v = EmailValidator::new(lookup(vec![]));
|
||||
let r = v.validate(&Artifact::EmailDraft(json!({
|
||||
"to": "x@y.com",
|
||||
"subject": "Re: ID 123-45-6789",
|
||||
"body": "details inside"
|
||||
})));
|
||||
assert!(matches!(r, Err(ValidationError::Policy { .. })));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn phone_number_does_not_trigger_ssn_false_positive() {
|
||||
let v = EmailValidator::new(lookup(vec![]));
|
||||
let r = v.validate(&Artifact::EmailDraft(json!({
|
||||
"to": "x@y.com",
|
||||
"body": "Call me at 555-123-4567 to confirm."
|
||||
})));
|
||||
assert!(r.is_ok(), "phone NNN-NNN-NNNN should NOT match SSN NNN-NN-NNNN: {:?}", r);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn salary_disclosure_fails_policy() {
|
||||
let v = EmailValidator::new(lookup(vec![]));
|
||||
let r = v.validate(&Artifact::EmailDraft(json!({
|
||||
"to": "x@y.com",
|
||||
"body": "Confirming your hourly rate of $32.50 per hour."
|
||||
})));
|
||||
assert!(matches!(r, Err(ValidationError::Policy { .. })));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn discussing_dollars_without_salary_keyword_passes() {
|
||||
let v = EmailValidator::new(lookup(vec![]));
|
||||
let r = v.validate(&Artifact::EmailDraft(json!({
|
||||
"to": "x@y.com",
|
||||
"body": "The $20 parking pass is at the front desk."
|
||||
})));
|
||||
assert!(r.is_ok(), "non-salary $ should pass: {:?}", r);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unknown_candidate_id_fails_consistency() {
|
||||
let v = EmailValidator::new(lookup(vec![]));
|
||||
let r = v.validate(&Artifact::EmailDraft(json!({
|
||||
"to": "x@y.com",
|
||||
"body": "Hi Jane",
|
||||
"_context": {"candidate_id": "W-FAKE"}
|
||||
})));
|
||||
match r {
|
||||
Err(ValidationError::Consistency { reason }) => assert!(reason.contains("not found")),
|
||||
other => panic!("expected Consistency, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn missing_first_name_in_body_is_warning() {
|
||||
let v = EmailValidator::new(lookup(vec![worker("W-1", "Jane Doe")]));
|
||||
let r = v.validate(&Artifact::EmailDraft(json!({
|
||||
"to": "x@y.com",
|
||||
"body": "Hi there — confirming your interview Friday.",
|
||||
"_context": {"candidate_id": "W-1"}
|
||||
})));
|
||||
let report = r.expect("missing name should be warning, not error");
|
||||
assert_eq!(report.findings.len(), 1);
|
||||
assert_eq!(report.findings[0].severity, crate::Severity::Warning);
|
||||
assert!(report.findings[0].message.to_lowercase().contains("first name"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn matching_first_name_passes_clean() {
|
||||
let v = EmailValidator::new(lookup(vec![worker("W-1", "Jane Doe")]));
|
||||
let r = v.validate(&Artifact::EmailDraft(json!({
|
||||
"to": "x@y.com",
|
||||
"body": "Hi Jane — confirming your interview Friday.",
|
||||
"_context": {"candidate_id": "W-1"}
|
||||
})));
|
||||
let report = r.expect("matching name should pass");
|
||||
assert!(report.findings.is_empty(), "expected no findings, got {:?}", report.findings);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,22 +1,67 @@
|
||||
//! Fill-proposal validator.
|
||||
//! Fill-proposal validator (Phase 43 v2 — real consistency checks).
|
||||
//!
|
||||
//! PRD checks:
|
||||
//! - Schema compliance (propose_done shape matches
|
||||
//! `{fills: [{candidate_id, name}]}`)
|
||||
//! - Schema compliance (propose_done shape: `{fills: [{candidate_id, name}]}`)
|
||||
//! - Completeness (endorsed count == target_count)
|
||||
//! - Worker existence (every candidate_id present in workers_500k)
|
||||
//! - Status check (active, not_on_client_blacklist)
|
||||
//! - Worker existence (every candidate_id present in workers roster)
|
||||
//! - Status check (worker.status == "active")
|
||||
//! - Client blacklist (worker NOT in client.blacklisted_clients)
|
||||
//! - Geo/role match (worker city/state/role matches contract)
|
||||
//!
|
||||
//! Today this is a scaffold — schema check is real (it's cheap); the
|
||||
//! worker-existence / status / geo checks need a catalog lookup and
|
||||
//! land in a follow-up when the catalog query helper is wired into
|
||||
//! this crate.
|
||||
//! The contract metadata (target_count, city, state, role, client_id)
|
||||
//! travels alongside the JSON payload under a `_context` key:
|
||||
//! `{"_context": {"target_count": 2, "city": "Toledo", "state": "OH",
|
||||
//! "role": "Welder", "client_id": "CLI-00099"}, "fills": [...]}`.
|
||||
//! This keeps the Validator trait signature stable while letting the
|
||||
//! validator cross-check fills against contract truth.
|
||||
//!
|
||||
//! Worker-existence + status + geo + blacklist all share a single
|
||||
//! lookup trait (`WorkerLookup`) so the validator stays decoupled
|
||||
//! from queryd / parquet / catalogd transport details.
|
||||
|
||||
use crate::{Artifact, Report, Validator, ValidationError};
|
||||
use crate::{
|
||||
Artifact, Report, Validator, ValidationError, WorkerLookup, WorkerRecord,
|
||||
};
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
pub struct FillValidator;
|
||||
pub struct FillValidator {
|
||||
workers: Arc<dyn WorkerLookup>,
|
||||
}
|
||||
|
||||
impl FillValidator {
|
||||
pub fn new(workers: Arc<dyn WorkerLookup>) -> Self {
|
||||
Self { workers }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct FillContext {
|
||||
target_count: Option<usize>,
|
||||
city: Option<String>,
|
||||
state: Option<String>,
|
||||
role: Option<String>,
|
||||
client_id: Option<String>,
|
||||
}
|
||||
|
||||
fn extract_context(value: &serde_json::Value) -> FillContext {
|
||||
let ctx_obj = value.get("_context").and_then(|c| c.as_object());
|
||||
let ctx = match ctx_obj {
|
||||
Some(o) => o,
|
||||
None => return FillContext::default(),
|
||||
};
|
||||
FillContext {
|
||||
target_count: ctx.get("target_count").and_then(|v| v.as_u64()).map(|n| n as usize),
|
||||
city: ctx.get("city").and_then(|v| v.as_str()).map(String::from),
|
||||
state: ctx.get("state").and_then(|v| v.as_str()).map(String::from),
|
||||
role: ctx.get("role").and_then(|v| v.as_str()).map(String::from),
|
||||
client_id: ctx.get("client_id").and_then(|v| v.as_str()).map(String::from),
|
||||
}
|
||||
}
|
||||
|
||||
fn eq_ci(a: &str, b: &str) -> bool {
|
||||
a.trim().eq_ignore_ascii_case(b.trim())
|
||||
}
|
||||
|
||||
impl Validator for FillValidator {
|
||||
fn name(&self) -> &'static str { "staffing.fill" }
|
||||
@ -31,9 +76,7 @@ impl Validator for FillValidator {
|
||||
}),
|
||||
};
|
||||
|
||||
// Schema check — the only real validation shipped in this
|
||||
// scaffold. Catches the common "model emitted prose instead of
|
||||
// JSON" failure mode before the consistency checks even run.
|
||||
// ── Schema check ──
|
||||
let fills = value.get("fills").and_then(|f| f.as_array()).ok_or(
|
||||
ValidationError::Schema {
|
||||
field: "fills".into(),
|
||||
@ -55,12 +98,116 @@ impl Validator for FillValidator {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(phase-43 v2): worker-existence / status / geo checks.
|
||||
// Need a catalog query handle injected into FillValidator's
|
||||
// constructor — out of scope for the scaffold.
|
||||
let ctx = extract_context(value);
|
||||
|
||||
// ── Completeness: count match ──
|
||||
if let Some(target) = ctx.target_count {
|
||||
if fills.len() != target {
|
||||
return Err(ValidationError::Completeness {
|
||||
reason: format!(
|
||||
"endorsed count {} != target_count {target}",
|
||||
fills.len()
|
||||
),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// ── Cross-roster checks ──
|
||||
let mut findings: Vec<crate::Finding> = vec![];
|
||||
let mut seen_ids = std::collections::HashSet::new();
|
||||
for (i, fill) in fills.iter().enumerate() {
|
||||
let candidate_id = fill.get("candidate_id").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let proposed_name = fill.get("name").and_then(|v| v.as_str()).unwrap_or("");
|
||||
|
||||
// Duplicate-ID guard inside one fill.
|
||||
if !seen_ids.insert(candidate_id.to_string()) {
|
||||
return Err(ValidationError::Consistency {
|
||||
reason: format!(
|
||||
"duplicate candidate_id {candidate_id:?} appears multiple times in fills"
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
// Worker existence — the gate that catches phantom IDs the
|
||||
// model fabricates. This is the load-bearing check for
|
||||
// the 0→85% pattern.
|
||||
let worker: WorkerRecord = match self.workers.find(candidate_id) {
|
||||
Some(w) => w,
|
||||
None => return Err(ValidationError::Consistency {
|
||||
reason: format!(
|
||||
"fills[{i}].candidate_id {candidate_id:?} does not exist in worker roster"
|
||||
),
|
||||
}),
|
||||
};
|
||||
|
||||
// Status — only "active" workers can be endorsed.
|
||||
if !eq_ci(&worker.status, "active") {
|
||||
return Err(ValidationError::Consistency {
|
||||
reason: format!(
|
||||
"fills[{i}] worker {candidate_id:?} has status {:?}, expected \"active\"",
|
||||
worker.status
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
// Client blacklist.
|
||||
if let Some(client) = ctx.client_id.as_deref() {
|
||||
if worker.blacklisted_clients.iter().any(|b| eq_ci(b, client)) {
|
||||
return Err(ValidationError::Policy {
|
||||
reason: format!(
|
||||
"fills[{i}] worker {candidate_id:?} blacklisted for client {client:?}"
|
||||
),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Geo / role match — warn-level when missing context, hard
|
||||
// fail on mismatch with explicit contract values.
|
||||
if let (Some(want_city), Some(have_city)) = (ctx.city.as_deref(), worker.city.as_deref()) {
|
||||
if !eq_ci(want_city, have_city) {
|
||||
return Err(ValidationError::Consistency {
|
||||
reason: format!(
|
||||
"fills[{i}] worker {candidate_id:?} city {have_city:?} doesn't match contract city {want_city:?}"
|
||||
),
|
||||
});
|
||||
}
|
||||
}
|
||||
if let (Some(want_state), Some(have_state)) = (ctx.state.as_deref(), worker.state.as_deref()) {
|
||||
if !eq_ci(want_state, have_state) {
|
||||
return Err(ValidationError::Consistency {
|
||||
reason: format!(
|
||||
"fills[{i}] worker {candidate_id:?} state {have_state:?} doesn't match contract state {want_state:?}"
|
||||
),
|
||||
});
|
||||
}
|
||||
}
|
||||
if let (Some(want_role), Some(have_role)) = (ctx.role.as_deref(), worker.role.as_deref()) {
|
||||
if !eq_ci(want_role, have_role) {
|
||||
return Err(ValidationError::Consistency {
|
||||
reason: format!(
|
||||
"fills[{i}] worker {candidate_id:?} role {have_role:?} doesn't match contract role {want_role:?}"
|
||||
),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Name-mismatch is a warning, not an error — recruiters
|
||||
// sometimes send updated names through the proposal layer
|
||||
// before the roster is updated.
|
||||
if !proposed_name.is_empty() && !eq_ci(proposed_name, &worker.name) {
|
||||
findings.push(crate::Finding {
|
||||
field: format!("fills[{i}].name"),
|
||||
severity: crate::Severity::Warning,
|
||||
message: format!(
|
||||
"proposed name {proposed_name:?} differs from roster name {:?} for {candidate_id:?}",
|
||||
worker.name
|
||||
),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Report {
|
||||
findings: vec![],
|
||||
findings,
|
||||
elapsed_ms: started.elapsed().as_millis() as u64,
|
||||
})
|
||||
}
|
||||
@ -69,35 +216,168 @@ impl Validator for FillValidator {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::InMemoryWorkerLookup;
|
||||
use serde_json::json;
|
||||
|
||||
fn lookup(records: Vec<WorkerRecord>) -> Arc<dyn WorkerLookup> {
|
||||
Arc::new(InMemoryWorkerLookup::from_records(records))
|
||||
}
|
||||
|
||||
fn worker(id: &str, name: &str, status: &str, city: &str, state: &str, role: &str) -> WorkerRecord {
|
||||
WorkerRecord {
|
||||
candidate_id: id.into(),
|
||||
name: name.into(),
|
||||
status: status.into(),
|
||||
city: Some(city.into()),
|
||||
state: Some(state.into()),
|
||||
role: Some(role.into()),
|
||||
blacklisted_clients: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn wrong_artifact_type_fails_schema() {
|
||||
let r = FillValidator.validate(&Artifact::EmailDraft(serde_json::json!({})));
|
||||
let v = FillValidator::new(lookup(vec![]));
|
||||
let r = v.validate(&Artifact::EmailDraft(json!({})));
|
||||
assert!(matches!(r, Err(ValidationError::Schema { .. })));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn missing_fills_array_fails_schema() {
|
||||
let r = FillValidator.validate(&Artifact::FillProposal(serde_json::json!({})));
|
||||
let v = FillValidator::new(lookup(vec![]));
|
||||
let r = v.validate(&Artifact::FillProposal(json!({})));
|
||||
assert!(matches!(r, Err(ValidationError::Schema { field, .. }) if field == "fills"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fill_without_candidate_id_fails() {
|
||||
let r = FillValidator.validate(&Artifact::FillProposal(serde_json::json!({
|
||||
"fills": [{"name": "Jane"}]
|
||||
})));
|
||||
let v = FillValidator::new(lookup(vec![]));
|
||||
let r = v.validate(&Artifact::FillProposal(json!({"fills": [{"name": "Jane"}]})));
|
||||
assert!(matches!(r, Err(ValidationError::Schema { field, .. }) if field.contains("candidate_id")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn well_formed_proposal_passes_schema() {
|
||||
let r = FillValidator.validate(&Artifact::FillProposal(serde_json::json!({
|
||||
fn well_formed_proposal_with_real_workers_passes() {
|
||||
let v = FillValidator::new(lookup(vec![
|
||||
worker("W-1", "Jane Doe", "active", "Toledo", "OH", "Welder"),
|
||||
worker("W-2", "John Smith", "active", "Toledo", "OH", "Welder"),
|
||||
]));
|
||||
let r = v.validate(&Artifact::FillProposal(json!({
|
||||
"_context": {"target_count": 2, "city": "Toledo", "state": "OH", "role": "Welder"},
|
||||
"fills": [
|
||||
{"candidate_id": "W-123", "name": "Jane Doe"},
|
||||
{"candidate_id": "W-456", "name": "John Smith"}
|
||||
{"candidate_id": "W-1", "name": "Jane Doe"},
|
||||
{"candidate_id": "W-2", "name": "John Smith"}
|
||||
]
|
||||
})));
|
||||
assert!(r.is_ok(), "well-formed proposal should pass schema: {:?}", r);
|
||||
assert!(r.is_ok(), "expected pass, got {:?}", r);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn phantom_candidate_id_fails_consistency() {
|
||||
let v = FillValidator::new(lookup(vec![worker("W-1", "Jane", "active", "Toledo", "OH", "Welder")]));
|
||||
let r = v.validate(&Artifact::FillProposal(json!({
|
||||
"_context": {"target_count": 1, "city": "Toledo", "state": "OH", "role": "Welder"},
|
||||
"fills": [{"candidate_id": "W-FAKE-99999", "name": "Imaginary"}]
|
||||
})));
|
||||
match r {
|
||||
Err(ValidationError::Consistency { reason }) => assert!(reason.contains("does not exist")),
|
||||
other => panic!("expected Consistency error, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn inactive_worker_fails_consistency() {
|
||||
let v = FillValidator::new(lookup(vec![worker("W-1", "Jane", "inactive", "Toledo", "OH", "Welder")]));
|
||||
let r = v.validate(&Artifact::FillProposal(json!({
|
||||
"_context": {"target_count": 1},
|
||||
"fills": [{"candidate_id": "W-1", "name": "Jane"}]
|
||||
})));
|
||||
match r {
|
||||
Err(ValidationError::Consistency { reason }) => assert!(reason.contains("inactive")),
|
||||
other => panic!("expected Consistency error, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn wrong_city_fails_consistency() {
|
||||
let v = FillValidator::new(lookup(vec![worker("W-1", "Jane", "active", "Cincinnati", "OH", "Welder")]));
|
||||
let r = v.validate(&Artifact::FillProposal(json!({
|
||||
"_context": {"target_count": 1, "city": "Toledo", "state": "OH", "role": "Welder"},
|
||||
"fills": [{"candidate_id": "W-1", "name": "Jane"}]
|
||||
})));
|
||||
match r {
|
||||
Err(ValidationError::Consistency { reason }) => assert!(reason.to_lowercase().contains("city")),
|
||||
other => panic!("expected Consistency error, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn wrong_role_fails_consistency() {
|
||||
let v = FillValidator::new(lookup(vec![worker("W-1", "Jane", "active", "Toledo", "OH", "Driver")]));
|
||||
let r = v.validate(&Artifact::FillProposal(json!({
|
||||
"_context": {"target_count": 1, "city": "Toledo", "state": "OH", "role": "Welder"},
|
||||
"fills": [{"candidate_id": "W-1", "name": "Jane"}]
|
||||
})));
|
||||
match r {
|
||||
Err(ValidationError::Consistency { reason }) => assert!(reason.to_lowercase().contains("role")),
|
||||
other => panic!("expected Consistency error, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn count_mismatch_fails_completeness() {
|
||||
let v = FillValidator::new(lookup(vec![
|
||||
worker("W-1", "Jane", "active", "Toledo", "OH", "Welder"),
|
||||
]));
|
||||
let r = v.validate(&Artifact::FillProposal(json!({
|
||||
"_context": {"target_count": 2, "city": "Toledo", "state": "OH", "role": "Welder"},
|
||||
"fills": [{"candidate_id": "W-1", "name": "Jane"}]
|
||||
})));
|
||||
assert!(matches!(r, Err(ValidationError::Completeness { .. })));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn duplicate_candidate_id_fails_consistency() {
|
||||
let v = FillValidator::new(lookup(vec![
|
||||
worker("W-1", "Jane", "active", "Toledo", "OH", "Welder"),
|
||||
]));
|
||||
let r = v.validate(&Artifact::FillProposal(json!({
|
||||
"_context": {"target_count": 2, "city": "Toledo", "state": "OH", "role": "Welder"},
|
||||
"fills": [
|
||||
{"candidate_id": "W-1", "name": "Jane"},
|
||||
{"candidate_id": "W-1", "name": "Jane"}
|
||||
]
|
||||
})));
|
||||
match r {
|
||||
Err(ValidationError::Consistency { reason }) => assert!(reason.contains("duplicate")),
|
||||
other => panic!("expected Consistency error, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn blacklisted_worker_fails_policy() {
|
||||
let mut w = worker("W-1", "Jane", "active", "Toledo", "OH", "Welder");
|
||||
w.blacklisted_clients = vec!["CLI-00099".into()];
|
||||
let v = FillValidator::new(lookup(vec![w]));
|
||||
let r = v.validate(&Artifact::FillProposal(json!({
|
||||
"_context": {"target_count": 1, "city": "Toledo", "state": "OH", "role": "Welder", "client_id": "CLI-00099"},
|
||||
"fills": [{"candidate_id": "W-1", "name": "Jane"}]
|
||||
})));
|
||||
assert!(matches!(r, Err(ValidationError::Policy { .. })));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn name_mismatch_is_warning_not_error() {
|
||||
let v = FillValidator::new(lookup(vec![
|
||||
worker("W-1", "Jane Doe", "active", "Toledo", "OH", "Welder"),
|
||||
]));
|
||||
let r = v.validate(&Artifact::FillProposal(json!({
|
||||
"_context": {"target_count": 1, "city": "Toledo", "state": "OH", "role": "Welder"},
|
||||
"fills": [{"candidate_id": "W-1", "name": "Janet Doe"}]
|
||||
})));
|
||||
let report = r.expect("name mismatch should be warning, not error");
|
||||
assert_eq!(report.findings.len(), 1);
|
||||
assert_eq!(report.findings[0].severity, crate::Severity::Warning);
|
||||
assert!(report.findings[0].message.contains("differs from roster"));
|
||||
}
|
||||
}
|
||||
|
||||
81
reports/kimi/cross-lineage-bakeoff.md
Normal file
81
reports/kimi/cross-lineage-bakeoff.md
Normal file
@ -0,0 +1,81 @@
|
||||
# Cross-Lineage Auditor Bake-Off — 2026-04-27
|
||||
|
||||
Same diff (`HEAD~5..HEAD~2`, 32KB, 3 commits = the kimi-integration work)
|
||||
audited by three models from three vendor lineages. All three through
|
||||
the lakehouse gateway, all three with the same `kimi_architect` prompt
|
||||
template + grounding verification.
|
||||
|
||||
## Results
|
||||
|
||||
| | Kimi K2.6 (Moonshot) | Haiku 4.5 (Anthropic) | Opus 4.7 (Anthropic) |
|
||||
|---|---|---|---|
|
||||
| Provider | ollama_cloud | opencode/Zen | opencode/Zen |
|
||||
| Latency | 53.7s | **20.5s** | 53.6s |
|
||||
| Findings | 10 | 9 | 10 |
|
||||
| Grounded | 10/10 | 9/9 | 10/10 |
|
||||
| Severity (block/warn/info) | 0 / 9 / 1 | 0 / 5 / 4 | **3 / 5 / 2** |
|
||||
| Cost | flat-sub (Ollama Pro) | ~$0.02 | ~$0.10–0.15 |
|
||||
| Style | Architectural / migration | Boundary / resilience | Escalation / cross-file |
|
||||
|
||||
## Severity escalation pattern
|
||||
|
||||
Only Opus produced `block`-level findings. Haiku and Kimi described
|
||||
the same kind of issues as `warn`. This isn't randomness — it's
|
||||
training. Anthropic's Opus is calibrated to flag merge-stoppers more
|
||||
confidently than the lighter-weight or different-lineage models.
|
||||
|
||||
## What ONLY Opus 4.7 caught
|
||||
|
||||
- `parseFindings` rationale regex truncates on inline `**bold**`
|
||||
inside rationales — neither Haiku nor Kimi noticed
|
||||
- Cache-by-head-SHA survives `LH_AUDITOR_KIMI_MODEL` env flip
|
||||
(silently returns old findings under wrong model name)
|
||||
- Gateway/auditor timeout mismatch: kimi.rs 600s vs auditor curl 900s
|
||||
|
||||
## What ALL three caught
|
||||
|
||||
- `(ev as any).contractor` schema bypass (3/3)
|
||||
- Empty-env `Number("")` returns 0 trap (3/3)
|
||||
- `readFileSync` in async function (3/3)
|
||||
- mode.rs Rust test compile error (3/3)
|
||||
|
||||
Three-lineage consensus = high-confidence load-bearing real bug.
|
||||
|
||||
## What only Kimi K2.6 caught
|
||||
|
||||
- Schema version bump v1→v2 without explicit migration path
|
||||
- ISO timestamp precision in run_id derivation
|
||||
- Multimodal content array passed verbatim to Kimi (would 400)
|
||||
|
||||
Kimi favors architectural / API-contract concerns. Useful when the
|
||||
diff is a refactor rather than a feature.
|
||||
|
||||
## What only Haiku 4.5 caught
|
||||
|
||||
- `appendMetrics` mkdir target uses `join(path, "..")` not `dirname`
|
||||
- `KIMI_MODEL` cast in `parseFindings` not validated against type
|
||||
- Truncation of error messages in callKimi at 300 chars loses context
|
||||
|
||||
Haiku favors boundary cases — what happens when assumptions break.
|
||||
|
||||
## Cost-vs-quality verdict
|
||||
|
||||
| Diff size | Recommended model | Why |
|
||||
|---|---|---|
|
||||
| < 100k chars (normal PRs) | Haiku 4.5 | 80% of the same surface, 5x cheaper, 2.6x faster |
|
||||
| > 100k chars (refactors, multi-file) | Opus 4.7 | Cross-file ramifications + escalation that lighter models miss |
|
||||
|
||||
Auto-promotion implemented in `auditor/checks/kimi_architect.ts:74`
|
||||
via `selectModel(diffLen)`. Threshold env-overridable
|
||||
(`LH_AUDITOR_KIMI_OPUS_THRESHOLD_CHARS`, default 100000).
|
||||
|
||||
## Methodology notes
|
||||
|
||||
- Same prompt template, same grounding rules, same input bundle
|
||||
- Each call cached at `data/_auditor/kimi_verdicts/<pr>-<sha>.json`
|
||||
- Per-call metrics appended to `data/_kb/kimi_audits.jsonl`
|
||||
- Wall-clock measured from request POST to response parse
|
||||
- Cost computed as `prompt_tokens * input_rate + completion_tokens * output_rate`
|
||||
- `usage.prompt_tokens` underreports through opencode proxy path
|
||||
(verified ~7k input tokens vs reported 5); cost figures use
|
||||
observed prompt size rather than reported.
|
||||
@ -122,9 +122,18 @@ function synthesizeSft(
|
||||
case "observer_reviews":
|
||||
instruction = `Observer-review the latest attempt on '${ev.source_files?.[0] ?? "<file>"}'. Verdict: accept | reject | cycle.`;
|
||||
break;
|
||||
case "contract_analyses":
|
||||
instruction = `Analyze contractor '${(ev as any).contractor ?? "<contractor>"}' for permit '${ev.task_id.replace(/^permit:/, "")}'. Recommend with risk markers.`;
|
||||
case "contract_analyses": {
|
||||
// Read contractor from the typed metadata bucket (populated in
|
||||
// transforms.ts for contract_analyses rows). Pre-2026-04-27 this
|
||||
// used `(ev as any).contractor` and silently emitted "<contractor>"
|
||||
// for every row because EvidenceRecord didn't carry the field.
|
||||
const contractor = typeof ev.metadata?.contractor === "string" ? ev.metadata.contractor : null;
|
||||
const permit = ev.task_id.replace(/^permit:/, "");
|
||||
instruction = contractor
|
||||
? `Analyze contractor '${contractor}' for permit '${permit}'. Recommend with risk markers.`
|
||||
: `Analyze permit '${permit}'. Recommend with risk markers.`;
|
||||
break;
|
||||
}
|
||||
case "outcomes":
|
||||
instruction = `Run scenario; report per-event outcome with citations.`;
|
||||
break;
|
||||
|
||||
@ -451,7 +451,7 @@ export function buildDrift(current: RunSummary, prior: RunSummary | null): Drift
|
||||
delta_accepted: cur.accepted,
|
||||
delta_quarantined: cur.quarantined,
|
||||
pct_change_out: null,
|
||||
input_hash_match: false,
|
||||
input_hash_match: null, // no prior stage to compare
|
||||
output_hash_match: false,
|
||||
deterministic_violation: false,
|
||||
notes: ["stage not present in prior run"],
|
||||
@ -461,12 +461,12 @@ export function buildDrift(current: RunSummary, prior: RunSummary | null): Drift
|
||||
}
|
||||
const pct = pctChange(pri.records_out, cur.records_out);
|
||||
const out_match = pri.output_hash === cur.output_hash;
|
||||
const inp_match = (current.stages.find(s => s.stage === cur.stage)?.output_hash ?? "")
|
||||
!== "" /* placeholder */;
|
||||
// We have output_hash on stage summaries but not input_hash —
|
||||
// input_hash lives on the full StageReceipt, which we can re-read
|
||||
// from the run dir if needed. For simplicity, drift compares the
|
||||
// OUTPUT hashes (what really changed).
|
||||
// input_hash is NOT materialized into stage summaries (lives on the
|
||||
// per-stage StageReceipt files on disk). We don't load them here, so
|
||||
// we honestly report null. Schema v2 makes this explicit; v1 returned
|
||||
// `true` unconditionally which made deterministic_violation always
|
||||
// false even when it should have alerted. Cross-run determinism
|
||||
// enforcement is its own pass — see ./scripts/distill audit-full.
|
||||
const notes: string[] = [];
|
||||
if (pct !== null && Math.abs(pct) > DRIFT_THRESHOLD_PCT) {
|
||||
const dir = pct > 0 ? "spike" : "drop";
|
||||
@ -492,9 +492,9 @@ export function buildDrift(current: RunSummary, prior: RunSummary | null): Drift
|
||||
delta_accepted: cur.accepted - pri.accepted,
|
||||
delta_quarantined: cur.quarantined - pri.quarantined,
|
||||
pct_change_out: pct,
|
||||
input_hash_match: true, // simplified — see comment above
|
||||
input_hash_match: null, // not computed at this layer; see comment above
|
||||
output_hash_match: out_match,
|
||||
deterministic_violation: false, // requires input_hash match, see future tightening
|
||||
deterministic_violation: false, // requires input_hash match — null means "unknown", not "verified"
|
||||
notes,
|
||||
});
|
||||
}
|
||||
|
||||
@ -375,7 +375,12 @@ export async function replay(opts: ReplayRequest, root = DEFAULT_ROOT): Promise<
|
||||
}
|
||||
}
|
||||
|
||||
const recorded_run_id = `replay:${task_hash.slice(0, 16)}:${Date.now()}`;
|
||||
// Stable derivation from task_hash + recorded_at (already an ISO
|
||||
// timestamp captured at start of the call). Avoids a second wall-clock
|
||||
// read and makes run_id reproducible given a fixed recorded_at — useful
|
||||
// for fixture-driven tests + acceptance gates. Replaces Date.now()-based
|
||||
// id post-Kimi-audit 2026-04-27.
|
||||
const recorded_run_id = `replay:${task_hash.slice(0, 16)}:${(await canonicalSha256(recorded_at)).slice(0, 12)}`;
|
||||
const result: ReplayResult = {
|
||||
input_task: opts.task,
|
||||
task_hash,
|
||||
|
||||
@ -86,6 +86,17 @@ function gitDirty(root: string): boolean {
|
||||
return r.status === 0 && r.stdout.trim().length > 0;
|
||||
}
|
||||
|
||||
// Composite dedup key — `sig_hash:scorer_version`. Keying on sig_hash
|
||||
// alone made scorer-rule bumps invisible: a bumped SCORER_VERSION
|
||||
// produced different scoring categories, but pre-existing rows on disk
|
||||
// (with the OLD version) still matched the new sig_hash and the new
|
||||
// scoring was silently skipped. Compositing version forces re-scoring
|
||||
// when the version changes. Caller tags `scorer_version` on the
|
||||
// ScoredRun row, which we read alongside sig_hash.
|
||||
function dedupKey(sig_hash: string, scorer_version: string): string {
|
||||
return `${sig_hash}:${scorer_version}`;
|
||||
}
|
||||
|
||||
function loadSeenHashes(out_path: string): Set<string> {
|
||||
const seen = new Set<string>();
|
||||
if (!existsSync(out_path)) return seen;
|
||||
@ -93,7 +104,9 @@ function loadSeenHashes(out_path: string): Set<string> {
|
||||
if (!line) continue;
|
||||
try {
|
||||
const row = JSON.parse(line);
|
||||
if (row?.provenance?.sig_hash) seen.add(row.provenance.sig_hash);
|
||||
const sh = row?.provenance?.sig_hash;
|
||||
const sv = row?.scorer_version;
|
||||
if (sh && sv) seen.add(dedupKey(sh, sv));
|
||||
} catch { /* malformed — ignore */ }
|
||||
}
|
||||
return seen;
|
||||
@ -156,11 +169,12 @@ async function processEvidenceFile(
|
||||
}
|
||||
|
||||
const scored = await buildScoredRun(ev.value as EvidenceRecord, out_relpath, i, opts.recorded_at);
|
||||
if (seen.has(scored.provenance.sig_hash)) {
|
||||
const key = dedupKey(scored.provenance.sig_hash, scored.scorer_version);
|
||||
if (seen.has(key)) {
|
||||
result.rows_deduped++;
|
||||
continue;
|
||||
}
|
||||
seen.add(scored.provenance.sig_hash);
|
||||
seen.add(key);
|
||||
|
||||
const sv = validateScoredRun(scored);
|
||||
if (!sv.valid) {
|
||||
|
||||
@ -27,7 +27,11 @@ import type { ScoreCategory, ScoredRun } from "../../auditor/schemas/distillatio
|
||||
import { SCORED_RUN_SCHEMA_VERSION } from "../../auditor/schemas/distillation/scored_run";
|
||||
import { canonicalSha256 } from "../../auditor/schemas/distillation/types";
|
||||
|
||||
export const SCORER_VERSION = process.env.LH_SCORER_VERSION ?? "v1.0.0";
|
||||
// Hardcoded — the deterministic-output contract requires this. Bump the
|
||||
// literal in the same commit as any scoring-rule change so the version
|
||||
// stamp moves atomically with logic. Env override removed 2026-04-27
|
||||
// after Kimi audit flagged identical-input-different-version drift.
|
||||
export const SCORER_VERSION = "v1.0.0";
|
||||
|
||||
export interface ScoreOutput {
|
||||
category: ScoreCategory;
|
||||
|
||||
@ -100,6 +100,9 @@ export const TRANSFORMS: TransformDef[] = [
|
||||
cost_usd: typeof row.cost === "number" ? row.cost / 1_000_000 : undefined,
|
||||
latency_ms: row.duration_ms,
|
||||
text: row.analysis,
|
||||
metadata: typeof row.contractor === "string" && row.contractor.length > 0
|
||||
? { contractor: row.contractor }
|
||||
: undefined,
|
||||
}),
|
||||
},
|
||||
{
|
||||
@ -178,7 +181,11 @@ export const TRANSFORMS: TransformDef[] = [
|
||||
// even though the text field is empty.
|
||||
source_file_relpath: "data/_kb/auto_apply.jsonl",
|
||||
transform: ({ row, line_offset, source_file_relpath, recorded_at, sig_hash }) => {
|
||||
const ts: string = row.ts ?? new Date().toISOString();
|
||||
// Deterministic fallback: use the source-file's recorded_at when
|
||||
// the row itself lacks a ts. Wall-clock (new Date()) leaked here
|
||||
// pre-2026-04-27 — broke bit-identical reproducibility on rows
|
||||
// that historically wrote without a ts field.
|
||||
const ts: string = row.ts ?? recorded_at;
|
||||
const action = String(row.action ?? "unknown");
|
||||
const success = action === "committed";
|
||||
const reverted = action.includes("reverted");
|
||||
|
||||
@ -113,10 +113,17 @@ const TARGET_FILES: string[] = process.env.LH_SCRUM_FILES
|
||||
// strategy. Kimi K2.6, Gemini, free-tier, local fallback, etc. were
|
||||
// removed — they're available as routable tools later (mode router)
|
||||
// but not as automatic fallbacks.
|
||||
const LADDER: Array<{ provider: "ollama" | "ollama_cloud" | "openrouter"; model: string; note: string }> = [
|
||||
const LADDER: Array<{ provider: "ollama" | "ollama_cloud" | "openrouter" | "kimi"; model: string; note: string }> = [
|
||||
{ provider: "openrouter", model: "x-ai/grok-4.1-fast", note: "PRIMARY · Grok 4.1 fast · $0.20/$0.50 · 2M ctx · single-model strategy" },
|
||||
{ provider: "openrouter", model: "deepseek/deepseek-v4-flash", note: "FALLBACK on provider error · DeepSeek V4 flash · $0.14/$0.28 · 1M ctx" },
|
||||
{ provider: "openrouter", model: "qwen/qwen3-235b-a22b-2507", note: "LAST FALLBACK on provider error · Qwen3 235B · $0.07/$0.10 · 262K" },
|
||||
// kimi/kimi-for-coding (api.kimi.com) is wired through the gateway
|
||||
// but NOT in the auto-ladder. The endpoint is gated to specific
|
||||
// approved coding-agent User-Agents (Claude Code, Kimi CLI, Roo Code,
|
||||
// Kilo Code). Spoofing a User-Agent works technically but Moonshot's
|
||||
// TOS marks it as grounds for membership suspension. Use Kimi via a
|
||||
// sanctioned client (Claude Code subagent / Kimi CLI), not via this
|
||||
// unattended scrum loop.
|
||||
// Dropped from the ladder after 2026-04-24 probe:
|
||||
// - kimi-k2.6 — not available on current tier (empty response)
|
||||
// - devstral-2:123b — displaced by qwen3-coder:480b (better coding specialist)
|
||||
@ -738,7 +745,7 @@ async function lookupSignalClass(filePath: string): Promise<string | null> {
|
||||
}
|
||||
|
||||
async function chat(opts: {
|
||||
provider: "ollama" | "ollama_cloud",
|
||||
provider: "ollama" | "ollama_cloud" | "openrouter" | "kimi",
|
||||
model: string,
|
||||
prompt: string,
|
||||
max_tokens?: number,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user