Merge PR #11: distillation v1.0.0 + Phase 42-45 + auditor cross-lineage + staffing cutover

Closes the long-running scrum/auto-apply-19814 branch.

118 commits including:
- Distillation v1.0.0 substrate (tag distillation-v1.0.0 / e7636f2) — 145 tests, 22/22 acceptance, 16/16 audit-full
- Auditor rebuild on substrate (88s vs 25min, 50x fewer cloud calls)
- Phase 42-45 closure (validator crate + /v1/validate + /v1/iterate + /v1/health + /doc_drift/scan + Phase 44 /v1/chat migration)
- Auditor cross-lineage fabric (Kimi K2.6 / Haiku 4.5 / Opus 4.7 auto-promotion + per-PR cap with auto-reset on push)
- 5-provider routing (added opencode + kimi-direct adapters)
- Mode runner with composed-corpus downgrade gate (codereview_isolation default; composed lost 5/5 on grok-4.1-fast)
- Staffing cutover decisions A/C/D + B safe views — workers_500k_v9 corpus rebuild deferred to background job

Verified before merge:
- audit-full 16/16 required pass
- cargo check -p validator -p gateway clean
- All kimi_architect BLOCK findings dismissed as confabulation, logged in data/_kb/human_overrides.jsonl
- Kimi forensic HOLD on v1.0.0 verified manually: 2/8 findings were false positives; the other 6/8 were latent guarantees that do not fire under prod data
This commit is contained in:
profit 2026-04-27 15:55:22 +00:00
commit ed57eda1d8
195 changed files with 34614 additions and 615 deletions

View File

@ -0,0 +1,42 @@
# Real Archon workflow on the Lakehouse repo, fully via our gateway.
# Three Pi nodes, each fires LLM → /v1/chat/completions → OpenRouter,
# every call lands a Langfuse trace + observer event.
#
# Read-only (allowed_tools: [read]). At runtime, omit --branch and keep
# --no-worktree so Archon doesn't try to create a worktree.
name: lakehouse-architect-review
description: 'Pi reviews Lakehouse architecture in 3 turns through our gateway.'
provider: pi
model: openrouter/x-ai/grok-4.1-fast
nodes:
  # Turn 1 — establish the architectural shape from the top-level
  # manifests and the tuning plan. Read-only.
  - id: shape
    prompt: |
      Read these files and answer in 3 short bullets describing the
      architectural shape of Lakehouse:
      - /home/profit/lakehouse/Cargo.toml
      - /home/profit/lakehouse/lakehouse.toml
      - /home/profit/lakehouse/docs/MODE_RUNNER_TUNING_PLAN.md
      Be terse. No preamble.
    allowed_tools: ["read"]
    effort: low
    idle_timeout: 90000  # presumably milliseconds — TODO confirm against Archon's config schema
  # Turn 2 — depends on the shape turn; find one concrete weakness in
  # the gateway's v1 module, with a file:line citation.
  - id: weakness
    prompt: |
      Read /home/profit/lakehouse/crates/gateway/src/v1/mod.rs and
      identify ONE real weakness or risk. Cite file:line. One paragraph.
    allowed_tools: ["read"]
    effort: low
    idle_timeout: 90000
    depends_on: [shape]
  # Turn 3 — no tools at all; propose a small patch purely from the
  # prior node's output ($weakness.output interpolation).
  - id: improvement
    prompt: |
      Based on the prior weakness ($weakness.output), propose ONE
      surgical improvement (≤6 lines of Rust). Show the patch as
      `old_string` and `new_string` in markdown code blocks.
    allowed_tools: []
    effort: low
    idle_timeout: 90000
    depends_on: [weakness]

View File

@ -7,6 +7,14 @@
"LAKEHOUSE_URL": "http://localhost:3100", "LAKEHOUSE_URL": "http://localhost:3100",
"MCP_TRANSPORT": "stdio" "MCP_TRANSPORT": "stdio"
} }
},
"gitea": {
"command": "bunx",
"args": ["gitea-mcp"],
"env": {
"GITEA_HOST": "https://git.agentview.dev",
"GITEA_ACCESS_TOKEN": "SET_ME_FROM_GITEA_UI_USER_SETTINGS_APPLICATIONS"
}
} }
} }
} }

29
Cargo.lock generated
View File

@ -4086,11 +4086,14 @@ dependencies = [
"shared", "shared",
"storaged", "storaged",
"tokio", "tokio",
"toml",
"tonic", "tonic",
"tower-http", "tower-http",
"tracing", "tracing",
"tracing-opentelemetry", "tracing-opentelemetry",
"tracing-subscriber", "tracing-subscriber",
"truth",
"validator",
"vectord", "vectord",
] ]
@ -4679,6 +4682,7 @@ dependencies = [
"chrono", "chrono",
"croner", "croner",
"csv", "csv",
"journald",
"lopdf", "lopdf",
"mysql_async", "mysql_async",
"object_store", "object_store",
@ -6896,6 +6900,7 @@ dependencies = [
"storaged", "storaged",
"tokio", "tokio",
"tracing", "tracing",
"truth",
"url", "url",
] ]
@ -8727,6 +8732,17 @@ dependencies = [
"wasm-bindgen", "wasm-bindgen",
] ]
[[package]]
name = "truth"
version = "0.1.0"
dependencies = [
"serde",
"serde_json",
"tokio",
"toml",
"tracing",
]
[[package]] [[package]]
name = "try-lock" name = "try-lock"
version = "0.2.5" version = "0.2.5"
@ -8893,6 +8909,19 @@ dependencies = [
"wasm-bindgen", "wasm-bindgen",
] ]
[[package]]
name = "validator"
version = "0.1.0"
dependencies = [
"arrow 55.2.0",
"parquet 55.2.0",
"serde",
"serde_json",
"thiserror 2.0.18",
"tokio",
"tracing",
]
[[package]] [[package]]
name = "valuable" name = "valuable"
version = "0.1.1" version = "0.1.1"

View File

@ -14,6 +14,8 @@ members = [
"crates/ui", "crates/ui",
"crates/lance-bench", "crates/lance-bench",
"crates/vectord-lance", "crates/vectord-lance",
"crates/truth",
"crates/validator",
] ]
[workspace.dependencies] [workspace.dependencies]

View File

@ -23,6 +23,7 @@ import { runStaticCheck } from "./checks/static.ts";
import { runDynamicCheck } from "./checks/dynamic.ts"; import { runDynamicCheck } from "./checks/dynamic.ts";
import { runInferenceCheck } from "./checks/inference.ts"; import { runInferenceCheck } from "./checks/inference.ts";
import { runKbCheck } from "./checks/kb_query.ts"; import { runKbCheck } from "./checks/kb_query.ts";
import { runKimiArchitectCheck } from "./checks/kimi_architect.ts";
const VERDICTS_DIR = "/home/profit/lakehouse/data/_auditor/verdicts"; const VERDICTS_DIR = "/home/profit/lakehouse/data/_auditor/verdicts";
// Playbook for audit findings — one row per block/warn finding from a // Playbook for audit findings — one row per block/warn finding from a
@ -67,6 +68,29 @@ export async function auditPr(pr: PrSnapshot, opts: AuditOptions = {}): Promise<
...kbFindings, ...kbFindings,
]; ];
// Kimi-architect second-pass review. Off by default; enabled with
// LH_AUDITOR_KIMI=1. Sequential (not in the parallel block above)
// because it consumes the prior findings as context — Kimi sees what
// deepseek already flagged and is asked "what did everyone miss?"
// Failure-isolated by design: any error returns a single info-level
// skip finding so the existing audit pipeline never blocks on Kimi.
if (process.env.LH_AUDITOR_KIMI === "1") {
try {
const kimiFindings = await runKimiArchitectCheck(diff, allFindings, {
pr_number: pr.number,
head_sha: pr.head_sha,
});
allFindings.push(...kimiFindings);
} catch (e) {
allFindings.push({
check: "kimi_architect",
severity: "info",
summary: `kimi_architect outer error — ${(e as Error).message.slice(0, 160)}`,
evidence: [(e as Error).stack?.slice(0, 360) ?? ""],
});
}
}
const duration_ms = Date.now() - t0; const duration_ms = Date.now() - t0;
const metrics = { const metrics = {
audit_duration_ms: duration_ms, audit_duration_ms: duration_ms,
@ -184,7 +208,7 @@ function formatReviewBody(v: Verdict): string {
lines.push(""); lines.push("");
// Per-check sections, only if the check produced findings. // Per-check sections, only if the check produced findings.
const checkOrder = ["static", "dynamic", "inference", "kb_query"] as const; const checkOrder = ["static", "dynamic", "inference", "kb_query", "kimi_architect"] as const;
for (const check of checkOrder) { for (const check of checkOrder) {
const fs = byCheck[check] ?? []; const fs = byCheck[check] ?? [];
if (fs.length === 0) continue; if (fs.length === 0) continue;
@ -217,6 +241,6 @@ function formatReviewBody(v: Verdict): string {
return lines.join("\n"); return lines.join("\n");
} }
function stubFinding(check: "dynamic" | "inference", why: string): Finding[] { function stubFinding(check: "dynamic" | "inference" | "kimi_architect", why: string): Finding[] {
return [{ check, severity: "info", summary: `${check} check skipped — ${why}`, evidence: [why] }]; return [{ check, severity: "info", summary: `${check} check skipped — ${why}`, evidence: [why] }];
} }

View File

@ -18,36 +18,37 @@ import { readFile, mkdir, appendFile } from "node:fs/promises";
import { extractFacts } from "../fact_extractor.ts"; import { extractFacts } from "../fact_extractor.ts";
const GATEWAY = process.env.LH_GATEWAY_URL ?? "http://localhost:3100"; const GATEWAY = process.env.LH_GATEWAY_URL ?? "http://localhost:3100";
const MODEL = process.env.LH_AUDITOR_REVIEW_MODEL ?? "gpt-oss:120b"; // Rebuild 2026-04-26: route claim verification through /v1/mode/execute
// Tie-breaker for claims where the N=3 consensus produces a 1-1-1 // (task_class=pr_audit) so we get pathway memory + lakehouse_answers_v1
// split (genuinely borderline). Different architecture from the // + JSON-shaped framing molded into ONE prompt. The hand-rolled
// primary reviewer (gpt-oss) so the tie-break isn't correlated with // systemMsg/userMsg path was reinventing the mode runner badly.
// the original disagreement. qwen3-coder:480b is a newer coding //
// specialist at 480B params, well-suited to PR-diff claim verification // 2026-04-27 update: original default kimi-k2:1t hit a sustained
// and distinct in training lineage from gpt-oss. // upstream outage on Ollama Cloud (consistent 500 ISE across hours of
const TIEBREAKER_MODEL = process.env.LH_AUDITOR_TIEBREAKER_MODEL ?? "qwen3-coder:480b"; // retries — verified with trivial 8-token probes). Swapped default to
// deepseek-v3.1:671b which is proven working end-to-end through the
// pr_audit mode runner during Phase 5 distillation acceptance testing.
// kimi-k2:1t can be re-selected via LH_AUDITOR_REVIEW_MODEL env when
// the upstream returns. Tie-breaker stays grok-4.1-fast (different
// vendor lineage so consensus + tie-break won't fail-correlate).
const MODEL = process.env.LH_AUDITOR_REVIEW_MODEL ?? "deepseek-v3.1:671b";
const TIEBREAKER_MODEL = process.env.LH_AUDITOR_TIEBREAKER_MODEL ?? "x-ai/grok-4.1-fast";
const N_CONSENSUS = Number(process.env.LH_AUDITOR_CONSENSUS_N ?? 3); const N_CONSENSUS = Number(process.env.LH_AUDITOR_CONSENSUS_N ?? 3);
const AUDIT_DISCREPANCIES_JSONL = "/home/profit/lakehouse/data/_kb/audit_discrepancies.jsonl"; const AUDIT_DISCREPANCIES_JSONL = "/home/profit/lakehouse/data/_kb/audit_discrepancies.jsonl";
// 40KB comfortably fits gpt-oss:120b's context. PR #1 (~39KB) was // 40KB comfortably fits the consensus models' context windows
// previously truncated at 15KB causing the reviewer to miss later // (deepseek-v3.1 64K, gpt-oss-120b 128K). When the raw PR diff
// files (gitea.ts, policy.ts) and flag "no Gitea client present" as a // exceeds this, we truncate and signal it via curationNote — the
// block finding when the file was simply outside the truncation window. // pr_audit mode runner's matrix retrieval (lakehouse_answers_v1 +
// // arch + symbols) supplies the cross-PR context that tree-split
// Above this threshold we curate via tree-split rather than truncate, // used to synthesize from scratch. Tree-split itself was retired
// following the scrum_master pattern: shard the diff, summarize each // 2026-04-27 (see commit deleting treeSplitDiff/callCloud/SHARD_*).
// shard against the claim-verification task, merge into a compact
// scratchpad, then ask the cloud to verify claims against the
// scratchpad. This gives the cloud full-PR fidelity without bursting
// its context window (observed failure mode: empty response or
// unparseable output when prompt exceeds model's comfortable range).
const MAX_DIFF_CHARS = 40000; const MAX_DIFF_CHARS = 40000;
// Tree-split kicks in above this. 30KB is below MAX_DIFF_CHARS so we
// curate BEFORE truncation would happen — never lose signal to a hard
// cut. Shard size is chosen so ~10 shards cover PR #8-size diffs in a
// reasonable round-trip budget.
const CURATION_THRESHOLD = 30000;
const DIFF_SHARD_SIZE = 4500;
const CALL_TIMEOUT_MS = 120_000; const CALL_TIMEOUT_MS = 120_000;
// Mode runner can take longer than a raw /v1/chat call because it does
// pathway-fingerprint lookup + matrix retrieval + relevance filter
// before the LLM call. Budget extra time so we don't trip on a slow
// answers-corpus search.
const MODE_RUNNER_TIMEOUT_MS = 240_000;
const REPO_ROOT = "/home/profit/lakehouse"; const REPO_ROOT = "/home/profit/lakehouse";
export interface InferenceContext { export interface InferenceContext {
@ -86,26 +87,23 @@ export async function runInferenceCheck(
}]; }];
} }
// Diff source for the cloud prompt — either the raw diff (small // 2026-04-27 architecture simplification: dropped the tree-split
// enough to fit), or a tree-split scratchpad (curation layer). We // scratchpad layer. Rationale: the mode runner's pr_audit pipeline
// prefer curation to truncation: truncation silently drops files // pulls from lakehouse_answers_v1 (gold-standard prior audits) +
// past the window; curation summarizes them so the cloud still sees // lakehouse_arch_v1 + lakehouse_symbols_v1 via matrix retrieval. That
// what changed, just densified. // corpus IS the cross-PR context the tree-split was synthesizing
let diffForPrompt: string; // from scratch on every audit run. With the distillation substrate
let curationNote = ""; // shipped (commits 27b1d27..1b433a9), per-shard fact extraction is
if (diff.length > CURATION_THRESHOLD) { // redundant — and gpt-oss:120b at 168 calls/audit was the dominant
const ts = await treeSplitDiff(diff, verifiable); // cost. Now: truncate diff to MAX_DIFF_CHARS, hand straight to the
diffForPrompt = ts.scratchpad; // mode runner, let retrieval supply context. ONE strong-model call
curationNote = ` (curated: ${diff.length} chars → ${ts.shards} shards → scratchpad ${ts.scratchpad.length} chars)`; // per consensus rep × N=3 reps = 3 calls total per audit.
} else { const truncated = diff.length > MAX_DIFF_CHARS
diffForPrompt = diff; ? diff.slice(0, MAX_DIFF_CHARS) + `\n...[${diff.length - MAX_DIFF_CHARS} more chars truncated — the pr_audit mode runner has matrix retrieval against lakehouse_answers_v1 + arch + symbols for cross-PR context]`
} : diff;
// Belt-and-suspenders truncation — even a tree-split scratchpad const curationNote = diff.length > MAX_DIFF_CHARS
// shouldn't exceed MAX_DIFF_CHARS in practice, but guard anyway so ? ` (truncated ${diff.length}→${MAX_DIFF_CHARS} chars; matrix retrieval supplies cross-PR context)`
// pathological inputs can't burst the prompt. : "";
const truncated = diffForPrompt.length > MAX_DIFF_CHARS
? diffForPrompt.slice(0, MAX_DIFF_CHARS) + `\n...[${diffForPrompt.length - MAX_DIFF_CHARS} more chars truncated]`
: diffForPrompt;
// Build the reviewer prompt in the same shape as run_codereview's // Build the reviewer prompt in the same shape as run_codereview's
// review stage (llm_team_ui.py:10950), adapted for claim verification: // review stage (llm_team_ui.py:10950), adapted for claim verification:
@ -114,79 +112,20 @@ export async function runInferenceCheck(
// "Review: bugs/security/perf/style/edge. Provide corrected code." // "Review: bugs/security/perf/style/edge. Provide corrected code."
// We add: claim list upfront + ask for structured JSON verdict. // We add: claim list upfront + ask for structured JSON verdict.
// //
// When the diff was curated (tree-split scratchpad), we add an // Curation flag is now just a truncation flag — when the diff was
// explicit anti-false-positive instruction: the scratchpad is a // cut, tell the reviewer it didn't see the full picture so it doesn't
// distillation, not the full source, so absence-from-scratchpad is // confidently mark a claim NOT BACKED based on absence in the
// NOT evidence of absence-from-diff. Mirrors the fix we made in // (potentially incomplete) input.
// scrum_master's review prompt for the same class of error.
const isCurated = curationNote.length > 0; const isCurated = curationNote.length > 0;
const curationGuard = isCurated const prNumber = ctx?.pr_number ?? 0;
? [
"",
"CRITICAL: the 'Diff' below is a curated multi-shard scratchpad,",
"NOT the full raw diff. The scratchpad distills each shard down",
"to facts useful for claim verification and drops the rest.",
"DO NOT flag a function/field/feature as 'missing' or 'not",
"implemented' based solely on its absence from the scratchpad —",
"absence in a distillation is NOT evidence of absence in the",
"actual diff. Only judge a claim NOT BACKED when the scratchpad",
"DIRECTLY contradicts it (e.g. scratchpad shows the function was",
"added empty, or shows the claimed code path is a stub).",
"Skip the unflagged_gaps section entirely when operating on a",
"curated scratchpad — you can't reliably detect gaps from a",
"distillation, and false positives there are worse than misses.",
].join("\n")
: "";
const systemMsg = [
"You review pull-request diffs against the author's own ship-claims.",
"For each claim, decide: is it backed by actual code in the diff, or is",
"it placeholder / aspirational / unwired?",
"",
"A claim is BACKED when the diff contains a real code path that delivers",
"the claimed behavior. A claim is NOT BACKED when:",
" - the claim asserts functionality but the diff only adds types/fields",
" with no consumer",
" - the claim mentions tests but no test function was added",
" - the claim claims integration but the integration point is a stub",
" - the diff contains unimplemented!() / todo!() / TODO comments",
" - the claim says 'works end-to-end' but the diff has no end-to-end test",
curationGuard,
"",
"Respond with strict JSON only. No prose before or after. Shape:",
"{",
' "claim_verdicts": [',
' {"claim_idx": 0, "backed": false, "evidence": "short reason"}',
" ],",
' "unflagged_gaps": [',
' {"location": "file:line", "summary": "short description"}',
" ]",
"}",
].join("\n");
const userMsg = [ // N=3 consensus — fire the mode runner three times in parallel.
`Ship-claims the author made (numbered 0..N-1):`, // Each /v1/mode/execute call composes pathway memory + answers corpus
verifiable.map((c, i) => ` ${i}. [${c.strength}] "${c.text}" at ${c.location}`).join("\n"), // + JSON-shaped pr_audit framing internally, so the auditor's only
"", // job here is to vote-aggregate. Wall-clock ~= single call.
`Diff:`,
"```",
truncated,
"```",
"",
`For each numbered claim above, emit a claim_verdicts entry. For gaps the`,
`author DIDN'T claim but that look like placeholder code, emit unflagged_gaps.`,
`Strict JSON only, matching the shape described. No prose outside JSON.`,
].join("\n");
// N=3 consensus — run the primary reviewer in parallel, collect
// all three parsed responses, majority-vote per claim. Parallel
// (Promise.all) because each call is ~20-30s and they're independent;
// wall-clock stays ~same as single call, cost 3x tokens. Empirical
// justification: in 3-run determinism tests, 7/8 findings were
// stable but 1 flipped across runs — majority vote stabilizes the
// flipping class without losing the stable signal.
const primaryRuns = await Promise.all( const primaryRuns = await Promise.all(
Array.from({ length: N_CONSENSUS }, () => Array.from({ length: N_CONSENSUS }, () =>
runCloudInference(systemMsg, userMsg, MODEL)), runModeRunnerInference(truncated, verifiable, prNumber, isCurated, MODEL)),
); );
const parsedRuns = primaryRuns.filter(r => r.parsed !== null); const parsedRuns = primaryRuns.filter(r => r.parsed !== null);
@ -209,9 +148,19 @@ export async function runInferenceCheck(
interface Votes { trues: number; falses: number; evidences: string[] } interface Votes { trues: number; falses: number; evidences: string[] }
const votesByClaim = new Map<number, Votes>(); const votesByClaim = new Map<number, Votes>();
const unflaggedByRun: any[][] = []; const unflaggedByRun: any[][] = [];
let totalTokens = 0; // The N=3 consensus calls run via Promise.all — wall-clock is
// bounded by the SLOWEST call, not the sum. Pre-2026-04-27 we
// summed and reported "Xms total" which double/triple-counted
// (Opus self-audit caught it). Use max for accurate wall-clock.
let maxLatencyMs = 0;
let totalEnrichedChars = 0;
let bugFingerprintsSeen = 0;
let matrixKeptSeen = 0;
for (const run of parsedRuns) { for (const run of parsedRuns) {
totalTokens += run.tokens; maxLatencyMs = Math.max(maxLatencyMs, run.latency_ms ?? 0);
totalEnrichedChars += run.enriched_chars ?? 0;
bugFingerprintsSeen = Math.max(bugFingerprintsSeen, run.bug_fingerprints ?? 0);
matrixKeptSeen = Math.max(matrixKeptSeen, run.matrix_kept ?? 0);
unflaggedByRun.push(Array.isArray(run.parsed?.unflagged_gaps) ? run.parsed.unflagged_gaps : []); unflaggedByRun.push(Array.isArray(run.parsed?.unflagged_gaps) ? run.parsed.unflagged_gaps : []);
for (const v of run.parsed?.claim_verdicts ?? []) { for (const v of run.parsed?.claim_verdicts ?? []) {
const idx = Number(v?.claim_idx); const idx = Number(v?.claim_idx);
@ -233,10 +182,11 @@ export async function runInferenceCheck(
findings.push({ findings.push({
check: "inference", check: "inference",
severity: "info", severity: "info",
summary: `cloud review completed (model=${MODEL}, consensus=${parsedRuns.length}/${N_CONSENSUS}, tokens=${totalTokens})${curationNote}`, summary: `pr_audit mode runner completed (model=${MODEL}, consensus=${parsedRuns.length}/${N_CONSENSUS}, ${maxLatencyMs}ms wall-clock)${curationNote}`,
evidence: [ evidence: [
`claims voted: ${votesByClaim.size}`, `claims voted: ${votesByClaim.size}`,
`parsed runs: ${parsedRuns.length} / ${N_CONSENSUS}`, `parsed runs: ${parsedRuns.length} / ${N_CONSENSUS}`,
`enrichment: ${bugFingerprintsSeen} bug fingerprints, ${matrixKeptSeen} answers-corpus chunks, prompt avg ${Math.round(totalEnrichedChars / Math.max(parsedRuns.length, 1))} chars`,
], ],
}); });
@ -266,8 +216,9 @@ export async function runInferenceCheck(
notBacked = false; notBacked = false;
resolution = "majority_backed"; resolution = "majority_backed";
} else { } else {
// Tie. Run tie-breaker with a different-architecture model. // Tie. Run tie-breaker with a different-architecture model
const tb = await runCloudInference(systemMsg, userMsg, TIEBREAKER_MODEL); // through the same mode runner so framing/enrichment match.
const tb = await runModeRunnerInference(truncated, verifiable, prNumber, isCurated, TIEBREAKER_MODEL);
if (tb.parsed) { if (tb.parsed) {
const tv = (tb.parsed.claim_verdicts ?? []).find((v: any) => Number(v?.claim_idx) === idx); const tv = (tb.parsed.claim_verdicts ?? []).find((v: any) => Number(v?.claim_idx) === idx);
if (tv?.backed === false) { if (tv?.backed === false) {
@ -335,9 +286,13 @@ export async function runInferenceCheck(
// don't exit before extraction lands; the systemd poller has plenty // don't exit before extraction lands; the systemd poller has plenty
// of headroom (90s cycle vs ~15s extraction). A failure inside // of headroom (90s cycle vs ~15s extraction). A failure inside
// extractAndPersistFacts is caught + logged but never throws. // extractAndPersistFacts is caught + logged but never throws.
// Post-2026-04-27: extraction now runs against the truncated diff
// (no scratchpad to extract from since tree-split was retired).
// Fact extraction is still useful for surfacing entities/symbols
// into audit_facts.jsonl even from truncated input.
if (isCurated && ctx && process.env.LH_AUDITOR_SKIP_EXTRACT !== "1") { if (isCurated && ctx && process.env.LH_AUDITOR_SKIP_EXTRACT !== "1") {
try { try {
await extractAndPersistFacts(diffForPrompt, ctx); await extractAndPersistFacts(truncated, ctx);
} catch (e) { } catch (e) {
console.error(`[inference] fact extraction failed: ${(e as Error).message}`); console.error(`[inference] fact extraction failed: ${(e as Error).message}`);
} }
@ -394,60 +349,106 @@ export async function runInferenceCheck(
return findings; return findings;
} }
// Single cloud call — the consensus loop calls this N times in // Single mode-runner call — consensus + tie-breaker dispatch through
// parallel. Returns the parsed JSON shape + token usage + any error // here. Returns parsed JSON shape + telemetry from /v1/mode/execute
// diagnostic. NEVER throws; the consensus aggregator handles partial // (latency, enrichment metrics) + any error diagnostic. NEVER throws.
// failures by dropping non-parsed runs from the vote. // The consensus aggregator handles partial failures by dropping
// non-parsed runs from the vote.
interface CloudRunResult { interface CloudRunResult {
parsed: any | null; parsed: any | null;
tokens: number; latency_ms: number;
enriched_chars: number;
bug_fingerprints: number;
matrix_kept: number;
error?: string; // "unreachable" | "non_200" | "unparseable" error?: string; // "unreachable" | "non_200" | "unparseable"
diagnostic?: string; // first 200 chars for debugging diagnostic?: string; // first 200 chars for debugging
model: string; model: string;
} }
async function runCloudInference(systemMsg: string, userMsg: string, model: string): Promise<CloudRunResult> { async function runModeRunnerInference(
diffOrScratchpad: string,
claims: Claim[],
prNumber: number,
isCurated: boolean,
model: string,
): Promise<CloudRunResult> {
// user_question carries the claim list + the curation note (if any).
// pr_audit's framing (mode.rs FRAMING_PR_AUDIT) holds the JSON shape +
// strict-output rules so we don't repeat them here.
const claimDigest = claims
.map((c, i) => ` ${i}. [${c.strength}] "${c.text}" at ${c.location}`)
.join("\n");
const curationNote = isCurated
? "\n\nNOTE: the FILE below is a curated multi-shard scratchpad of the diff, not the raw diff itself. Absence in the scratchpad is NOT evidence of absence in the actual diff. Only mark backed=false on direct contradiction (e.g. scratchpad shows the function is empty / a stub). Skip unflagged_gaps entirely when scratchpad is curated."
: "";
const userQuestion = [
"Verify each ship-claim against the diff (or scratchpad).",
"",
"Ship-claims (numbered 0..N-1):",
claimDigest,
curationNote,
"",
"Every claim above must produce exactly one claim_verdicts entry. Output strict JSON only — no prose outside the JSON object.",
].join("\n");
let resp: Response; let resp: Response;
try { try {
resp = await fetch(`${GATEWAY}/v1/chat`, { resp = await fetch(`${GATEWAY}/v1/mode/execute`, {
method: "POST", method: "POST",
headers: { "content-type": "application/json" }, headers: { "content-type": "application/json" },
body: JSON.stringify({ body: JSON.stringify({
provider: "ollama_cloud", task_class: "pr_audit",
model, file_path: `pr-${prNumber}.diff`,
messages: [ file_content: diffOrScratchpad,
{ role: "system", content: systemMsg }, user_question: userQuestion,
{ role: "user", content: userMsg }, force_model: model,
], force_temperature: 0,
// temp=0 (greedy) + think=true. think=true is required for
// gpt-oss:120b — without it the model returns empty content
// on large prompts. Variance from the think trace is observed
// in practice, which is why we use N=3 consensus, not single-
// call determinism.
max_tokens: 3000,
temperature: 0,
think: true,
}), }),
signal: AbortSignal.timeout(CALL_TIMEOUT_MS), signal: AbortSignal.timeout(MODE_RUNNER_TIMEOUT_MS),
}); });
} catch (e) { } catch (e) {
return { parsed: null, tokens: 0, error: "unreachable", diagnostic: (e as Error).message.slice(0, 200), model }; return {
parsed: null, latency_ms: 0, enriched_chars: 0, bug_fingerprints: 0, matrix_kept: 0,
error: "unreachable", diagnostic: (e as Error).message.slice(0, 200), model,
};
} }
if (!resp.ok) { if (!resp.ok) {
return { parsed: null, tokens: 0, error: "non_200", diagnostic: `${resp.status}: ${(await resp.text()).slice(0, 160)}`, model }; return {
parsed: null, latency_ms: 0, enriched_chars: 0, bug_fingerprints: 0, matrix_kept: 0,
error: "non_200", diagnostic: `${resp.status}: ${(await resp.text()).slice(0, 160)}`, model,
};
} }
let body: any; let body: any;
try { body = await resp.json(); } try { body = await resp.json(); }
catch (e) { return { parsed: null, tokens: 0, error: "unparseable", diagnostic: (e as Error).message, model }; } catch (e) {
const content: string = body?.choices?.[0]?.message?.content ?? ""; return {
const tokens: number = body?.usage?.total_tokens ?? 0; parsed: null, latency_ms: 0, enriched_chars: 0, bug_fingerprints: 0, matrix_kept: 0,
const parsed = extractJson(content); error: "unparseable", diagnostic: (e as Error).message, model,
if (!parsed) { };
return { parsed: null, tokens, error: "unparseable", diagnostic: content.slice(0, 200), model };
} }
return { parsed, tokens, model }; const content: string = typeof body?.response === "string" ? body.response : "";
const parsed = extractJson(content);
// Number-coerced extractors so a non-numeric upstream value (string,
// null, NaN) collapses to 0 instead of poisoning downstream
// arithmetic. Caught 2026-04-27 by kimi_architect self-audit —
// optional-chaining + ?? only catches null/undefined, not type drift.
const num = (v: unknown): number => {
const n = typeof v === "number" ? v : Number(v);
return Number.isFinite(n) ? n : 0;
};
return {
parsed,
latency_ms: num(body?.latency_ms),
enriched_chars: num(body?.enriched_prompt_chars),
bug_fingerprints: num(body?.sources?.bug_fingerprints_count),
matrix_kept: num(body?.sources?.matrix_chunks_kept),
error: parsed ? undefined : "unparseable",
diagnostic: parsed ? undefined : content.slice(0, 200),
model,
};
} }
async function persistDiscrepancies(ctx: InferenceContext, discrepancies: any[]): Promise<void> { async function persistDiscrepancies(ctx: InferenceContext, discrepancies: any[]): Promise<void> {
await mkdir("/home/profit/lakehouse/data/_kb", { recursive: true }); await mkdir("/home/profit/lakehouse/data/_kb", { recursive: true });
const rows = discrepancies.map(d => JSON.stringify({ const rows = discrepancies.map(d => JSON.stringify({
@ -490,94 +491,7 @@ async function extractAndPersistFacts(scratchpad: string, ctx: InferenceContext)
await appendFile(AUDIT_FACTS_JSONL, JSON.stringify(row) + "\n"); await appendFile(AUDIT_FACTS_JSONL, JSON.stringify(row) + "\n");
} }
// Curation via tree-split — ports the scrum_master pattern into the
// inference check. Shards the raw diff into DIFF_SHARD_SIZE chunks,
// summarizes each shard *against the claim-verification task* so the
// summary preserves exactly what the cloud needs to judge claims
// (function signatures, struct fields, deletions, new files), drops
// everything else. Merges into a compact scratchpad.
//
// Cost: N cloud calls for the shard summaries + 1 cloud call for the
// final verification = N+1 calls instead of 1. Mitigation: shards run
// serially (not parallel) to keep gateway load bounded; summary calls
// use max_tokens=400 so they're fast (~2s each on gpt-oss:120b).
//
// Determinism: each shard summary call uses temp=0 + think=true (same
// as the top-level inference call), so identical input yields
// identical scratchpad. The final verification call then sees a
// stable scratchpad, giving stable verdicts.
async function treeSplitDiff(
fullDiff: string,
claims: Claim[],
): Promise<{ scratchpad: string; shards: number }> {
const shards: Array<{ from: number; to: number; text: string }> = [];
for (let i = 0; i < fullDiff.length; i += DIFF_SHARD_SIZE) {
const end = Math.min(i + DIFF_SHARD_SIZE, fullDiff.length);
shards.push({ from: i, to: end, text: fullDiff.slice(i, end) });
}
// Curate the claim list into a short form the summary prompt can
// use to bias extraction toward relevant facts.
const claimDigest = claims.map((c, i) =>
`${i}. [${c.strength}] "${c.text.slice(0, 100)}"`
).join("\n");
let scratchpad = "";
for (const [si, shard] of shards.entries()) {
const prompt = [
`You are summarizing shard ${si + 1}/${shards.length} (chars ${shard.from}..${shard.to}) of a PR diff.`,
`The downstream task will verify these ship-claims against the full-PR summary. Extract ONLY facts that could confirm or refute these claims:`,
"",
claimDigest,
"",
"Extract: new function/method signatures, struct fields, deletions, new files, wiring (function X calls Y), absence-of-implementation markers, TODO comments on added lines.",
"Skip: comment-only edits, whitespace, import reordering, unrelated cosmetic changes.",
"",
"─────── shard diff ───────",
shard.text,
"─────── end shard ───────",
"",
"Output: up to 180 words of facts in bullet form. No prose preamble, no claim verdicts (that's for the downstream step).",
].join("\n");
const r = await callCloud(prompt, 400);
if (r.content) {
scratchpad += `\n--- shard ${si + 1} (chars ${shard.from}..${shard.to}) ---\n${r.content.trim()}\n`;
}
}
return { scratchpad: scratchpad.trim(), shards: shards.length };
}
// Minimal cloud caller used only by treeSplitDiff — same gateway +
// model as the top-level call, but think=false. Shards are small
// (≤DIFF_SHARD_SIZE ~4500 chars) and the task is pure fact
// extraction, not reasoning. think=true on the shards introduced
// variance in reasoning traces that compounded across 23 calls into
// a non-deterministic scratchpad (observed during curation
// validation: same-SHA runs produced 5/7/8 final findings).
// think=false on small prompts is stable — only breaks at the main
// call's 10K+ prompt size, which keeps think=true.
async function callCloud(prompt: string, maxTokens: number): Promise<{ content: string }> {
  // Best-effort call: any transport/HTTP/parse failure degrades to an
  // empty content string rather than throwing — the caller simply
  // skips the shard.
  const request = {
    provider: "ollama_cloud",
    model: MODEL,
    messages: [{ role: "user", content: prompt }],
    max_tokens: maxTokens,
    temperature: 0,
    think: false,
  };
  try {
    const res = await fetch(`${GATEWAY}/v1/chat`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify(request),
      signal: AbortSignal.timeout(CALL_TIMEOUT_MS),
    });
    if (!res.ok) return { content: "" };
    const payload: any = await res.json();
    return { content: payload?.choices?.[0]?.message?.content ?? "" };
  } catch {
    return { content: "" };
  }
}
// Pull out plausible code-symbol names from a summary string.
// Matches:

View File

@ -0,0 +1,461 @@
// Kimi-architect check — second-pass senior architectural review using
// kimi-for-coding (Kimi K2.6) via /v1/chat provider=kimi.
//
// Runs AFTER the deepseek inference check (N=3 consensus) and the
// static/kb_query checks. Reads their findings as context and asks Kimi
// "what did everyone else miss?" — complementing the cheap-consensus
// voting with a sparse senior pass that catches load-bearing issues
// (compile errors, false telemetry, schema bypasses, etc.) which the
// voting structure can't see.
//
// Why Kimi here and not in the inner inference loop:
// - Cost: ~3min wall-clock per call vs ~30s for deepseek consensus.
// - TOS: api.kimi.com is User-Agent-gated (see crates/gateway/src/v1/
// kimi.rs); cost-bounded calls only.
// - Value: experiment 2026-04-27 showed 7/7 grounding rate with full
// files vs ~50% on truncated input. Best as a sparse complement, not
// a replacement.
//
// Failure-isolated: any Kimi error returns a single info-level Finding
// "kimi_architect skipped — <reason>" so the existing audit pipeline
// is never blocked by a Kimi outage / TOS revocation / 429.
//
// Cost cap: if a kimi_verdicts/<pr>-<sha>.json file exists less than 24h
// old, return cached findings without calling upstream. New commits
// produce new SHAs so this is per-head, not per-day.
//
// Off by default: caller checks LH_AUDITOR_KIMI=1 before invoking.
import { appendFile, mkdir, readdir, readFile, realpath, stat, writeFile } from "node:fs/promises";
import { existsSync, realpathSync } from "node:fs";
import { dirname, join, resolve } from "node:path";
import type { CheckKind, Finding } from "../types.ts";
// Local gateway base URL — every model call (any provider) is routed
// through it, never directly upstream.
const GATEWAY = process.env.LH_GATEWAY_URL ?? "http://localhost:3100";
// Per-head verdict cache files (<pr>-<sha12>.json) live here.
const KIMI_VERDICTS_DIR = "/home/profit/lakehouse/data/_auditor/kimi_verdicts";
// Append-only metrics log — one JSON object per audit run.
const KIMI_AUDITS_JSONL = "/home/profit/lakehouse/data/_kb/kimi_audits.jsonl";
const REPO_ROOT = "/home/profit/lakehouse";
// Canonicalize at module load — REPO_ROOT itself may be a symlink in
// some environments (e.g. /home/profit is a bind-mount). Computing
// once at startup means the per-finding grounding loop can compare
// realpath(target) against this stable anchor.
const REPO_ROOT_REAL = (() => {
  try { return realpathSync(REPO_ROOT); }
  catch { return REPO_ROOT; }
})();
// 15 min budget. Bun's fetch has an intrinsic ~300s limit that our
// AbortController + setTimeout combo could not override; we use curl
// via Bun.spawn instead (callKimi below). Curl honors -m for max
// transfer time without a hard intrinsic ceiling.
const CALL_TIMEOUT_MS = 900_000;
// Verdict cache TTL — 24h. New commits produce new SHAs, so this is
// effectively per-head, not per-day.
const CACHE_TTL_MS = 24 * 60 * 60 * 1000;
// Diffs above this size are truncated before prompting (buildPrompt).
const MAX_DIFF_CHARS = 180_000;
// Cap on prior findings surfaced in the prompt's context block.
const MAX_PRIOR_FINDINGS = 50;
// Default provider/model = ollama_cloud/kimi-k2.6. Pre-2026-04-27 we
// went direct to api.kimi.com, but Ollama Cloud Pro now exposes the
// same model legitimately, so we route there to avoid User-Agent
// gating. The api.kimi.com path (provider=kimi) remains wired in the
// gateway as a fallback for when Ollama Cloud is upstream-broken.
const KIMI_PROVIDER = process.env.LH_AUDITOR_KIMI_PROVIDER ?? "ollama_cloud";
const KIMI_MODEL = process.env.LH_AUDITOR_KIMI_MODEL ?? "kimi-k2.6";
// Cross-lineage alternation. 2026-04-27 J's call: Opus is too
// expensive to auto-fire (~$0.30/audit). Kimi K2.6 via Go-sub is
// effectively free; Haiku 4.5 via Zen is ~$0.04. Alternate between
// them so we get cross-lineage signal (Moonshot vs Anthropic) on
// every PR's audit history without burning the budget.
//
// Default: Kimi K2.6 on even audits, Haiku 4.5 on odd. Each PR's
// audits flip between vendors as new SHAs come in.
//
// Frontier models (Opus 4.7, GPT-5.5, Gemini 3.1) are NOT in the
// auto path. Operator hands distilled findings to a frontier model
// manually when high-leverage decisions need it. Removing Opus from
// auto-promotion saves ~$1-3/day on the daemon at our cadence.
//
// Override the alternation entirely with LH_AUDITOR_KIMI_MODEL
// (forces one model regardless of audit count); set
// LH_AUDITOR_KIMI_ALT_MODEL to the alternate.
const ALT_MODEL = process.env.LH_AUDITOR_KIMI_ALT_MODEL ?? "claude-haiku-4-5";
const ALT_PROVIDER = process.env.LH_AUDITOR_KIMI_ALT_PROVIDER ?? "opencode";
// True when the operator pinned a model via env — disables alternation.
const FORCE_DEFAULT = process.env.LH_AUDITOR_KIMI_MODEL !== undefined && process.env.LH_AUDITOR_KIMI_MODEL !== "";
// Choose the provider/model pair for this audit. An env-pinned model
// always wins; otherwise alternate default (even auditIndex) ↔ alt
// (odd auditIndex) for cross-lineage signal.
function selectModel(diffLen: number, auditIndex: number = 0): { provider: string; model: string; promoted: boolean } {
  if (FORCE_DEFAULT) {
    // Operator override — env-pinned model wins.
    return { provider: KIMI_PROVIDER, model: KIMI_MODEL, promoted: false };
  }
  // diffLen is reserved for future "big diff → alt model anyway"
  // routing; deliberately unused so big PRs don't auto-burn budget.
  void diffLen;
  const useAlternate = auditIndex % 2 === 1;
  return useAlternate
    ? { provider: ALT_PROVIDER, model: ALT_MODEL, promoted: true }
    : { provider: KIMI_PROVIDER, model: KIMI_MODEL, promoted: false };
}
// Model-aware max_tokens. Different upstream APIs cap at different
// limits and reject requests that exceed them:
// - Anthropic Opus 4.x: 32K output (with extended-output header)
// - Anthropic Haiku 4.5 / Sonnet: 8K output
// - Kimi K2.6 (reasoning): 128K — needs headroom because
//   reasoning_content counts against the budget
// - OpenAI gpt-5*/o1/o3/o4: 32K
// - Default: 16K, conservative middle ground
//
// 2026-04-27 BLOCK from Opus self-audit: the prior single-default of
// 128K worked silently (Anthropic clamps server-side) but was
// technically invalid. Per-model caps make it explicit. Override via
// LH_AUDITOR_KIMI_MAX_TOKENS to force a value (note `||` not `??` so
// an empty env var — Number("") === 0 — doesn't pin the cap to 0).
const MAX_TOKENS_OVERRIDE = Number(process.env.LH_AUDITOR_KIMI_MAX_TOKENS) || 0;
// Prefix → output-token cap table; first matching entry wins.
const MODEL_TOKEN_CAPS: ReadonlyArray<{ prefixes: string[]; cap: number }> = [
  { prefixes: ["claude-opus"], cap: 32_000 },
  { prefixes: ["claude-haiku", "claude-sonnet"], cap: 8_192 },
  { prefixes: ["kimi-"], cap: 128_000 },
  { prefixes: ["gpt-5", "o1", "o3", "o4"], cap: 32_000 },
];
function maxTokensFor(model: string): number {
  if (MAX_TOKENS_OVERRIDE > 0) return MAX_TOKENS_OVERRIDE;
  for (const { prefixes, cap } of MODEL_TOKEN_CAPS) {
    if (prefixes.some((p) => model.startsWith(p))) return cap;
  }
  return 16_000;
}
// Caller-supplied identifiers for the PR under audit; keys the
// per-head verdict cache.
export interface KimiArchitectContext {
  pr_number: number;
  head_sha: string;
}
// On-disk shape of kimi_verdicts/<pr>-<sha12>.json — the cached
// verdict plus enough telemetry to re-derive the metrics line.
interface KimiVerdictFile {
  pr_number: number;
  head_sha: string;
  // ISO-8601 timestamp of when the verdict was generated.
  cached_at: string;
  model: string;
  latency_ms: number;
  finish_reason: string;
  usage: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
  // Raw model output, kept so verdicts can be re-parsed if the
  // markdown parser evolves.
  raw_content: string;
  findings: Finding[];
  // Citation-verification stats from computeGrounding.
  grounding: { total: number; verified: number; rate: number };
}
/**
 * kimi_architect check — sparse second-pass architectural review.
 *
 * Flow: outage sentinel → per-head verdict cache → cross-lineage model
 * selection → upstream call (callKimi) → parseFindings → grounding
 * verification → metrics append → conditional cache write.
 *
 * Failure-isolated by design: every error path returns a single
 * info-level finding instead of throwing, so the audit pipeline is
 * never blocked by a Kimi/Anthropic outage.
 *
 * @param diff          Unified diff of the PR head under audit.
 * @param priorFindings Findings from the earlier checks — surfaced in
 *                      the prompt so the model complements rather than
 *                      repeats them.
 * @param ctx           PR number + head SHA; keys the verdict cache.
 * @returns Findings parsed from the model output, or one info-level
 *          finding describing why none were produced (cache hit,
 *          outage, zero parsed).
 */
export async function runKimiArchitectCheck(
  diff: string,
  priorFindings: Finding[],
  ctx: KimiArchitectContext,
): Promise<Finding[]> {
  const cachePath = join(KIMI_VERDICTS_DIR, `${ctx.pr_number}-${ctx.head_sha.slice(0, 12)}.json`);
  const outageSentinel = `${cachePath}.outage`;
  const OUTAGE_TTL_MS = 10 * 60 * 1000;
  // Outage negative-cache — if upstream failed within OUTAGE_TTL_MS,
  // skip this audit and return immediately. Prevents the daemon from
  // hammering a downed Kimi/Anthropic upstream every 90s.
  if (existsSync(outageSentinel)) {
    try {
      const s = await stat(outageSentinel);
      if (Date.now() - s.mtimeMs < OUTAGE_TTL_MS) {
        const note = JSON.parse(await readFile(outageSentinel, "utf8"));
        return [skipFinding(`upstream still down (cached ${Math.round((Date.now() - s.mtimeMs) / 1000)}s ago): ${String(note.reason).slice(0, 160)}`)];
      }
    } catch { /* malformed sentinel — fall through to fresh call */ }
  }
  // Cost cap — return cached findings if a verdict for this exact head
  // SHA was generated within the TTL.
  const cached = await loadCachedVerdict(cachePath);
  if (cached) {
    return cached.findings.length > 0
      ? cached.findings
      : [{ check: "kimi_architect" as CheckKind, severity: "info", summary: "kimi_architect cached — 0 findings", evidence: [`cache: ${cachePath}`] }];
  }
  // Alternate model based on how many audits this PR has had — gives
  // cross-lineage signal (Kimi/Moonshot ↔ Haiku/Anthropic) on every
  // PR's audit history. Count is derived from existing kimi_verdicts
  // files for this PR; cheap O(N_PRs) directory read.
  //
  // Fixed here: (a) reuse KIMI_VERDICTS_DIR instead of a second
  // hard-coded copy of the path; (b) async readdir instead of a sync
  // require("node:fs").readdirSync — consistent with the async-IO rule
  // this file applies in computeGrounding; (c) count only *.json
  // verdict files, so leftover .json.outage sentinels don't inflate
  // the index and silently flip the model alternation.
  let auditIndex = 0;
  try {
    if (existsSync(KIMI_VERDICTS_DIR)) {
      const entries = await readdir(KIMI_VERDICTS_DIR);
      auditIndex = entries.filter((f) => f.startsWith(`${ctx.pr_number}-`) && f.endsWith(".json")).length;
    }
  } catch { /* default 0 — Kimi */ }
  const selected = selectModel(diff.length, auditIndex);
  let response: { content: string; usage: any; finish_reason: string; latency_ms: number };
  try {
    response = await callKimi(buildPrompt(diff, priorFindings, ctx), selected.provider, selected.model);
  } catch (e) {
    // Negative-cache for 10 min on outage (caught 2026-04-27 by Opus
    // self-audit): without this, every audit cycle within the 24h
    // TTL re-calls upstream while it's still down. Use a sentinel
    // file with mtime check rather than persisting a verdict so the
    // happy-path cache reader doesn't have to special-case it.
    // (Reuses outageSentinel computed above — previously rebuilt the
    // same string a second time.)
    try { await writeFile(outageSentinel, JSON.stringify({ at: new Date().toISOString(), reason: (e as Error).message.slice(0, 200) })); } catch {}
    return [skipFinding(`kimi call failed (${selected.model}): ${(e as Error).message.slice(0, 200)}`)];
  }
  const findings = parseFindings(response.content);
  const grounding = await computeGrounding(findings);
  const verdict: KimiVerdictFile = {
    pr_number: ctx.pr_number,
    head_sha: ctx.head_sha,
    cached_at: new Date().toISOString(),
    model: selected.model,
    latency_ms: response.latency_ms,
    finish_reason: response.finish_reason,
    usage: {
      prompt_tokens: response.usage?.prompt_tokens ?? 0,
      completion_tokens: response.usage?.completion_tokens ?? 0,
      total_tokens: response.usage?.total_tokens ?? 0,
    },
    raw_content: response.content,
    findings,
    grounding,
  };
  // Cache-poisoning guard (caught 2026-04-27 by Opus self-audit):
  // when parseFindings returns 0 findings (Kimi rambled, prompt too
  // big, or the markdown shape changed and our regex missed every
  // block), persisting the empty verdict short-circuits all future
  // audits in the 24h TTL window with a useless cached "0 findings"
  // result. Better to leave no cache and re-call upstream next time.
  // Always append metrics — observability shouldn't depend on whether
  // findings parsed.
  await appendMetrics(verdict);
  if (findings.length > 0) {
    await persistVerdict(cachePath, verdict);
    return findings;
  }
  return [{
    check: "kimi_architect" as CheckKind,
    severity: "info",
    summary: `kimi_architect produced 0 ranked findings (${response.finish_reason}, ${verdict.usage.completion_tokens} tokens) — not cached`,
    evidence: [`raw saved (no cache): see kimi_audits.jsonl ${verdict.cached_at}`],
  }];
}
// Load a cached verdict if one exists and is younger than
// CACHE_TTL_MS. Returns null on a miss, on expiry, or on any
// stat/read/parse failure (malformed cache files are treated as
// misses rather than errors).
async function loadCachedVerdict(path: string): Promise<KimiVerdictFile | null> {
  if (!existsSync(path)) return null;
  try {
    const meta = await stat(path);
    const ageMs = Date.now() - meta.mtimeMs;
    if (ageMs > CACHE_TTL_MS) return null;
    const raw = await readFile(path, "utf8");
    return JSON.parse(raw) as KimiVerdictFile;
  } catch {
    return null;
  }
}
// Assemble the single user-message prompt for the second-pass review:
// role framing, grounding rules, the prior-findings digest (so the
// model complements rather than repeats the consensus pass), the
// required output format, then the (possibly truncated) diff.
function buildPrompt(diff: string, priorFindings: Finding[], ctx: KimiArchitectContext): string {
  // Hard cap on prompt size; the marker tells the model truncation
  // happened so it can hedge ("diff-truncated") instead of guessing.
  const truncatedDiff = diff.length > MAX_DIFF_CHARS
    ? diff.slice(0, MAX_DIFF_CHARS) + `\n\n... [truncated; original diff was ${diff.length} chars]`
    : diff;
  // info-level findings are noise for a second-pass reviewer; drop
  // them and cap the rest at MAX_PRIOR_FINDINGS.
  const priorBlock = priorFindings
    .filter(f => f.severity !== "info")
    .slice(0, MAX_PRIOR_FINDINGS)
    .map(f => `- [${f.check}/${f.severity}] ${f.summary}${f.evidence?.[0] ? `${f.evidence[0].slice(0, 160)}` : ""}`)
    .join("\n");
  // NOTE: the "### F<N>:" / "**Severity:**" / "**File:**" shape below
  // is load-bearing — parseFindings' regexes depend on it exactly.
  return `You are a senior software architect doing a second-pass review on PR #${ctx.pr_number} (head ${ctx.head_sha.slice(0, 12)}). The team's automated auditor (deepseek-v3.1:671b, N=3 consensus) already produced findings. Your job is NOT to repeat what they found — your job is to catch what their voting structure CAN'T see: compile errors, type-system bypasses, false telemetry, silent determinism leaks, schema-bypass anti-patterns, load-bearing assumptions that look fine line-by-line.
GROUNDING RULES (non-negotiable):
- Cite file:line for EVERY finding. Lines you cite must actually contain what you claim. Confabulating a finding wastes more time than missing one.
- If the diff is truncated and you can't verify a claim, say "diff-truncated, can't verify" DO NOT guess.
- Distinguish architectural concerns (no specific line) from concrete bugs (specific line). Don't dress one as the other.
PRIOR FINDINGS FROM DEEPSEEK CONSENSUS (do not repeat these):
${priorBlock || "(none)"}
OUTPUT FORMAT (markdown):
- ## Verdict (one sentence)
- ## Findings (5-10 items, each formatted EXACTLY as below)
For each finding use this exact shape so a parser can lift them:
### F1: <one-line summary>
- **Severity:** block | warn | info
- **File:** path/to/file.ext:LINE
- **Rationale:** one or two sentences
THE DIFF:
${truncatedDiff}
`;
}
// POST the audit prompt to the gateway's /v1/chat via curl. We shell
// out instead of using fetch because Bun's fetch enforces an intrinsic
// ~300s ceiling that AbortController can't lift; curl's -m honors the
// full CALL_TIMEOUT_MS budget. The body goes over stdin (--data-binary
// @-) to dodge argv length limits on large prompts. Throws on any
// transport, parse, or gateway error — the caller handles isolation.
async function callKimi(prompt: string, provider: string, model: string): Promise<{ content: string; usage: any; finish_reason: string; latency_ms: number }> {
  const startedAt = Date.now();
  const payload = JSON.stringify({
    provider,
    model,
    messages: [{ role: "user", content: prompt }],
    max_tokens: maxTokensFor(model),
    temperature: 0.2,
  });
  const child = Bun.spawn({
    cmd: [
      "curl", "-sS", "-X", "POST",
      "-m", String(Math.ceil(CALL_TIMEOUT_MS / 1000)),
      "-H", "content-type: application/json",
      "--data-binary", "@-",
      `${GATEWAY}/v1/chat`,
    ],
    stdin: "pipe",
    stdout: "pipe",
    stderr: "pipe",
  });
  child.stdin.write(payload);
  await child.stdin.end();
  const [outText, errText, exitCode] = await Promise.all([
    new Response(child.stdout).text(),
    new Response(child.stderr).text(),
    child.exited,
  ]);
  if (exitCode !== 0) {
    throw new Error(`curl exit ${exitCode}: ${errText.slice(0, 300)}`);
  }
  let parsed: any;
  try {
    parsed = JSON.parse(outText);
  } catch {
    throw new Error(`bad response (${outText.length} bytes): ${outText.slice(0, 300)}`);
  }
  if (parsed.error || !parsed.choices) {
    throw new Error(`gateway error: ${JSON.stringify(parsed).slice(0, 300)}`);
  }
  const first = parsed.choices?.[0];
  return {
    content: first?.message?.content ?? "",
    usage: parsed.usage ?? {},
    finish_reason: first?.finish_reason ?? "unknown",
    latency_ms: Date.now() - startedAt,
  };
}
// Lift Finding[] out of the model's markdown. Expected shape (per
// buildPrompt's OUTPUT FORMAT section):
//   ### F<N>: <summary>
//   - **Severity:** block | warn | info
//   - **File:** path:line
//   - **Rationale:** ...
// Blocks with an empty first line are skipped; an unrecognized or
// missing severity downgrades to "info".
function parseFindings(content: string): Finding[] {
  const chunks = content.split(/^###\s+F\d+:\s*/m).slice(1);
  const out: Finding[] = [];
  for (const chunk of chunks) {
    const headline = (chunk.split("\n")[0] ?? "").trim();
    if (!headline) continue;
    const sevRaw = /\*\*Severity:\*\*\s*(block|warn|info)/i.exec(chunk)?.[1]?.toLowerCase();
    const citation = /\*\*File:\*\*\s*(\S+)/i.exec(chunk)?.[1] ?? "unknown";
    const why = /\*\*Rationale:\*\*\s*([\s\S]+?)(?=\n###|\n\*\*|$)/i.exec(chunk)?.[1]?.trim() ?? "";
    let severity: Finding["severity"];
    if (sevRaw === "block") severity = "block";
    else if (sevRaw === "warn") severity = "warn";
    else severity = "info";
    out.push({
      check: "kimi_architect" as CheckKind,
      severity,
      summary: headline.slice(0, 240),
      evidence: [citation, why.slice(0, 360)].filter(Boolean),
    });
  }
  return out;
}
// For each finding's cited file:line, grep the actual file to verify
// the line exists. Returns total + verified counts; per-finding metadata
// is appended into the evidence array so the reader can see which
// citations were verified. Note: mutates each finding's evidence array
// in place (the grounding tag rides along into the persisted verdict).
async function computeGrounding(findings: Finding[]): Promise<{ total: number; verified: number; rate: number }> {
  // readFile (async) instead of readFileSync — caught 2026-04-27 by
  // Kimi's self-audit. Sync I/O in an async fn blocks the event loop
  // for every cited file; doesn't matter at 10 findings, would matter
  // at 100+.
  const checks = await Promise.all(findings.map(async (f) => {
    const cite = f.evidence[0] ?? "";
    // The first evidence entry is expected to lead with "path:line"
    // (parseFindings puts the **File:** citation there).
    const m = /^(\S+?):(\d+)/.exec(cite);
    if (!m) return false;
    const [, relpath, lineStr] = m;
    const line = Number(lineStr);
    // line 0 and empty paths are never valid citations.
    if (!line || !relpath) return false;
    // Path-traversal guard, two-layer (caught 2026-04-27 by Kimi
    // self-audits on dd77632 then 2d9cb12).
    //
    // Layer 1 (lexical): resolve() normalizes `..` segments. Refuse
    // any path that doesn't anchor under REPO_ROOT.
    //
    // Layer 2 (symlink): even if the lexical path is anchored, it
    // could be a symlink whose target escapes. realpath() resolves
    // symlinks; compare the real path against REPO_ROOT_REAL.
    //
    // Both layers exist because attackers might bypass either alone:
    // raw `../etc/passwd` triggers layer 1; a planted symlink at
    // ./safe-looking-name → /etc/passwd triggers layer 2.
    const abs = resolve(REPO_ROOT, relpath);
    if (!abs.startsWith(REPO_ROOT + "/") && abs !== REPO_ROOT) {
      f.evidence.push(`[grounding: path escapes repo root, refusing]`);
      return false;
    }
    if (!existsSync(abs)) {
      f.evidence.push("[grounding: file not found]");
      return false;
    }
    try {
      // Symlink-resolution check before any read. realpath() throws
      // if the file doesn't exist; existsSync above shields the
      // common case but a TOCTOU race could still error here — the
      // outer catch handles it.
      const realPath = await realpath(abs);
      if (!realPath.startsWith(REPO_ROOT_REAL + "/") && realPath !== REPO_ROOT_REAL) {
        f.evidence.push(`[grounding: symlink target escapes repo root, refusing]`);
        return false;
      }
      const lines = (await readFile(realPath, "utf8")).split("\n");
      // Line numbers are 1-based; anything past EOF is a confabulated
      // citation.
      if (line < 1 || line > lines.length) {
        f.evidence.push(`[grounding: line ${line} > EOF (${lines.length})]`);
        return false;
      }
      f.evidence.push(`[grounding: verified at ${relpath}:${line}]`);
      return true;
    } catch (e) {
      f.evidence.push(`[grounding: read failed: ${(e as Error).message.slice(0, 80)}]`);
      return false;
    }
  }));
  const verified = checks.filter(Boolean).length;
  const total = findings.length;
  return { total, verified, rate: total === 0 ? 0 : verified / total };
}
// Write the verdict to its per-head cache file, creating the verdicts
// directory on first use. Pretty-printed so operators can read it.
async function persistVerdict(path: string, v: KimiVerdictFile): Promise<void> {
  await mkdir(KIMI_VERDICTS_DIR, { recursive: true });
  const serialized = JSON.stringify(v, null, 2);
  await writeFile(path, serialized);
}
// Append one flat JSONL metrics record per audit (token counts,
// latency, finding/grounding stats) — the observability trail that is
// written even when the verdict itself isn't cached.
async function appendMetrics(v: KimiVerdictFile): Promise<void> {
  // dirname() rather than join(path, "..") — caught 2026-04-27 by both
  // Haiku and Opus self-audits; the "/.." idiom normalizes correctly
  // but is non-idiomatic.
  await mkdir(dirname(KIMI_AUDITS_JSONL), { recursive: true });
  const record = {
    pr_number: v.pr_number,
    head_sha: v.head_sha,
    audited_at: v.cached_at,
    model: v.model,
    latency_ms: v.latency_ms,
    finish_reason: v.finish_reason,
    prompt_tokens: v.usage.prompt_tokens,
    completion_tokens: v.usage.completion_tokens,
    findings_total: v.findings.length,
    findings_block: v.findings.filter(f => f.severity === "block").length,
    findings_warn: v.findings.filter(f => f.severity === "warn").length,
    grounding_verified: v.grounding.verified,
    grounding_rate: Number(v.grounding.rate.toFixed(3)),
  };
  await appendFile(KIMI_AUDITS_JSONL, JSON.stringify(record) + "\n");
}
// Wrap a skip reason into a single info-level finding so callers of
// the audit pipeline never need to special-case Kimi outages or
// cache short-circuits.
function skipFinding(why: string): Finding {
  const summary = `kimi_architect skipped — ${why}`;
  return { check: "kimi_architect" as CheckKind, severity: "info", summary, evidence: [why] };
}

View File

@ -54,49 +54,87 @@ export function runStaticCheck(diff: string): Finding[] {
const isAuditorCheckerFile = path.startsWith("auditor/checks/") || const isAuditorCheckerFile = path.startsWith("auditor/checks/") ||
path.startsWith("auditor/fixtures/"); path.startsWith("auditor/fixtures/");
// Track multi-line backtick-template state across the file. Walks
// all post-merge lines (context + added, skipping removed lines)
// in order and keeps `inMultilineBacktick` flipping on each
// unescaped backtick. Pre-2026-04-26 the per-line walk in
// isInsideQuotedString missed `todo!()` matches inside docstring
// template literals because the opening backtick lived on a
// line above the match. Now we OR the file-level state into the
// per-line check.
let inMultilineBacktick = false;
for (let idx = 0; idx < lines.length; idx++) { for (let idx = 0; idx < lines.length; idx++) {
const line = lines[idx]; const line = lines[idx];
if (!line.startsWith("+") || line.startsWith("+++")) continue;
const added = line.slice(1);
if (!isAuditorCheckerFile) { // Diff bookkeeping lines and removed lines don't contribute to
for (const { re, why } of BLOCK_PATTERNS) { // the post-merge file's string state.
const m = added.match(re); if (line.startsWith("+++") || line.startsWith("---") ||
if (m && typeof m.index === "number") { line.startsWith("@@") || line.startsWith("\\ No newline")) continue;
// Skip if the match sits inside a quoted string literal — if (line.startsWith("-")) continue;
// this is how rubric files (tests/real-world/*, prompt
// templates) legitimately reference the patterns they const isAdded = line.startsWith("+");
// guard against, without actually executing them. // Strip the diff prefix (' ' for context, '+' for added).
if (isInsideQuotedString(added, m.index)) continue; const body = (isAdded || line.startsWith(" ")) ? line.slice(1) : line;
// Compute the file-level backtick state ENTERING this line.
// The state machine sees pattern matches against the right
// context: a line that opens a backtick block has its own
// pattern checks evaluated under "inside-backtick" semantics
// for the portion AFTER the opening tick. Pre-2026-04-27 the
// state was updated AFTER the pattern checks, so the FIRST
// pattern on a backtick-opening line slipped through with
// stale "outside-backtick" semantics. Caught by Kimi self-audit.
const stateAtLineStart = inMultilineBacktick;
const stateAtLineEnd = updateBacktickState(body, stateAtLineStart);
if (isAdded) {
const added = body;
if (!isAuditorCheckerFile) {
for (const { re, why } of BLOCK_PATTERNS) {
const m = added.match(re);
if (m && typeof m.index === "number") {
// Skip if EITHER (a) the file was already inside a
// multi-line backtick block when this line started, OR
// (b) the match sits inside a quoted string literal on
// THIS line. The earlier code only checked stateAtLineStart;
// now we also check that the match isn't past the
// opening backtick of a block that opens on this line.
if (stateAtLineStart || isInsideQuotedString(added, m.index)) continue;
findings.push({
check: "static",
severity: "block",
summary: `${why} in ${path}`,
evidence: [`${path}:+${idx + 1}: ${added.trim().slice(0, 160)}`],
});
}
}
}
for (const { re, why } of WARN_COMMENT_PATTERNS) {
if (re.test(line)) {
findings.push({ findings.push({
check: "static", check: "static",
severity: "block", severity: "warn",
summary: `${why} in ${path}`,
evidence: [`${path}:+${idx + 1}: ${added.trim().slice(0, 160)}`],
});
}
}
for (const { re, why } of INFO_HARDCODED_PATTERNS) {
if (re.test(added)) {
findings.push({
check: "static",
severity: "info",
summary: `${why} in ${path}`, summary: `${why} in ${path}`,
evidence: [`${path}:+${idx + 1}: ${added.trim().slice(0, 160)}`], evidence: [`${path}:+${idx + 1}: ${added.trim().slice(0, 160)}`],
}); });
} }
} }
} }
for (const { re, why } of WARN_COMMENT_PATTERNS) {
if (re.test(line)) { // Carry the end-of-line state forward to the next iteration.
findings.push({ inMultilineBacktick = stateAtLineEnd;
check: "static",
severity: "warn",
summary: `${why} in ${path}`,
evidence: [`${path}:+${idx + 1}: ${added.trim().slice(0, 160)}`],
});
}
}
for (const { re, why } of INFO_HARDCODED_PATTERNS) {
if (re.test(added)) {
findings.push({
check: "static",
severity: "info",
summary: `${why} in ${path}`,
evidence: [`${path}:+${idx + 1}: ${added.trim().slice(0, 160)}`],
});
}
}
} }
// "Field added but never read" heuristic — catches exactly the // "Field added but never read" heuristic — catches exactly the
@ -105,10 +143,20 @@ export function runStaticCheck(diff: string): Finding[] {
// elsewhere might exist). The point is: if NEITHER this diff nor // elsewhere might exist). The point is: if NEITHER this diff nor
// any other line in the diff reads the field, the PR is shipping // any other line in the diff reads the field, the PR is shipping
// state without a consumer. // state without a consumer.
//
// Serde exemption: if the field's parent struct derives Serialize
// or Deserialize, the read-site is the macro itself — JSON
// round-trips consume every public field. Without this exemption
// the check produces false positives on every response/request
// struct shipped through `/v1/*`.
const addedLines = lines.filter(l => l.startsWith("+") && !l.startsWith("+++")) const addedLines = lines.filter(l => l.startsWith("+") && !l.startsWith("+++"))
.map(l => l.slice(1)); .map(l => l.slice(1));
const newFields = extractNewFields(addedLines); const newFields = extractNewFieldsWithLine(lines);
for (const field of newFields) { const seenNames = new Set<string>();
for (const { name: field, lineIdx } of newFields) {
if (seenNames.has(field)) continue;
seenNames.add(field);
if (parentStructHasSerdeDerive(lines, lineIdx)) continue;
const readPattern = new RegExp(`[\\.:]\\s*${escape(field)}\\b|\\b${escape(field)}\\s*:`); const readPattern = new RegExp(`[\\.:]\\s*${escape(field)}\\b|\\b${escape(field)}\\s*:`);
// The definition line itself matches readPattern — filter it out // The definition line itself matches readPattern — filter it out
// by requiring at least TWO lines in the diff mention the field // by requiring at least TWO lines in the diff mention the field
@ -146,26 +194,105 @@ function splitDiffByFile(diff: string): Map<string, string[]> {
return out; return out;
} }
// Extract new `pub name: Type,` fields from added lines. Rust syntax. // Extract new `pub name: Type,` fields from the per-file diff lines,
// Narrowly-scoped: only matches at the start of a trimmed line, // keeping each occurrence's line index so the caller can resolve the
// requires `pub ` prefix, ignores `pub fn` / `pub struct` / etc. // parent struct. Same narrow rules as before: starts with `pub `,
function extractNewFields(addedLines: string[]): string[] { // excludes `pub fn` / `pub struct` / etc.
const fields = new Set<string>(); function extractNewFieldsWithLine(lines: string[]): Array<{ name: string; lineIdx: number }> {
for (const line of addedLines) { const out: Array<{ name: string; lineIdx: number }> = [];
const t = line.trim(); for (let i = 0; i < lines.length; i++) {
// pub NAME: Type, const line = lines[i];
if (!line.startsWith("+") || line.startsWith("+++")) continue;
const t = line.slice(1).trim();
const m = t.match(/^pub\s+(?!fn\b|struct\b|enum\b|mod\b|use\b|trait\b|impl\b|const\b|static\b|type\b)(\w+)\s*:/); const m = t.match(/^pub\s+(?!fn\b|struct\b|enum\b|mod\b|use\b|trait\b|impl\b|const\b|static\b|type\b)(\w+)\s*:/);
if (m) fields.add(m[1]); if (m) out.push({ name: m[1], lineIdx: i });
} }
return Array.from(fields); return out;
}
// True if the field at `fieldLineIdx` lives inside a struct whose
// declaration carries `#[derive(... Serialize|Deserialize ...)]`. We
// walk backward through the diff (added + context lines both count —
// a struct declaration unchanged by the PR still appears as context)
// to find the nearest `pub struct` boundary, then scan a few lines
// above it for derive attributes. Conservative bounds:
// - 80 lines back to find `struct` (struct definitions can grow large)
// - 8 lines above the `struct` keyword for attribute lines
// Stops the struct-search early if we hit a `}` at zero indent
// (the previous scope) or another `pub struct` (we left ours).
function parentStructHasSerdeDerive(lines: string[], fieldLineIdx: number): boolean {
// Bounds-check fieldLineIdx (caught 2026-04-27 by Kimi self-audit).
// Pre-fix: if fieldLineIdx >= lines.length, the loop ran from a
// negative implicit upper bound (fieldLineIdx - 80 could be > 0
// even when fieldLineIdx is past EOF) and read undefined slots.
// Defensive: bail early on out-of-range input.
if (fieldLineIdx < 0 || fieldLineIdx >= lines.length) return false;
let structLineIdx = -1;
for (let i = fieldLineIdx - 1; i >= 0 && i >= fieldLineIdx - 80; i--) {
const raw = lines[i];
if (typeof raw !== "string" || raw.length === 0) continue;
const body = stripDiffPrefix(raw);
const trimmed = body.trim();
if (/^pub\s+struct\s+\w/.test(trimmed)) {
structLineIdx = i;
break;
}
// Closing brace at column 0 means the enclosing scope ended above
// the field — we're not actually inside a struct.
if (body.startsWith("}")) return false;
}
if (structLineIdx < 0) return false;
for (let j = structLineIdx - 1; j >= 0 && j >= structLineIdx - 8; j--) {
const raw = lines[j];
if (typeof raw !== "string") continue;
const trimmed = stripDiffPrefix(raw).trim();
if (trimmed === "" || trimmed.startsWith("//") || trimmed.startsWith("///")) continue;
if (!trimmed.startsWith("#[")) break;
if (/derive\s*\([^)]*\b(Serialize|Deserialize)\b/.test(trimmed)) return true;
}
return false;
}
// Strip leading +/-/space from a unified-diff line, leaving the raw
// source line. Handles the case where the line is shorter than 1 char
// (rare but real for empty-context lines).
function stripDiffPrefix(line: string): string {
if (line.length === 0) return line;
const c = line[0];
if (c === "+" || c === "-" || c === " ") return line.slice(1);
return line;
}
// Walk a single line and toggle the cross-line backtick state on each
// unescaped backtick. Single-quote and double-quote runs are line-
// bounded in JS/TS/Rust by language rules (string literals don't span
// newlines without explicit `\` continuation), so we only track
// backticks across lines. Returns the new state for the next line.
function updateBacktickState(line: string, inBacktick: boolean): boolean {
let state = inBacktick;
let inDouble = false;
let inSingle = false;
for (let i = 0; i < line.length; i++) {
const c = line[i];
const esc = i > 0 && line[i - 1] === "\\";
if (esc) continue;
// Inside a multi-line backtick template, single/double quotes
// don't open new strings — they're literal characters of the
// template. Same applies the other way around.
if (c === '"' && !inSingle && !state) inDouble = !inDouble;
else if (c === "'" && !inDouble && !state) inSingle = !inSingle;
else if (c === "`" && !inDouble && !inSingle) state = !state;
}
return state;
} }
// True if `pos` falls inside a double- or single-quoted string on this // True if `pos` falls inside a double- or single-quoted string on this
// line (backtick template literals too). Walks left→right toggling the // line (backtick template literals too). Walks left→right toggling the
// "in quote" state on each unescaped quote. Good enough for single- // "in quote" state on each unescaped quote. Per-line only — the file-
// line matches; multi-line strings aren't parsed (they're extremely // level walk in runStaticCheck handles multi-line backtick templates
// rare in the patterns we're blocking on, and would require a proper // via updateBacktickState.
// tokenizer to handle correctly).
function isInsideQuotedString(line: string, pos: number): boolean { function isInsideQuotedString(line: string, pos: number): boolean {
let inDouble = false, inSingle = false, inBacktick = false; let inDouble = false, inSingle = false, inBacktick = false;
for (let i = 0; i < pos; i++) { for (let i = 0; i < pos; i++) {

View File

@ -24,14 +24,30 @@ const POLL_INTERVAL_MS = 90_000; // 90s — enough budget for audit runs to comp
const PAUSE_FILE = "/home/profit/lakehouse/auditor.paused"; const PAUSE_FILE = "/home/profit/lakehouse/auditor.paused";
const STATE_FILE = "/home/profit/lakehouse/data/_auditor/state.json"; const STATE_FILE = "/home/profit/lakehouse/data/_auditor/state.json";
// Per-PR audit cap. Prevents the daemon from running away on a PR
// when each push surfaces new findings — operator wants to review
// in batch, not have the daemon burn budget while they're away.
// Default 3 audits per PR. Override via LH_AUDITOR_MAX_AUDITS_PER_PR.
// Set to 0 to disable the cap.
//
// Reset (after manual review): edit data/_auditor/state.json and
// set audit_count_per_pr.<N> = 0 (or delete the key). Daemon picks
// up the change on the next cycle without restart.
const MAX_AUDITS_PER_PR = Number(process.env.LH_AUDITOR_MAX_AUDITS_PER_PR) || 3;
interface State { interface State {
// Map: PR number → last-audited head SHA. Lets us dedupe audits // Map: PR number → last-audited head SHA. Lets us dedupe audits
// across restarts (poller can crash/restart without re-auditing // across restarts (poller can crash/restart without re-auditing
// all open PRs from scratch). // all open PRs from scratch).
last_audited: Record<string, string>; last_audited: Record<string, string>;
// Map: PR number → number of audits run on that PR since last reset.
// Daemon halts auditing a PR once this hits MAX_AUDITS_PER_PR.
// Operator clears the entry to resume.
audit_count_per_pr: Record<string, number>;
started_at: string; started_at: string;
cycles_total: number; cycles_total: number;
cycles_skipped_paused: number; cycles_skipped_paused: number;
cycles_skipped_capped: number;
audits_run: number; audits_run: number;
last_cycle_at?: string; last_cycle_at?: string;
} }
@ -47,17 +63,21 @@ async function loadState(): Promise<State> {
return { return {
last_audited: s.last_audited ?? {}, last_audited: s.last_audited ?? {},
started_at: s.started_at ?? new Date().toISOString(), started_at: s.started_at ?? new Date().toISOString(),
audit_count_per_pr: s.audit_count_per_pr ?? {},
cycles_total: s.cycles_total ?? 0, cycles_total: s.cycles_total ?? 0,
cycles_skipped_paused: s.cycles_skipped_paused ?? 0, cycles_skipped_paused: s.cycles_skipped_paused ?? 0,
cycles_skipped_capped: s.cycles_skipped_capped ?? 0,
audits_run: s.audits_run ?? 0, audits_run: s.audits_run ?? 0,
last_cycle_at: s.last_cycle_at, last_cycle_at: s.last_cycle_at,
}; };
} catch { } catch {
return { return {
last_audited: {}, last_audited: {},
audit_count_per_pr: {},
started_at: new Date().toISOString(), started_at: new Date().toISOString(),
cycles_total: 0, cycles_total: 0,
cycles_skipped_paused: 0, cycles_skipped_paused: 0,
cycles_skipped_capped: 0,
audits_run: 0, audits_run: 0,
}; };
} }
@ -89,12 +109,38 @@ async function runCycle(state: State): Promise<State> {
console.log(`[auditor] cycle ${state.cycles_total}: ${prs.length} open PR(s)`); console.log(`[auditor] cycle ${state.cycles_total}: ${prs.length} open PR(s)`);
for (const pr of prs) { for (const pr of prs) {
const last = state.last_audited[String(pr.number)]; const prKey = String(pr.number);
const last = state.last_audited[prKey];
if (last === pr.head_sha) { if (last === pr.head_sha) {
console.log(`[auditor] skip PR #${pr.number} (SHA ${pr.head_sha.slice(0, 8)} already audited)`); console.log(`[auditor] skip PR #${pr.number} (SHA ${pr.head_sha.slice(0, 8)} already audited)`);
continue; continue;
} }
console.log(`[auditor] audit PR #${pr.number} (${pr.head_sha.slice(0, 8)}) — ${pr.title.slice(0, 60)}`); // Per-head-SHA audit cap. Each new push gets MAX_AUDITS_PER_PR
// fresh attempts; the counter auto-resets when the head SHA
// changes. Operator only intervenes manually if a single SHA
// somehow needs MORE than the cap (rare — usually transient
// upstream errors clear themselves inside 3 attempts).
//
// Reset rule: if `last` exists (we've seen this PR before) AND
// pr.head_sha != last, that's a new push. Drop the counter.
// The dedup branch above already handles same-SHA → skip, so
// we only land here when the SHA actually moved.
if (last !== undefined && (state.audit_count_per_pr[prKey] ?? 0) > 0) {
const prior_count = state.audit_count_per_pr[prKey];
console.log(`[auditor] PR #${pr.number} new head ${pr.head_sha.slice(0, 8)} (prior ${last.slice(0, 8)}, was ${prior_count}/${MAX_AUDITS_PER_PR}) — resetting cap counter`);
state.audit_count_per_pr[prKey] = 0;
}
const auditedSoFar = state.audit_count_per_pr[prKey] ?? 0;
if (MAX_AUDITS_PER_PR > 0 && auditedSoFar >= MAX_AUDITS_PER_PR) {
// This branch only fires now if the SAME head SHA somehow
// burned MAX audits (transient upstream errors retried that
// many times). Operator can clear state.audit_count_per_pr.<N>
// = 0 to force one more attempt; otherwise wait for next push.
console.log(`[auditor] skip PR #${pr.number} (same head ${pr.head_sha.slice(0, 8)} burned ${auditedSoFar}/${MAX_AUDITS_PER_PR} — push new code or clear state.json audit_count_per_pr.${prKey})`);
state.cycles_skipped_capped += 1;
continue;
}
console.log(`[auditor] audit PR #${pr.number} (${pr.head_sha.slice(0, 8)}) — ${pr.title.slice(0, 60)} [${auditedSoFar + 1}/${MAX_AUDITS_PER_PR}]`);
try { try {
// Skip dynamic by default: it mutates live playbook state and // Skip dynamic by default: it mutates live playbook state and
// re-runs on every PR update would pollute quickly. Operator // re-runs on every PR update would pollute quickly. Operator
@ -106,8 +152,22 @@ async function runCycle(state: State): Promise<State> {
skip_inference: process.env.LH_AUDITOR_SKIP_INFERENCE === "1", skip_inference: process.env.LH_AUDITOR_SKIP_INFERENCE === "1",
}); });
console.log(`[auditor] verdict=${verdict.overall} findings=${verdict.metrics.findings_total} (block=${verdict.metrics.findings_block} warn=${verdict.metrics.findings_warn})`); console.log(`[auditor] verdict=${verdict.overall} findings=${verdict.metrics.findings_total} (block=${verdict.metrics.findings_block} warn=${verdict.metrics.findings_warn})`);
state.last_audited[String(pr.number)] = pr.head_sha; state.last_audited[prKey] = pr.head_sha;
state.audit_count_per_pr[prKey] = auditedSoFar + 1;
state.audits_run += 1; state.audits_run += 1;
if (state.audit_count_per_pr[prKey] >= MAX_AUDITS_PER_PR) {
console.log(`[auditor] PR #${pr.number} reached cap (${MAX_AUDITS_PER_PR} audits) — daemon will skip further audits until reset`);
}
// Persist state immediately after each successful audit so the
// increment survives a crash. Pre-2026-04-27 the cycle saved
// once at the end (main.ts:140), which lost the count if the
// daemon was killed mid-cycle. Fix lifted from kimi_architect's
// own audit on this very file. saveState is idempotent + cheap
// (one JSON write), so per-audit cost is negligible.
try { await saveState(state); }
catch (e) {
console.error(`[auditor] saveState mid-cycle failed: ${(e as Error).message} — count held in memory`);
}
} catch (e) { } catch (e) {
console.error(`[auditor] audit failed: ${(e as Error).message}`); console.error(`[auditor] audit failed: ${(e as Error).message}`);
} }

View File

@ -0,0 +1,85 @@
// drift_report.ts — comparison of a current run summary vs the
// previous run summary on disk. Spec calls this "drift detection";
// concretely it answers: did the pipeline behave the same way as
// last time, and if not, was the change explained by an input change
// or did it appear out of nowhere (silent drift)?
//
// Severity:
// ok — within 20% on every metric, no hash surprises
// warn — record-count or category swing > 20%, OR new error class
// alert — output_hash differs while input_hash is identical
// (deterministic violation — same input → different output)
import {
ValidationResult, requireString, requireIsoTimestamp,
} from "./types";
import type { StageName } from "./stage_receipt";
// Schema version of DriftReport payloads; validateDriftReport refuses
// any other value. Bump deliberately on breaking changes.
export const DRIFT_REPORT_SCHEMA_VERSION = 2;
// Relative swing (20%) above which a metric change counts as drift.
export const DRIFT_THRESHOLD_PCT = 0.20;
// Overall severity of a report — see the severity table in the file
// header (ok / warn / alert).
export type DriftSeverity = "ok" | "warn" | "alert";
// Per-stage deltas between the current run and the prior run on disk.
export interface StageDrift {
  stage: StageName;
  delta_records_in: number; // current - prior
  delta_records_out: number;
  delta_accepted: number;
  delta_quarantined: number;
  pct_change_out: number | null; // null when prior had 0 records
  // null when input_hash isn't materialized into the stage summary —
  // schema v1 lied and reported `true` here. v2 is honest: callers
  // that want determinism enforcement must read the full StageReceipt
  // off disk and compute input_hash equality there.
  input_hash_match: boolean | null;
  output_hash_match: boolean;
  // alert if input_hash matches but output_hash diverges
  deterministic_violation: boolean;
  notes: string[];
}
// Whole-run drift summary: one StageDrift per stage plus top-level
// flags and an overall severity.
export interface DriftReport {
  schema_version: number; // must equal DRIFT_REPORT_SCHEMA_VERSION
  run_id: string;
  prior_run_id: string | null; // null when no prior run on disk
  generated_at: string; // ISO 8601 — enforced by validateDriftReport
  severity: DriftSeverity;
  stages: StageDrift[];
  // Top-level swings the human reader should see immediately.
  flags: string[];
}
// Structural validator for DriftReport. Collects every problem rather
// than stopping at the first, so callers can surface all errors in
// one pass. Does not recurse into `stages` elements — only checks
// that the containers are arrays.
export function validateDriftReport(input: unknown): ValidationResult<DriftReport> {
  if (typeof input !== "object" || input === null) {
    return { valid: false, errors: ["expected object"] };
  }
  const r = input as Record<string, unknown>;
  const errors: string[] = [];
  if (r.schema_version !== DRIFT_REPORT_SCHEMA_VERSION) {
    errors.push(`schema_version: expected ${DRIFT_REPORT_SCHEMA_VERSION}, got ${JSON.stringify(r.schema_version)}`);
  }
  requireString(r.run_id, "run_id", errors);
  // prior_run_id is nullable by design (first-ever run has no prior).
  if (r.prior_run_id !== null && typeof r.prior_run_id !== "string") {
    errors.push("prior_run_id: must be string or null");
  }
  requireIsoTimestamp(r.generated_at, "generated_at", errors);
  if (!["ok", "warn", "alert"].includes(r.severity as string)) {
    errors.push(`severity: must be ok|warn|alert, got ${JSON.stringify(r.severity)}`);
  }
  for (const [value, label] of [[r.stages, "stages"], [r.flags, "flags"]] as const) {
    if (!Array.isArray(value)) errors.push(`${label}: expected array`);
  }
  // The require* helpers push a message for every failure, so an empty
  // error list is exactly the "all checks passed" condition.
  if (errors.length > 0) return { valid: false, errors };
  return { valid: true, value: r as unknown as DriftReport };
}

View File

@ -0,0 +1,116 @@
// EvidenceRecord schema tests.
//
// Two positive fixtures (one per real-source prototype: distilled_facts
// + contract_analyses) and three negative fixtures pinning the
// non-negotiable invariants the spec demands:
// - every record must trace to a source (provenance)
// - schema_version must match — silent v1/v2 drift is the worst kind
// - required identity fields (run_id) cannot be missing
//
// Run with: bun test auditor/schemas/distillation/evidence_record.test.ts
import { test, expect } from "bun:test";
import { readFileSync } from "node:fs";
import { resolve } from "node:path";
import { validateEvidenceRecord, EVIDENCE_SCHEMA_VERSION } from "./evidence_record";
const FIXTURE_DIR = resolve(import.meta.dir, "fixtures");
// Read and parse a JSON fixture from the fixtures directory that sits
// next to this test file. Returns `unknown` so every test is forced
// to validate before touching fields.
function loadFixture(name: string): unknown {
  const fixturePath = resolve(FIXTURE_DIR, name);
  const raw = readFileSync(fixturePath, "utf8");
  return JSON.parse(raw);
}
// Guard on the constant itself: a version bump must edit this test,
// making the change deliberate rather than silent.
test("EVIDENCE_SCHEMA_VERSION is 1 — bump deliberately, never silently", () => {
  expect(EVIDENCE_SCHEMA_VERSION).toBe(1);
});
// Positive fixtures: one per real-source prototype (see file header).
test("positive: distilled_fact materialized record validates", () => {
  const r = validateEvidenceRecord(loadFixture("evidence_positive_distilled_fact.json"));
  if (!r.valid) console.error("unexpected errors:", r.errors);
  expect(r.valid).toBe(true);
  if (r.valid) {
    expect(r.value.run_id).toBe("cae21289");
    expect(r.value.model_role).toBe("extractor");
    expect(r.value.provenance.source_file).toBe("data/_kb/distilled_facts.jsonl");
  }
});
test("positive: contract_analysis materialized record validates with retrieval + observer fields", () => {
  const r = validateEvidenceRecord(loadFixture("evidence_positive_contract_analysis.json"));
  if (!r.valid) console.error("unexpected errors:", r.errors);
  expect(r.valid).toBe(true);
  if (r.valid) {
    expect(r.value.observer_verdict).toBe("reject");
    expect(r.value.observer_confidence).toBe(95);
    expect(r.value.retrieved_context?.matrix_corpora?.length).toBe(4);
    expect(r.value.failure_markers).toContain("observer_rejected");
  }
});
// Negative fixtures: each pins one non-negotiable invariant and also
// asserts the error message names the offending field.
test("negative: missing run_id is rejected with a specific error", () => {
  const r = validateEvidenceRecord(loadFixture("evidence_negative_no_run_id.json"));
  expect(r.valid).toBe(false);
  if (!r.valid) {
    expect(r.errors.some(e => e.includes("run_id"))).toBe(true);
  }
});
test("negative: schema_version mismatch is rejected (silent v1/v2 drift guard)", () => {
  const r = validateEvidenceRecord(loadFixture("evidence_negative_bad_schema_version.json"));
  expect(r.valid).toBe(false);
  if (!r.valid) {
    expect(r.errors.some(e => e.includes("schema_version"))).toBe(true);
  }
});
test("negative: bad provenance (non-sha256 sig_hash, non-ISO timestamp) is rejected", () => {
  const r = validateEvidenceRecord(loadFixture("evidence_negative_bad_provenance.json"));
  expect(r.valid).toBe(false);
  if (!r.valid) {
    // Must catch BOTH the sig_hash AND the recorded_at — comprehensive
    // error reporting is part of the contract.
    expect(r.errors.some(e => e.includes("sig_hash"))).toBe(true);
    expect(r.errors.some(e => e.includes("recorded_at"))).toBe(true);
  }
});
test("negative: non-object input is rejected with clear error", () => {
  const r = validateEvidenceRecord("not an object");
  expect(r.valid).toBe(false);
  if (!r.valid) {
    expect(r.errors[0]).toContain("expected object");
  }
});
// The remaining cases mutate a known-good fixture in memory rather
// than adding a file per variation.
test("negative: human_override with invalid decision is rejected", () => {
  const fixture = loadFixture("evidence_positive_distilled_fact.json") as Record<string, unknown>;
  fixture.human_override = {
    overrider: "test-user",
    decision: "maybe", // invalid — must be accept|reject|needs_review
    reason: "test",
    overridden_at: "2026-04-26T22:30:00.000Z",
  };
  const r = validateEvidenceRecord(fixture);
  expect(r.valid).toBe(false);
  if (!r.valid) {
    expect(r.errors.some(e => e.includes("human_override.decision"))).toBe(true);
  }
});
test("positive: human_override = null is allowed (explicitly no override)", () => {
  const fixture = loadFixture("evidence_positive_distilled_fact.json") as Record<string, unknown>;
  fixture.human_override = null;
  const r = validateEvidenceRecord(fixture);
  expect(r.valid).toBe(true);
});
test("negative: observer_confidence outside [0, 100] is rejected", () => {
  const fixture = loadFixture("evidence_positive_contract_analysis.json") as Record<string, unknown>;
  fixture.observer_confidence = 150;
  const r = validateEvidenceRecord(fixture);
  expect(r.valid).toBe(false);
  if (!r.valid) {
    expect(r.errors.some(e => e.includes("observer_confidence"))).toBe(true);
  }
});

View File

@ -0,0 +1,202 @@
// EvidenceRecord — the unified per-execution-trace record that the
// Evidence View emits and the Success Scorer reads.
//
// Derived from now.md spec + reconciliation of two existing prototypes:
// - distilled_facts.jsonl / distilled_procedures.jsonl (LLM-extracted
// text with run_id + sig_hash + extractor + verifier + embedding)
// - contract_analyses.jsonl (observer integration + retrieval
// telemetry + cost + duration)
//
// Required fields are the ones every record MUST have for traceability:
// run_id, task_id, timestamp, schema_version, provenance. Everything
// else is typed-but-optional because no single source has all of them
// — the Evidence View materializes them by JOINing across streams when
// the source data is present.
//
// schema_version starts at 1 and gets bumped on breaking changes.
// Validators MUST check schema_version and refuse unknown values so a
// future v2 reader doesn't silently accept v1 records (or vice versa).
import {
ValidationResult, Provenance,
requireString, requireNumber, requireIsoTimestamp, requireProvenance, requireStringArray,
} from "./types";
// Current schema version — validateEvidenceRecord refuses any other
// value (see the header note on silent v1/v2 drift).
export const EVIDENCE_SCHEMA_VERSION = 1;
// Role the model played in producing the trace behind a record.
export type ModelRole =
  | "executor" // produced the answer (e.g. scrum reviewer, mode runner LLM call)
  | "reviewer" // judged an executor output (e.g. observer, hand-review)
  | "extractor" // pulled structured data from text (e.g. fact_extractor)
  | "verifier" // confirmed/rejected an extracted claim (verifier in distilled_*)
  | "categorizer" // assigned a category (categorizer in distilled_*)
  | "tiebreaker" // resolved a consensus split
  | "applier" // landed code (scrum_applier)
  | "embedder" // produced embeddings
  | "other";
// The unified per-execution-trace record (see file header for the
// derivation from the two source prototypes). Only the Identity and
// Provenance sections are required; everything else is typed but
// optional.
export interface EvidenceRecord {
  // ── Identity ──
  // run_id ties this record to a specific execution. Sources use it
  // inconsistently (some stream-level, some per-call). The Evidence
  // View canonicalizes to per-call; if the source is stream-level,
  // synthesize as `${stream_run_id}:${row_index}`.
  run_id: string;
  // task_id groups records by logical task (e.g. one PR = one task_id
  // across multiple per-call runs). Defaults to run_id when no group
  // exists — never null.
  task_id: string;
  // ISO 8601 of when the EXECUTION happened, not when this record was
  // materialized. Use the source row's timestamp; provenance carries
  // the materialization time separately.
  timestamp: string;
  // Must equal EVIDENCE_SCHEMA_VERSION — the validator refuses any
  // other value.
  schema_version: number;
  // ── Provenance ── (required — no record without source linkage)
  provenance: Provenance;
  // ── Model attribution (optional) ──
  model_name?: string; // e.g. "kimi-k2:1t", "gpt-oss:120b"
  model_provider?: string; // e.g. "ollama_cloud", "openrouter", "ollama"
  model_role?: ModelRole;
  // ── Content hashes (optional) ──
  // sha256 of the full input prompt and full output content. Pre-
  // computed so the Evidence Index can dedup across re-runs of the
  // same prompt without re-hashing. Validated as 64 lowercase hex
  // chars when present.
  input_hash?: string;
  output_hash?: string;
  // ── Repo + execution context ──
  source_files?: string[]; // files the run touched/read
  commands_run?: string[]; // shell commands or tool calls fired
  retrieved_context?: { // what the model saw via retrieval
    matrix_corpora?: string[];
    matrix_hits?: number;
    matrix_chunks_kept?: number;
    matrix_chunks_dropped?: number;
    pathway_fingerprints_seen?: number;
  };
  // ── Observer + scratchpad ──
  observer_notes?: string[]; // observer.review() free-form notes
  observer_verdict?: "accept" | "reject" | "cycle" | string;
  observer_confidence?: number; // 0-100 — range enforced by the validator
  scratchpad_summary?: string; // tree-split scratchpad text or hash ref
  // ── Outcome markers ──
  // Both arrays exist because a run can have multiple succeeded gates
  // AND multiple failed gates simultaneously. Empty arrays are valid;
  // missing arrays are also valid (means "no evidence either way").
  success_markers?: string[]; // e.g. "cargo_green", "tests_passed", "anchor_grounded"
  failure_markers?: string[]; // e.g. "warning_count_up", "rationale_mismatch", "consensus_split"
  // ── Validation telemetry ──
  validation_results?: {
    grounded_fraction?: number; // mode_compare grounding %
    schema_valid?: boolean;
    pathway_replay_succeeded?: boolean;
    [key: string]: unknown;
  };
  // ── Human-in-loop ──
  human_override?: {
    overrider: string; // user identifier
    decision: "accept" | "reject" | "needs_review";
    reason: string;
    overridden_at: string; // ISO 8601
  } | null;
  // ── Performance ──
  // Telemetry fields; currently not type-checked by the validator.
  cost_usd?: number;
  latency_ms?: number;
  prompt_tokens?: number;
  completion_tokens?: number;
  // ── Free-form text content (the actual run output) ──
  // Optional because some sources are pure metadata (auto_apply.jsonl)
  // and have no text payload. Present for distilled_*, contract_analyses,
  // mode_experiments, scrum_reviews etc.
  text?: string;
  // ── Domain-specific metadata bucket ──
  // Source-specific fields that don't earn a top-level slot. e.g.
  // contract_analyses rows carry `contractor` here; mode_experiments
  // could carry `corpus_set`. Typed scalar values only — keep this
  // small or it becomes a junk drawer. Added 2026-04-27 (Kimi audit
  // flagged `(ev as any).contractor` schema bypass at export_sft.ts:126).
  metadata?: Record<string, string | number | boolean>;
}
// Structural validator for EvidenceRecord. The required identity
// fields (run_id, task_id, timestamp, provenance, schema_version) are
// always checked; optional fields are only checked when present.
// Collects every error rather than failing fast so callers get a
// complete report in one pass.
export function validateEvidenceRecord(input: unknown): ValidationResult<EvidenceRecord> {
  if (typeof input !== "object" || input === null) {
    return { valid: false, errors: ["expected object, got " + (input === null ? "null" : typeof input)] };
  }
  const r = input as Record<string, unknown>;
  const errors: string[] = [];
  // ── Required identity + provenance ──
  requireString(r.run_id, "run_id", errors);
  requireString(r.task_id, "task_id", errors);
  requireIsoTimestamp(r.timestamp, "timestamp", errors);
  requireProvenance(r.provenance, "provenance", errors);
  if (r.schema_version !== EVIDENCE_SCHEMA_VERSION) {
    errors.push(`schema_version: expected ${EVIDENCE_SCHEMA_VERSION}, got ${JSON.stringify(r.schema_version)}`);
  }
  // ── Optional but typed-when-present ──
  if (r.model_role !== undefined) {
    const roles: ModelRole[] = ["executor", "reviewer", "extractor", "verifier", "categorizer", "tiebreaker", "applier", "embedder", "other"];
    if (!roles.includes(r.model_role as ModelRole)) {
      errors.push(`model_role: must be one of ${roles.join("|")}, got ${JSON.stringify(r.model_role)}`);
    }
  }
  // Content hashes must be full lowercase hex sha256 digests.
  const sha256Hex = /^[0-9a-f]{64}$/;
  if (r.input_hash !== undefined && !sha256Hex.test(String(r.input_hash))) {
    errors.push("input_hash: must be hex sha256 when present");
  }
  if (r.output_hash !== undefined && !sha256Hex.test(String(r.output_hash))) {
    errors.push("output_hash: must be hex sha256 when present");
  }
  // String-array fields share one rule: when present, every element
  // must be a string. Order here matches the reported-error order.
  const stringArrayFields: Array<[unknown, string]> = [
    [r.source_files, "source_files"],
    [r.commands_run, "commands_run"],
    [r.success_markers, "success_markers"],
    [r.failure_markers, "failure_markers"],
    [r.observer_notes, "observer_notes"],
  ];
  for (const [value, label] of stringArrayFields) {
    if (value !== undefined) requireStringArray(value, label, errors);
  }
  if (r.observer_confidence !== undefined && requireNumber(r.observer_confidence, "observer_confidence", errors)) {
    const conf = r.observer_confidence as number;
    if (conf < 0 || conf > 100) {
      errors.push("observer_confidence: must be in [0, 100]");
    }
  }
  // human_override is tri-state: absent, explicitly null, or a fully
  // populated object.
  if (r.human_override !== undefined && r.human_override !== null) {
    const ho = r.human_override as Record<string, unknown>;
    if (typeof ho !== "object") {
      errors.push("human_override: expected object or null");
    } else {
      requireString(ho.overrider, "human_override.overrider", errors);
      requireString(ho.reason, "human_override.reason", errors);
      requireIsoTimestamp(ho.overridden_at, "human_override.overridden_at", errors);
      if (!["accept", "reject", "needs_review"].includes(ho.decision as string)) {
        errors.push(`human_override.decision: must be accept|reject|needs_review`);
      }
    }
  }
  // The require* helpers push a message on every failure, so an empty
  // error list means every check passed.
  if (errors.length > 0) return { valid: false, errors };
  return { valid: true, value: r as unknown as EvidenceRecord };
}

View File

@ -0,0 +1,11 @@
{
"run_id": "cae21289",
"task_id": "team_runs:637",
"timestamp": "2026-04-23T09:54:40.729599Z",
"schema_version": 1,
"provenance": {
"source_file": "data/_kb/distilled_facts.jsonl",
"sig_hash": "not-a-real-sha256",
"recorded_at": "yesterday"
}
}

View File

@ -0,0 +1,11 @@
{
"run_id": "cae21289",
"task_id": "team_runs:637",
"timestamp": "2026-04-23T09:54:40.729599Z",
"schema_version": 99,
"provenance": {
"source_file": "data/_kb/distilled_facts.jsonl",
"sig_hash": "21a809e2dc43dfae0000000000000000000000000000000000000000deadbeef",
"recorded_at": "2026-04-26T22:30:00.000Z"
}
}

View File

@ -0,0 +1,11 @@
{
"task_id": "team_runs:637",
"timestamp": "2026-04-23T09:54:40.729599Z",
"schema_version": 1,
"provenance": {
"source_file": "data/_kb/distilled_facts.jsonl",
"sig_hash": "21a809e2dc43dfae0000000000000000000000000000000000000000deadbeef",
"recorded_at": "2026-04-26T22:30:00.000Z"
},
"text": "missing run_id should fail validation"
}

View File

@ -0,0 +1,27 @@
{
"run_id": "contract_analysis:101078392:1777250758717",
"task_id": "permit:101078392",
"timestamp": "2026-04-25T23:45:58.717Z",
"schema_version": 1,
"provenance": {
"source_file": "data/_kb/contract_analyses.jsonl",
"line_offset": 0,
"sig_hash": "f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1f1",
"recorded_at": "2026-04-26T22:30:00.000Z"
},
"model_name": "kimi-k2:1t",
"model_role": "executor",
"model_provider": "ollama_cloud",
"retrieved_context": {
"matrix_corpora": ["entity_brief_v1", "chicago_permits_v1", "distilled_procedural_v20260423102847", "sec_tickers_v1"],
"matrix_hits": 10
},
"observer_notes": ["contractor history shows 0 prior fills in Chicago downtown zone"],
"observer_verdict": "reject",
"observer_confidence": 95,
"success_markers": ["matrix_hits_above_threshold"],
"failure_markers": ["observer_rejected"],
"cost_usd": 0.0002,
"latency_ms": 25419,
"text": "Permit 101078392 contractor ANTHONY FIORE — analysis: insufficient prior performance signal; recommend escalation."
}

View File

@ -0,0 +1,19 @@
{
"run_id": "cae21289",
"task_id": "team_runs:637",
"timestamp": "2026-04-23T09:54:40.729599Z",
"schema_version": 1,
"provenance": {
"source_file": "data/_kb/distilled_facts.jsonl",
"line_offset": 0,
"sig_hash": "21a809e2dc43dfae0000000000000000000000000000000000000000deadbeef",
"recorded_at": "2026-04-26T22:30:00.000Z"
},
"model_name": "qwen2.5:latest",
"model_role": "extractor",
"model_provider": "ollama",
"text": "Convergence refers to the system stabilizing into a state of high performance with low variance across iterations.",
"validation_results": {
"schema_valid": true
}
}

View File

@ -0,0 +1,56 @@
// ModelLedgerEntry — aggregate per-task-type-per-model performance.
// Built by aggregating mode_experiments.jsonl + model_trust.jsonl.
// Updated rather than appended — one row per (model_name, task_type)
// representing latest aggregates.
import {
ValidationResult, requireString, requireNumber, requireIsoTimestamp, requireStringArray,
} from "./types";
// Schema version for ledger rows; validator refuses any other value.
export const MODEL_LEDGER_SCHEMA_VERSION = 1;
// One aggregate row per (model_name, task_type) — updated in place,
// never appended (see file header).
export interface ModelLedgerEntry {
  schema_version: number; // must equal MODEL_LEDGER_SCHEMA_VERSION
  model_name: string;
  model_provider: string;
  task_type: string;
  success_rate: number; // [0, 1]
  failure_modes: string[]; // top failure mode tags
  best_partner_model?: string; // pairs well with X (consensus / tie-break)
  escalation_role?: string; // when this model gets escalated TO (or FROM)
  // Percentile aggregates — optional, not checked by the validator.
  cost_usd_p50?: number;
  latency_ms_p50?: number;
  latency_ms_p95?: number;
  context_window?: number;
  // Number of underlying samples; validator requires a positive integer.
  sample_count: number;
  last_updated: string; // ISO 8601
  notes?: string;
}
// Structural validator for ModelLedgerEntry. Bounds success_rate to
// [0, 1] and requires a positive-integer sample_count — an aggregate
// computed from zero samples would be meaningless.
export function validateModelLedgerEntry(input: unknown): ValidationResult<ModelLedgerEntry> {
  if (typeof input !== "object" || input === null) return { valid: false, errors: ["expected object"] };
  const entry = input as Record<string, unknown>;
  const errors: string[] = [];
  if (entry.schema_version !== MODEL_LEDGER_SCHEMA_VERSION) {
    errors.push(`schema_version: expected ${MODEL_LEDGER_SCHEMA_VERSION}, got ${JSON.stringify(entry.schema_version)}`);
  }
  for (const field of ["model_name", "model_provider", "task_type"] as const) {
    requireString(entry[field], field, errors);
  }
  requireIsoTimestamp(entry.last_updated, "last_updated", errors);
  requireStringArray(entry.failure_modes, "failure_modes", errors);
  if (requireNumber(entry.success_rate, "success_rate", errors)) {
    const rate = entry.success_rate as number;
    if (rate < 0 || rate > 1) errors.push("success_rate: must be in [0, 1]");
  }
  if (requireNumber(entry.sample_count, "sample_count", errors)) {
    const samples = entry.sample_count as number;
    if (samples < 1 || !Number.isInteger(samples)) {
      errors.push("sample_count: must be positive integer (no aggregate from zero samples)");
    }
  }
  // require* helpers push a message on failure, so an empty error list
  // is exactly the all-checks-passed condition.
  if (errors.length > 0) return { valid: false, errors };
  return { valid: true, value: entry as unknown as ModelLedgerEntry };
}

View File

@ -0,0 +1,68 @@
// Playbook — procedural knowledge extracted from accepted/partially-
// accepted runs. Different from pathway_memory's bug_fingerprints (which
// are pattern-detectors) — playbooks describe HOW to handle a task type.
import {
ValidationResult, requireString, requireIsoTimestamp, requireProvenance, requireStringArray,
} from "./types";
// Schema version for playbooks; validator refuses any other value.
export const PLAYBOOK_SCHEMA_VERSION = 1;
// Procedural knowledge for one task type — how to run it, how to know
// it worked, and what to do when it fails (see file header).
export interface Playbook {
  schema_version: number; // must equal PLAYBOOK_SCHEMA_VERSION
  playbook_id: string;
  task_type: string; // e.g. "scrum_review", "pr_audit", "staffing.fill"
  problem_pattern: string; // when does this playbook apply?
  useful_context: string[]; // what to retrieve before running
  model_routing_path: string[]; // ordered model attempts that worked
  commands_worked: string[];
  commands_failed: string[];
  validation_steps: string[];
  repo_files_touched: string[];
  recovery_strategy: string; // what to do when the path fails
  known_failure_modes: string[];
  escalation_threshold: string; // when to switch to a stronger model
  acceptance_criteria: string[]; // how to know it succeeded — validator requires non-empty
  source_run_ids: string[]; // FK to EvidenceRecord.run_id (provenance — every playbook traces to source)
  created_at: string;
  provenance: { source_file: string; line_offset?: number; sig_hash: string; recorded_at: string };
}
// Structural validator for Playbook. Beyond per-field type checks it
// enforces the two spec non-negotiables: every playbook must trace to
// at least one source run, and must carry at least one acceptance
// criterion.
export function validatePlaybook(input: unknown): ValidationResult<Playbook> {
  if (typeof input !== "object" || input === null) return { valid: false, errors: ["expected object"] };
  const r = input as Record<string, unknown>;
  const errors: string[] = [];
  if (r.schema_version !== PLAYBOOK_SCHEMA_VERSION) {
    errors.push(`schema_version: expected ${PLAYBOOK_SCHEMA_VERSION}, got ${JSON.stringify(r.schema_version)}`);
  }
  for (const field of [
    "playbook_id", "task_type", "problem_pattern",
    "recovery_strategy", "escalation_threshold",
  ] as const) {
    requireString(r[field], field, errors);
  }
  requireIsoTimestamp(r.created_at, "created_at", errors);
  for (const field of [
    "useful_context", "model_routing_path", "commands_worked",
    "commands_failed", "validation_steps", "repo_files_touched",
    "known_failure_modes", "acceptance_criteria", "source_run_ids",
  ] as const) {
    requireStringArray(r[field], field, errors);
  }
  // Spec non-negotiables: both arrays must actually contain entries.
  if (Array.isArray(r.source_run_ids) && r.source_run_ids.length === 0) {
    errors.push("source_run_ids: must be non-empty — every playbook traces to source evidence (spec non-negotiable)");
  }
  if (Array.isArray(r.acceptance_criteria) && r.acceptance_criteria.length === 0) {
    errors.push("acceptance_criteria: must be non-empty — every playbook needs success criteria (spec non-negotiable)");
  }
  requireProvenance(r.provenance, "provenance", errors);
  // require* helpers push a message on failure, so an empty error list
  // means every check passed.
  if (errors.length > 0) return { valid: false, errors };
  return { valid: true, value: r as unknown as Playbook };
}

View File

@ -0,0 +1,60 @@
// PreferenceSample — entry in exports/preference/chosen_rejected.jsonl.
// Source: real disagreements (audit_discrepancies, scrum ladder retries).
// Validator pins: chosen != rejected, both source_run_ids present, reason
// is non-empty. No synthesized preferences.
import {
ValidationResult, requireString, requireIsoTimestamp, requireProvenance,
} from "./types";
// Schema version stamped on every PreferenceSample row; bump on any
// breaking field change (validatePreferenceSample pins exact equality).
export const PREFERENCE_SAMPLE_SCHEMA_VERSION = 1;

// One chosen/rejected pair. Beyond field typing, the validator enforces:
// chosen !== rejected, chosen_run_id !== rejected_run_id, and reason is
// non-whitespace.
export interface PreferenceSample {
  schema_version: number; // must equal PREFERENCE_SAMPLE_SCHEMA_VERSION
  id: string;
  prompt: string;
  chosen: string; // must differ from `rejected` (real disagreement required)
  rejected: string;
  reason: string; // why chosen > rejected — must be non-empty
  chosen_run_id: string; // must differ from `rejected_run_id`
  rejected_run_id: string;
  created_at: string; // ISO 8601 (validated via requireIsoTimestamp)
  provenance: { source_file: string; line_offset?: number; sig_hash: string; recorded_at: string };
}
/**
 * Validates a raw row against the PreferenceSample schema.
 * Returns { valid: true, value } on success, or { valid: false, errors }
 * with one message per violated constraint (all checks run; errors
 * accumulate in schema order).
 */
export function validatePreferenceSample(input: unknown): ValidationResult<PreferenceSample> {
  const errors: string[] = [];
  if (typeof input !== "object" || input === null) {
    return { valid: false, errors: ["expected object"] };
  }
  const rec = input as Record<string, unknown>;
  let passing = true;

  if (rec.schema_version !== PREFERENCE_SAMPLE_SCHEMA_VERSION) {
    errors.push(`schema_version: expected ${PREFERENCE_SAMPLE_SCHEMA_VERSION}, got ${JSON.stringify(rec.schema_version)}`);
    passing = false;
  }

  // Required string fields, checked in schema order so the errors array
  // is stable for callers that snapshot it.
  const stringFields = ["id", "prompt", "chosen", "rejected", "reason", "chosen_run_id", "rejected_run_id"] as const;
  for (const field of stringFields) {
    passing = requireString(rec[field], field, errors) && passing;
  }
  passing = requireIsoTimestamp(rec.created_at, "created_at", errors) && passing;
  passing = requireProvenance(rec.provenance, "provenance", errors) && passing;

  // Self-pairing guards: a preference must be a real disagreement.
  if (typeof rec.chosen === "string" && rec.chosen === rec.rejected) {
    errors.push("chosen and rejected must differ — preference data needs a real disagreement");
    passing = false;
  }
  if (typeof rec.chosen_run_id === "string" && rec.chosen_run_id === rec.rejected_run_id) {
    errors.push("chosen_run_id and rejected_run_id must differ — same run can't disagree with itself");
    passing = false;
  }
  if (typeof rec.reason === "string" && rec.reason.trim() === "") {
    errors.push("reason: must be non-whitespace (every preference needs WHY chosen > rejected)");
    passing = false;
  }

  return passing
    ? { valid: true, value: rec as unknown as PreferenceSample }
    : { valid: false, errors };
}

View File

@ -0,0 +1,72 @@
// RagSample — entry in exports/rag/playbooks.jsonl. Spec shape exactly,
// plus provenance + success_score (so the index can re-rank by quality).
import {
ValidationResult, requireString, requireNumber, requireIsoTimestamp, requireProvenance, requireStringArray,
} from "./types";
// Schema version stamped on every RagSample row; bump on breaking change.
export const RAG_SAMPLE_SCHEMA_VERSION = 1;
// Allowed source_category values. RAG accepts accepted/partial freely;
// needs_human_review is opt-in (must be tagged so consumers can filter
// it out for SFT). "rejected" is deliberately absent from this list —
// rejected material never enters RAG (validator-enforced).
export const RAG_ALLOWED_CATEGORIES = ["accepted", "partially_accepted", "needs_human_review"] as const;
export type RagSourceCategory = (typeof RAG_ALLOWED_CATEGORIES)[number];

// One retrievable document in exports/rag/playbooks.jsonl.
export interface RagSample {
  schema_version: number; // must equal RAG_SAMPLE_SCHEMA_VERSION
  id: string;
  title: string;
  content: string; // must be non-whitespace (validator-enforced)
  tags: string[];
  source_run_id: string;
  // Snapshot of the score the source carried at export time. Lets a
  // consumer see "this was partial" without re-reading scored-runs.
  success_score: RagSourceCategory;
  // Same value as success_score by spec (now.md asks for both fields) —
  // the validator pins them equal. Kept distinct so future schemas can
  // diverge them (e.g. an "is_review_material" flag) without breaking
  // old consumers.
  source_category: RagSourceCategory;
  embedding_text: string; // the text to embed (often == content but can be shorter)
  created_at: string; // ISO 8601 timestamp
  provenance: { source_file: string; line_offset?: number; sig_hash: string; recorded_at: string };
}
/**
 * Validates a raw row against the RagSample schema. All checks run and
 * accumulate errors in schema order; success returns the input cast to
 * RagSample.
 */
export function validateRagSample(input: unknown): ValidationResult<RagSample> {
  const errors: string[] = [];
  if (typeof input !== "object" || input === null) {
    return { valid: false, errors: ["expected object"] };
  }
  const rec = input as Record<string, unknown>;
  let passing = true;

  if (rec.schema_version !== RAG_SAMPLE_SCHEMA_VERSION) {
    errors.push(`schema_version: expected ${RAG_SAMPLE_SCHEMA_VERSION}, got ${JSON.stringify(rec.schema_version)}`);
    passing = false;
  }
  // Required string fields in schema order.
  for (const field of ["id", "title", "content", "embedding_text", "source_run_id"] as const) {
    passing = requireString(rec[field], field, errors) && passing;
  }
  passing = requireIsoTimestamp(rec.created_at, "created_at", errors) && passing;
  passing = requireStringArray(rec.tags, "tags", errors) && passing;
  passing = requireProvenance(rec.provenance, "provenance", errors) && passing;

  // Both category fields must be in the allow-list, and must mirror
  // each other (spec: two fields, one value).
  const allowed = (v: unknown): boolean => RAG_ALLOWED_CATEGORIES.includes(v as RagSourceCategory);
  if (!allowed(rec.success_score)) {
    errors.push(`success_score: must be one of ${RAG_ALLOWED_CATEGORIES.join("|")} (rejected never enters RAG)`);
    passing = false;
  }
  if (!allowed(rec.source_category)) {
    errors.push(`source_category: must be one of ${RAG_ALLOWED_CATEGORIES.join("|")}`);
    passing = false;
  }
  if (rec.success_score !== rec.source_category) {
    errors.push("success_score and source_category must match (mirrored fields per spec)");
    passing = false;
  }
  if (typeof rec.content === "string" && rec.content.trim() === "") {
    errors.push("content: must be non-whitespace");
    passing = false;
  }

  return passing
    ? { valid: true, value: rec as unknown as RagSample }
    : { valid: false, errors };
}

View File

@ -0,0 +1,286 @@
// Real-data validation test — proves the EvidenceRecord schema fits
// what we ALREADY produce, with the minimum transformation each source
// stream requires. Doubles as the stale-extraction probe: if
// distilled_facts.jsonl rows can't materialize, we know that stream
// has rotted and Phase 2 sources from elsewhere.
//
// Strategy:
// 1. Read first N rows from each source jsonl (skip if missing)
// 2. Apply minimal transformer: add schema_version + provenance,
// synthesize run_id/task_id when source doesn't carry them
// 3. Validate each materialized record
// 4. Tally pass/fail per source + collect failure reasons
//
// This file is allowed to skip when source files don't exist (fresh
// clone), so it acts as both a CI guard and a real-environment probe.
import { test, expect } from "bun:test";
import { existsSync, readFileSync } from "node:fs";
import { resolve } from "node:path";
import {
validateEvidenceRecord, EVIDENCE_SCHEMA_VERSION, EvidenceRecord, ModelRole,
} from "./evidence_record";
// Probe configuration: repo root and how many rows to sample per source.
const ROOT = "/home/profit/lakehouse";
const SAMPLE_PER_SOURCE = 10;

// One probe = a source jsonl plus the minimal transform that lifts its
// rows toward EvidenceRecord shape; transform may return null to skip.
interface SourceProbe {
  source_file: string;
  transform: (row: any, lineNo: number) => Partial<EvidenceRecord> | null;
}

// Canonical 64-char synthetic sha256 for tests where the source row
// lacks one. Pretends the materializer would compute it via
// canonicalSha256(orderedKeys(row)) at Phase 2 time. A fixed value keeps
// the test deterministic; real materialization re-hashes per row.
const PLACEHOLDER_SHA = "0".repeat(64);
const RECORDED = "2026-04-26T22:30:00.000Z";

// Builds the provenance stamp for a probed row. A lowercase-hex hash
// (distilled_* uses 16 chars) is right-padded with zeros to 64 — mimics
// canonical recompute; anything else falls back to the placeholder.
function provFor(source_file: string, lineNo: number, sigHashRaw?: string): EvidenceRecord["provenance"] {
  const sigHash =
    typeof sigHashRaw === "string" && /^[0-9a-f]+$/.test(sigHashRaw)
      ? sigHashRaw.padEnd(64, "0").slice(0, 64)
      : PLACEHOLDER_SHA;
  return {
    source_file: source_file.replace(`${ROOT}/`, ""),
    line_offset: lineNo,
    sig_hash: sigHash,
    recorded_at: RECORDED,
  };
}
// One probe per source stream. Each transform lifts a raw source row
// into a Partial<EvidenceRecord>; fields the source lacks (run_id,
// task_id, sig_hash) are synthesized deterministically from row fields
// plus the line number, so the same file always materializes the same
// records.
const PROBES: SourceProbe[] = [
  {
    // distilled_facts: extractor output; run_id/task_id fall back to
    // file:line when the row doesn't carry them.
    source_file: `${ROOT}/data/_kb/distilled_facts.jsonl`,
    transform: (row: any, lineNo: number) => ({
      run_id: String(row.run_id ?? `distilled_facts:${lineNo}`),
      task_id: String(row.source_label ?? `distilled_facts:${lineNo}`),
      timestamp: row.created_at,
      schema_version: EVIDENCE_SCHEMA_VERSION,
      provenance: provFor(`${ROOT}/data/_kb/distilled_facts.jsonl`, lineNo, row.sig_hash),
      model_name: row.extractor,
      model_role: "extractor" as ModelRole,
      model_provider: "ollama",
      text: row.text,
    }),
  },
  {
    // distilled_procedures: same shape as distilled_facts, different file.
    source_file: `${ROOT}/data/_kb/distilled_procedures.jsonl`,
    transform: (row: any, lineNo: number) => ({
      run_id: String(row.run_id ?? `distilled_procedures:${lineNo}`),
      task_id: String(row.source_label ?? `distilled_procedures:${lineNo}`),
      timestamp: row.created_at,
      schema_version: EVIDENCE_SCHEMA_VERSION,
      provenance: provFor(`${ROOT}/data/_kb/distilled_procedures.jsonl`, lineNo, row.sig_hash),
      model_name: row.extractor,
      model_role: "extractor" as ModelRole,
      model_provider: "ollama",
      text: row.text,
    }),
  },
  {
    // contract_analyses: run_id synthesized from permit_id + epoch ms of
    // row.ts. Markers derive from row.ok / observer_verdict.
    // NOTE(review): cost is divided by 1e6 here — presumably the source
    // stores micro-USD; confirm the unit at the producer.
    source_file: `${ROOT}/data/_kb/contract_analyses.jsonl`,
    transform: (row: any, lineNo: number) => ({
      run_id: `contract_analysis:${row.permit_id}:${new Date(row.ts).getTime()}`,
      task_id: `permit:${row.permit_id}`,
      timestamp: row.ts,
      schema_version: EVIDENCE_SCHEMA_VERSION,
      provenance: provFor(`${ROOT}/data/_kb/contract_analyses.jsonl`, lineNo),
      model_role: "executor" as ModelRole,
      retrieved_context: {
        matrix_corpora: Object.keys(row.matrix_corpora ?? {}),
        matrix_hits: row.matrix_hits,
      },
      // [x].flat() tolerates row.observer_notes being scalar or array.
      observer_notes: row.observer_notes ? [row.observer_notes].flat() : undefined,
      observer_verdict: row.observer_verdict,
      observer_confidence: row.observer_conf,
      success_markers: row.ok ? ["matrix_hits_above_threshold"] : undefined,
      failure_markers: !row.ok || row.observer_verdict === "reject" ? ["observer_rejected"] : undefined,
      cost_usd: typeof row.cost === "number" ? row.cost / 1_000_000 : undefined,
      latency_ms: row.duration_ms,
      text: row.analysis,
    }),
  },
  {
    // mode_experiments: provider inferred from the model name shape —
    // a "/" means an openrouter-style id, otherwise ollama_cloud.
    source_file: `${ROOT}/data/_kb/mode_experiments.jsonl`,
    transform: (row: any, lineNo: number) => ({
      run_id: `mode_exec:${new Date(row.ts).getTime()}:${row.file_path ?? "?"}`,
      task_id: row.task_class,
      timestamp: row.ts,
      schema_version: EVIDENCE_SCHEMA_VERSION,
      provenance: provFor(`${ROOT}/data/_kb/mode_experiments.jsonl`, lineNo),
      model_name: row.model,
      model_role: "executor" as ModelRole,
      model_provider: row.model?.includes("/") ? "openrouter" : "ollama_cloud",
      retrieved_context: {
        matrix_corpora: row.sources?.matrix_corpus,
        matrix_chunks_kept: row.sources?.matrix_chunks_kept,
        matrix_chunks_dropped: row.sources?.matrix_chunks_dropped,
        pathway_fingerprints_seen: row.sources?.bug_fingerprints_count,
      },
      latency_ms: row.latency_ms,
      text: row.response,
      source_files: row.file_path ? [row.file_path] : undefined,
    }),
  },
  {
    // scrum_reviews: run_id synthesized from review time + file; the
    // accepted attempt number becomes a success marker when present.
    source_file: `${ROOT}/data/_kb/scrum_reviews.jsonl`,
    transform: (row: any, lineNo: number) => ({
      run_id: `scrum:${new Date(row.reviewed_at).getTime()}:${row.file}`,
      task_id: `scrum_review:${row.file}`,
      timestamp: row.reviewed_at,
      schema_version: EVIDENCE_SCHEMA_VERSION,
      provenance: provFor(`${ROOT}/data/_kb/scrum_reviews.jsonl`, lineNo),
      model_name: row.accepted_model,
      model_role: "executor" as ModelRole,
      source_files: [row.file],
      success_markers: row.accepted_on_attempt ? [`accepted_on_attempt_${row.accepted_on_attempt}`] : undefined,
      text: row.suggestions_preview,
    }),
  },
  {
    // observer_escalations: reviewer-role records with token counts;
    // row.sig_hash feeds both the run_id and provenance.
    source_file: `${ROOT}/data/_kb/observer_escalations.jsonl`,
    transform: (row: any, lineNo: number) => ({
      run_id: `obs_esc:${new Date(row.ts).getTime()}:${row.sig_hash}`,
      task_id: `observer_escalation:${row.cluster_endpoint ?? "?"}`,
      timestamp: row.ts,
      schema_version: EVIDENCE_SCHEMA_VERSION,
      provenance: provFor(`${ROOT}/data/_kb/observer_escalations.jsonl`, lineNo, row.sig_hash),
      model_role: "reviewer" as ModelRole,
      prompt_tokens: row.prompt_tokens,
      completion_tokens: row.completion_tokens,
      text: row.analysis,
    }),
  },
  {
    // audit_facts: only the element COUNTS are carried for now.
    source_file: `${ROOT}/data/_kb/audit_facts.jsonl`,
    transform: (row: any, lineNo: number) => ({
      run_id: `audit_facts:${row.head_sha}:${lineNo}`,
      task_id: `pr:${row.pr_number}`,
      timestamp: row.extracted_at,
      schema_version: EVIDENCE_SCHEMA_VERSION,
      provenance: provFor(`${ROOT}/data/_kb/audit_facts.jsonl`, lineNo),
      model_name: row.extractor,
      model_role: "extractor" as ModelRole,
      // facts/entities/relationships go into text as a JSON dump for now;
      // structured handling lives in Phase 2 where we map to specific
      // EvidenceRecord substructures.
      text: JSON.stringify({
        facts: row.facts?.length ?? 0,
        entities: row.entities?.length ?? 0,
        relationships: row.relationships?.length ?? 0,
      }),
    }),
  },
];
// Per-source tally filled in by the generated tests below and consumed
// by the final markdown-report test in this file.
interface ProbeResult {
  source_file: string; // repo-relative label
  rows_attempted: number; // rows read (including unparseable ones)
  rows_present: boolean; // false when the source file is missing entirely
  passed: number;
  failed: number;
  failure_reasons: string[]; // unique error strings, top 5
}
// Shared accumulator; the report test reads it after all probes ran.
const RESULTS: ProbeResult[] = [];
// Register one bun test per probe. Each test reads the first
// SAMPLE_PER_SOURCE non-empty lines, transforms them, validates the
// materialized records, and records the tally.
for (const probe of PROBES) {
  const sourceLabel = probe.source_file.replace(`${ROOT}/`, "");
  test(`real-data: ${sourceLabel}`, () => {
    const result: ProbeResult = {
      source_file: sourceLabel,
      rows_attempted: 0,
      rows_present: false,
      passed: 0,
      failed: 0,
      failure_reasons: [],
    };
    if (!existsSync(probe.source_file)) {
      RESULTS.push(result);
      // Skip silently — fresh clones won't have these files
      return;
    }
    result.rows_present = true;
    const lines = readFileSync(probe.source_file, "utf8").split("\n").filter(Boolean).slice(0, SAMPLE_PER_SOURCE);
    const reasons = new Set<string>();
    for (let i = 0; i < lines.length; i++) {
      result.rows_attempted++;
      let row: unknown;
      // Unparseable lines count toward rows_attempted but neither pass
      // nor fail — they are skipped, not treated as schema failures.
      try { row = JSON.parse(lines[i]); }
      catch { continue; }
      const transformed = probe.transform(row, i);
      if (!transformed) continue;
      const v = validateEvidenceRecord(transformed);
      if (v.valid) result.passed++;
      else {
        result.failed++;
        // Set dedupes repeated error strings across rows.
        for (const e of v.errors) reasons.add(e);
      }
    }
    result.failure_reasons = Array.from(reasons).slice(0, 5);
    RESULTS.push(result);
    // Test passes as long as we attempted something and got a result.
    // Per-source pass/fail counts are reported in the markdown writeup.
    expect(result.rows_attempted).toBeGreaterThanOrEqual(0);
  });
}
// Aggregates every ProbeResult into a markdown report: a summary table,
// per-source failure reasons, and the stale-extraction verdict for the
// distilled_* streams. The report is written to
// data/_kb/realdata_validation_report.md and echoed to stdout.
// NOTE(review): this relies on the per-probe tests above having already
// pushed into RESULTS — confirm registration-order execution holds.
test("real-data: emit markdown report", async () => {
  const md: string[] = [];
  md.push("# Real-data validation report");
  md.push("");
  md.push("Schema = EvidenceRecord v" + EVIDENCE_SCHEMA_VERSION + ". Sample = first " + SAMPLE_PER_SOURCE + " rows per source.");
  md.push("");
  md.push("| Source | Present | Rows | Pass | Fail | Pass% |");
  md.push("|---|---|---|---|---|---|");
  for (const r of RESULTS) {
    const pct = r.rows_attempted > 0 ? Math.round(100 * r.passed / r.rows_attempted) + "%" : "—";
    md.push(`| ${r.source_file} | ${r.rows_present ? "✓" : "—"} | ${r.rows_attempted} | ${r.passed} | ${r.failed} | ${pct} |`);
  }
  md.push("");
  let hasFailures = false;
  for (const r of RESULTS) {
    if (r.failed > 0) {
      hasFailures = true;
      md.push(`## Failures in ${r.source_file}`);
      for (const reason of r.failure_reasons) md.push(`- \`${reason}\``);
      md.push("");
    }
  }
  if (!hasFailures) {
    md.push("**No failures across all probed sources.** Every materialized record validates against EvidenceRecord v1.");
    md.push("");
  }
  // Stale extraction probe: explicit pass/fail
  const distilledFacts = RESULTS.find(r => r.source_file.endsWith("distilled_facts.jsonl"));
  const distilledProc = RESULTS.find(r => r.source_file.endsWith("distilled_procedures.jsonl"));
  md.push("## Stale-extraction probe");
  md.push("");
  // Fix: the old else-chain reported "present but materialization
  // failures" when distilledFacts was undefined (no probe result pushed
  // at all). A missing result and a missing file both mean "missing".
  if (!distilledFacts || !distilledFacts.rows_present) {
    md.push(`- **distilled_facts.jsonl:** missing — stale or never produced. Phase 2 sources from live streams instead.`);
  } else if (distilledFacts.passed > 0) {
    md.push(`- **distilled_facts.jsonl:** ${distilledFacts.passed}/${distilledFacts.rows_attempted} materialize cleanly. Stream is alive at the schema level.`);
  } else {
    md.push(`- **distilled_facts.jsonl:** present but materialization failures; treat as suspect, prefer mode_experiments + scrum_reviews.`);
  }
  if (distilledProc && distilledProc.rows_present && distilledProc.passed > 0) {
    md.push(`- **distilled_procedures.jsonl:** ${distilledProc.passed}/${distilledProc.rows_attempted} materialize cleanly.`);
  }
  md.push("");
  // Write the markdown to a stable path and stdout.
  // Fix: Bun.write returns a Promise; the original dropped it, so the
  // test could complete before the report reached disk. Await it.
  const out = md.join("\n");
  await Bun.write(`${ROOT}/data/_kb/realdata_validation_report.md`, out);
  console.log("\n" + out);
});

View File

@ -0,0 +1,111 @@
// Receipt — per-pipeline-stage record with everything needed to
// reproduce the run. Spec non-negotiable: substantive receipts, not
// "ran successfully". Every field below has a deterministic source so
// the receipt schema validator catches "I forgot to fill it in" the
// same way it catches type errors.
import {
ValidationResult, requireString, requireNumber, requireIsoTimestamp,
} from "./types";
// Schema version stamped on every Receipt; bump on breaking change.
export const RECEIPT_SCHEMA_VERSION = 1;

// Content-addressed reference to one file a stage consumed or produced.
export interface FileReference {
  path: string; // relative to repo root
  sha256: string; // hex
  bytes?: number; // optional but recommended
}

// Per-pipeline-stage record with everything needed to reproduce the run.
export interface Receipt {
  schema_version: number; // must equal RECEIPT_SCHEMA_VERSION
  command: string; // shell-line or script identifier
  git_sha: string; // 40-char hex (full SHA1)
  git_branch?: string;
  git_dirty?: boolean; // true if working tree had uncommitted changes
  started_at: string; // ISO 8601
  ended_at: string; // ISO 8601
  duration_ms: number;
  input_files: FileReference[];
  output_files: FileReference[];
  record_counts: {
    in: number;
    out: number;
    [key: string]: number; // per-stage extras (filtered, dropped, etc.)
  };
  validation_pass: boolean; // explicit — never inferred
  errors: string[];
  warnings: string[];
}
// Validates one FileReference under `field`, appending any problems to
// `errors` and returning whether the ref is well-formed.
function validateFileRef(v: unknown, field: string, errors: string[]): boolean {
  if (typeof v !== "object" || v === null) {
    errors.push(`${field}: expected object`);
    return false;
  }
  const ref = v as Record<string, unknown>;
  let passing = requireString(ref.path, `${field}.path`, errors);
  const shaOk = typeof ref.sha256 === "string" && /^[0-9a-f]{64}$/.test(ref.sha256);
  if (!shaOk) {
    errors.push(`${field}.sha256: must be hex sha256`);
    passing = false;
  }
  const badBytes = ref.bytes !== undefined && typeof ref.bytes !== "number";
  if (badBytes) {
    errors.push(`${field}.bytes: expected number when present`);
    passing = false;
  }
  return passing;
}
/**
 * Validates a Receipt row. Beyond field typing, it pins:
 *   - git_sha is a full 40-char hex SHA1,
 *   - validation_pass is an explicit boolean (never inferred),
 *   - every input/output file ref carries a 64-char hex sha256,
 *   - record_counts has numeric `in` and `out`.
 * Fix over the original: the optional git_branch / git_dirty fields are
 * now type-checked when present — previously a wrong-typed value (e.g.
 * git_dirty: "yes") passed silently and was cast into the Receipt.
 */
export function validateReceipt(input: unknown): ValidationResult<Receipt> {
  const errors: string[] = [];
  if (typeof input !== "object" || input === null) {
    return { valid: false, errors: ["expected object"] };
  }
  const r = input as Record<string, unknown>;
  let ok = true;
  if (r.schema_version !== RECEIPT_SCHEMA_VERSION) {
    errors.push(`schema_version: expected ${RECEIPT_SCHEMA_VERSION}, got ${JSON.stringify(r.schema_version)}`);
    ok = false;
  }
  ok = requireString(r.command, "command", errors) && ok;
  if (typeof r.git_sha !== "string" || !/^[0-9a-f]{40}$/.test(r.git_sha)) {
    errors.push("git_sha: must be 40-char hex");
    ok = false;
  }
  // Optional fields are still type-pinned when present.
  if (r.git_branch !== undefined && typeof r.git_branch !== "string") {
    errors.push("git_branch: expected string when present");
    ok = false;
  }
  if (r.git_dirty !== undefined && typeof r.git_dirty !== "boolean") {
    errors.push("git_dirty: expected boolean when present");
    ok = false;
  }
  ok = requireIsoTimestamp(r.started_at, "started_at", errors) && ok;
  ok = requireIsoTimestamp(r.ended_at, "ended_at", errors) && ok;
  ok = requireNumber(r.duration_ms, "duration_ms", errors) && ok;
  if (typeof r.validation_pass !== "boolean") {
    errors.push("validation_pass: must be boolean (explicit, never inferred)");
    ok = false;
  }
  if (!Array.isArray(r.input_files)) {
    errors.push("input_files: expected array");
    ok = false;
  } else {
    for (let i = 0; i < r.input_files.length; i++) {
      if (!validateFileRef(r.input_files[i], `input_files[${i}]`, errors)) ok = false;
    }
  }
  if (!Array.isArray(r.output_files)) {
    errors.push("output_files: expected array");
    ok = false;
  } else {
    for (let i = 0; i < r.output_files.length; i++) {
      if (!validateFileRef(r.output_files[i], `output_files[${i}]`, errors)) ok = false;
    }
  }
  if (typeof r.record_counts !== "object" || r.record_counts === null) {
    errors.push("record_counts: expected object");
    ok = false;
  } else {
    const rc = r.record_counts as Record<string, unknown>;
    if (typeof rc.in !== "number") { errors.push("record_counts.in: expected number"); ok = false; }
    if (typeof rc.out !== "number") { errors.push("record_counts.out: expected number"); ok = false; }
  }
  if (!Array.isArray(r.errors)) { errors.push("errors: expected array"); ok = false; }
  if (!Array.isArray(r.warnings)) { errors.push("warnings: expected array"); ok = false; }
  if (!ok) return { valid: false, errors };
  return { valid: true, value: r as unknown as Receipt };
}

View File

@ -0,0 +1,90 @@
// run_summary.ts — aggregates StageReceipt rows for one run_id.
// Spec field set: total records processed, total accepted/rejected/
// quarantined, dataset sizes, validation status, overall hash of run.
import {
ValidationResult, requireString, requireNumber, requireIsoTimestamp, requireSha256,
} from "./types";
import type { StageName } from "./stage_receipt";
// Schema version stamped on every RunSummary; bump on breaking change.
export const RUN_SUMMARY_SCHEMA_VERSION = 1;

// Per-stage rollup inside a RunSummary (one entry per pipeline stage).
export interface RunStageSummary {
  stage: StageName;
  records_in: number;
  records_out: number;
  accepted: number;
  rejected: number;
  quarantined: number;
  skipped: number;
  passed: boolean;
  duration_ms: number;
  output_hash: string;
}

// Aggregate of all StageReceipt rows for one run_id.
export interface RunSummary {
  schema_version: number; // must equal RUN_SUMMARY_SCHEMA_VERSION
  run_id: string;
  started_at: string; // earliest stage timestamp
  ended_at: string; // latest stage timestamp + duration
  git_commit: string; // 40-char hex (validator-enforced)
  stages: RunStageSummary[];
  // Aggregates across stages
  total_records_in: number;
  total_records_out: number;
  total_accepted: number;
  total_rejected: number;
  total_quarantined: number;
  total_skipped: number;
  // Dataset sizes — final outputs of each export stage
  rag_records: number;
  sft_records: number;
  preference_pairs: number;
  // Pipeline-wide pass = AND of every stage validation.passed
  overall_passed: boolean;
  // Run-wide hash: sha256 over each stage's output hash, sorted by stage name.
  // Detects ANY change in any stage output across runs.
  run_hash: string;
  total_duration_ms: number;
}
/**
 * Validates a RunSummary row. Pins run_id/timestamps, a 40-char hex
 * git_commit, an explicit boolean overall_passed, a sha256 run_hash,
 * every numeric total, and that stages is an array. Stage elements are
 * not deep-validated here (StageName is imported type-only; stage rows
 * are checked at their own boundary).
 * Fix over the original: the numeric-totals loop indexed via
 * `(r as any)[k]`, defeating the checker — `r` is already
 * Record<string, unknown>, so plain indexing is fully type-safe.
 */
export function validateRunSummary(input: unknown): ValidationResult<RunSummary> {
  const errors: string[] = [];
  if (typeof input !== "object" || input === null) {
    return { valid: false, errors: ["expected object"] };
  }
  const r = input as Record<string, unknown>;
  let ok = true;
  if (r.schema_version !== RUN_SUMMARY_SCHEMA_VERSION) {
    errors.push(`schema_version: expected ${RUN_SUMMARY_SCHEMA_VERSION}, got ${JSON.stringify(r.schema_version)}`);
    ok = false;
  }
  ok = requireString(r.run_id, "run_id", errors) && ok;
  ok = requireIsoTimestamp(r.started_at, "started_at", errors) && ok;
  ok = requireIsoTimestamp(r.ended_at, "ended_at", errors) && ok;
  if (typeof r.git_commit !== "string" || !/^[0-9a-f]{40}$/.test(r.git_commit)) {
    errors.push("git_commit: must be 40-char hex");
    ok = false;
  }
  if (typeof r.overall_passed !== "boolean") {
    errors.push("overall_passed: must be boolean");
    ok = false;
  }
  ok = requireSha256(r.run_hash, "run_hash", errors) && ok;
  // Every aggregate/count field must be numeric; checked in a fixed
  // order so error output is stable.
  const numericTotals = [
    "total_records_in", "total_records_out", "total_accepted", "total_rejected",
    "total_quarantined", "total_skipped", "rag_records", "sft_records",
    "preference_pairs", "total_duration_ms",
  ] as const;
  for (const k of numericTotals) {
    if (typeof r[k] !== "number") {
      errors.push(`${k}: expected number`);
      ok = false;
    }
  }
  if (!Array.isArray(r.stages)) {
    errors.push("stages: expected array");
    ok = false;
  }
  if (!ok) return { valid: false, errors };
  return { valid: true, value: r as unknown as RunSummary };
}

View File

@ -0,0 +1,367 @@
// Combined schema tests for ScoredRun, Receipt, Playbook,
// ScratchpadSummary, ModelLedgerEntry, RagSample, SftSample,
// PreferenceSample. EvidenceRecord lives in its own file because it's
// the foundational schema and warrants the JSON-fixture round-trip
// pattern; the rest use inline fixture makers since they're simpler.
//
// Each schema: 1 positive fixture + 4-5 negative cases pinning the
// non-negotiable invariants from now.md.
//
// Run: bun test auditor/schemas/distillation/schemas.test.ts
import { test, expect } from "bun:test";
import { validateScoredRun, SCORED_RUN_SCHEMA_VERSION } from "./scored_run";
import { validateReceipt, RECEIPT_SCHEMA_VERSION } from "./receipt";
import { validatePlaybook, PLAYBOOK_SCHEMA_VERSION } from "./playbook";
import { validateScratchpadSummary, SCRATCHPAD_SCHEMA_VERSION } from "./scratchpad_summary";
import { validateModelLedgerEntry, MODEL_LEDGER_SCHEMA_VERSION } from "./model_ledger";
import { validateRagSample, RAG_SAMPLE_SCHEMA_VERSION } from "./rag_sample";
import { validateSftSample, SFT_SAMPLE_SCHEMA_VERSION } from "./sft_sample";
import { validatePreferenceSample, PREFERENCE_SAMPLE_SCHEMA_VERSION } from "./preference_sample";
const NOW = "2026-04-26T22:30:00.000Z";
const SHA = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
const GIT_SHA = "f753e11157eef753e11157eef753e11157eef753";
const PROVENANCE = {
source_file: "data/_kb/scored_runs.jsonl",
line_offset: 0,
sig_hash: SHA,
recorded_at: NOW,
};
// ─── ScoredRun ───────────────────────────────────────────────────────
const SCORED_RUN_OK = {
schema_version: SCORED_RUN_SCHEMA_VERSION,
evidence_run_id: "run-abc",
evidence_task_id: "task-abc",
category: "accepted",
reasons: ["cargo_green=true", "anchor_grounding=0.95"],
scored_at: NOW,
scorer_version: "v1.0.0",
sub_scores: { cargo_green: true, anchor_grounding: 0.95 },
provenance: PROVENANCE,
};
test("ScoredRun: positive validates", () => {
const r = validateScoredRun(SCORED_RUN_OK);
if (!r.valid) console.error(r.errors);
expect(r.valid).toBe(true);
});
test("ScoredRun: empty reasons rejected (every score needs a reason)", () => {
const r = validateScoredRun({ ...SCORED_RUN_OK, reasons: [] });
expect(r.valid).toBe(false);
if (!r.valid) expect(r.errors.some(e => e.includes("reasons"))).toBe(true);
});
test("ScoredRun: invalid category rejected", () => {
const r = validateScoredRun({ ...SCORED_RUN_OK, category: "maybe_ok" });
expect(r.valid).toBe(false);
if (!r.valid) expect(r.errors.some(e => e.includes("category"))).toBe(true);
});
test("ScoredRun: anchor_grounding > 1 rejected (must be in [0, 1])", () => {
const r = validateScoredRun({ ...SCORED_RUN_OK, sub_scores: { ...SCORED_RUN_OK.sub_scores, anchor_grounding: 1.5 } });
expect(r.valid).toBe(false);
if (!r.valid) expect(r.errors.some(e => e.includes("anchor_grounding"))).toBe(true);
});
// ─── Receipt ─────────────────────────────────────────────────────────
const RECEIPT_OK = {
schema_version: RECEIPT_SCHEMA_VERSION,
command: "bun run scripts/build_evidence_index.ts",
git_sha: GIT_SHA,
git_branch: "scrum/auto-apply-19814",
git_dirty: false,
started_at: NOW,
ended_at: NOW,
duration_ms: 1234,
input_files: [{ path: "data/_kb/scrum_reviews.jsonl", sha256: SHA, bytes: 448000 }],
output_files: [{ path: "data/evidence/2026/04/26/run.jsonl", sha256: SHA }],
record_counts: { in: 100, out: 95, filtered: 5 },
validation_pass: true,
errors: [],
warnings: [],
};
test("Receipt: positive validates", () => {
const r = validateReceipt(RECEIPT_OK);
if (!r.valid) console.error(r.errors);
expect(r.valid).toBe(true);
});
test("Receipt: bad git_sha rejected (must be 40-char hex)", () => {
const r = validateReceipt({ ...RECEIPT_OK, git_sha: "abc123" });
expect(r.valid).toBe(false);
if (!r.valid) expect(r.errors.some(e => e.includes("git_sha"))).toBe(true);
});
test("Receipt: validation_pass must be boolean (never inferred)", () => {
const r = validateReceipt({ ...RECEIPT_OK, validation_pass: "yes" });
expect(r.valid).toBe(false);
if (!r.valid) expect(r.errors.some(e => e.includes("validation_pass"))).toBe(true);
});
test("Receipt: file refs without proper sha256 rejected", () => {
const r = validateReceipt({ ...RECEIPT_OK, output_files: [{ path: "x", sha256: "short" }] });
expect(r.valid).toBe(false);
if (!r.valid) expect(r.errors.some(e => e.includes("sha256"))).toBe(true);
});
// ─── Playbook ────────────────────────────────────────────────────────
const PLAYBOOK_OK = {
schema_version: PLAYBOOK_SCHEMA_VERSION,
playbook_id: "pb-scrum-review-001",
task_type: "scrum_review",
problem_pattern: "Cargo workspace warning escalation after applier patch",
useful_context: ["pathway memory bug fingerprints for the file area"],
model_routing_path: ["x-ai/grok-4.1-fast"],
commands_worked: ["cargo check --workspace"],
commands_failed: [],
validation_steps: ["warning count must not increase"],
repo_files_touched: ["crates/queryd/src/service.rs"],
recovery_strategy: "git checkout -- file when cargo red",
known_failure_modes: ["unused import noise"],
escalation_threshold: "use kimi-k2:1t when isolation mode rejects 2 attempts",
acceptance_criteria: ["cargo green", "warning count stable", "rationale-diff aligned"],
source_run_ids: ["run-xyz", "run-abc"],
created_at: NOW,
provenance: PROVENANCE,
};
test("Playbook: positive validates", () => {
const r = validatePlaybook(PLAYBOOK_OK);
if (!r.valid) console.error(r.errors);
expect(r.valid).toBe(true);
});
test("Playbook: empty source_run_ids rejected (every playbook traces to source — spec)", () => {
const r = validatePlaybook({ ...PLAYBOOK_OK, source_run_ids: [] });
expect(r.valid).toBe(false);
if (!r.valid) expect(r.errors.some(e => e.includes("source_run_ids"))).toBe(true);
});
test("Playbook: empty acceptance_criteria rejected (every playbook needs success criteria — spec)", () => {
const r = validatePlaybook({ ...PLAYBOOK_OK, acceptance_criteria: [] });
expect(r.valid).toBe(false);
if (!r.valid) expect(r.errors.some(e => e.includes("acceptance_criteria"))).toBe(true);
});
// ─── ScratchpadSummary ───────────────────────────────────────────────
const SCRATCHPAD_OK = {
schema_version: SCRATCHPAD_SCHEMA_VERSION,
run_id: "run-abc",
current_objective: "verify pr_audit mode end-to-end",
completed_steps: ["restart gateway"],
failed_steps: ["cloud chat returned 500"],
pending_steps: ["swap default model"],
important_paths: ["auditor/checks/inference.ts"],
decisions: ["defer kimi-k2 swap until upstream returns"],
unresolved_questions: ["does deepseek match kimi quality?"],
validation_status: "partial",
next_command: "bun run auditor/audit_one.ts 11",
source_scratchpad_hash: SHA,
summarized_at: NOW,
provenance: PROVENANCE,
};
test("ScratchpadSummary: positive validates", () => {
const r = validateScratchpadSummary(SCRATCHPAD_OK);
if (!r.valid) console.error(r.errors);
expect(r.valid).toBe(true);
});
test("ScratchpadSummary: invalid validation_status rejected", () => {
const r = validateScratchpadSummary({ ...SCRATCHPAD_OK, validation_status: "tbd" });
expect(r.valid).toBe(false);
if (!r.valid) expect(r.errors.some(e => e.includes("validation_status"))).toBe(true);
});
test("ScratchpadSummary: short scratchpad_hash rejected", () => {
const r = validateScratchpadSummary({ ...SCRATCHPAD_OK, source_scratchpad_hash: "short" });
expect(r.valid).toBe(false);
if (!r.valid) expect(r.errors.some(e => e.includes("source_scratchpad_hash"))).toBe(true);
});
// ─── ModelLedgerEntry ────────────────────────────────────────────────
const LEDGER_OK = {
schema_version: MODEL_LEDGER_SCHEMA_VERSION,
model_name: "kimi-k2:1t",
model_provider: "ollama_cloud",
task_type: "pr_audit",
success_rate: 0.85,
failure_modes: ["upstream_500", "context_truncation"],
best_partner_model: "x-ai/grok-4.1-fast",
escalation_role: "primary",
cost_usd_p50: 0.0002,
latency_ms_p50: 50000,
latency_ms_p95: 90000,
context_window: 200000,
sample_count: 47,
last_updated: NOW,
};
test("ModelLedgerEntry: positive validates", () => {
const r = validateModelLedgerEntry(LEDGER_OK);
if (!r.valid) console.error(r.errors);
expect(r.valid).toBe(true);
});
test("ModelLedgerEntry: success_rate > 1 rejected", () => {
const r = validateModelLedgerEntry({ ...LEDGER_OK, success_rate: 1.5 });
expect(r.valid).toBe(false);
if (!r.valid) expect(r.errors.some(e => e.includes("success_rate"))).toBe(true);
});
test("ModelLedgerEntry: zero sample_count rejected (no aggregate from zero)", () => {
const r = validateModelLedgerEntry({ ...LEDGER_OK, sample_count: 0 });
expect(r.valid).toBe(false);
if (!r.valid) expect(r.errors.some(e => e.includes("sample_count"))).toBe(true);
});
// ─── RagSample ───────────────────────────────────────────────────────
// Valid playbook row; negatives flip one invariant at a time.
const RAG_OK = {
  schema_version: RAG_SAMPLE_SCHEMA_VERSION,
  id: "rag-pb-001",
  title: "Scrum applier rationale-diff alignment",
  content: "When the applier emits a patch with rationale claiming X but the diff shows Y, the rationale-token alignment gate catches it...",
  tags: ["scrum_review", "applier"],
  source_run_id: "run-xyz",
  success_score: "accepted",
  source_category: "accepted",
  embedding_text: "applier rationale-diff alignment guard scrum",
  created_at: NOW,
  provenance: PROVENANCE,
};
test("RagSample: positive validates", () => {
  const result = validateRagSample(RAG_OK);
  if (!result.valid) console.error(result.errors);
  expect(result.valid).toBe(true);
});
test("RagSample: success_score=rejected forbidden (RAG never takes rejected)", () => {
  const result = validateRagSample({ ...RAG_OK, success_score: "rejected", source_category: "rejected" });
  expect(result.valid).toBe(false);
  if (!result.valid) {
    expect(result.errors.some(msg => msg.includes("success_score"))).toBe(true);
  }
});
test("RagSample: success_score and source_category must match", () => {
  const result = validateRagSample({ ...RAG_OK, success_score: "accepted", source_category: "partially_accepted" });
  expect(result.valid).toBe(false);
});
test("RagSample: whitespace-only content rejected", () => {
  const result = validateRagSample({ ...RAG_OK, content: " \n " });
  expect(result.valid).toBe(false);
  if (!result.valid) {
    expect(result.errors.some(msg => msg.includes("content"))).toBe(true);
  }
});
// ─── SftSample (the strict one) ──────────────────────────────────────
// Fixture mirrors a real pr_audit export row. Each negative overrides a
// single field so every contamination guard is exercised in isolation.
const SFT_OK = {
  schema_version: SFT_SAMPLE_SCHEMA_VERSION,
  id: "sft-pr11-001",
  instruction: "Audit this PR diff against ship-claims.",
  context: "claims: 3 strong, 2 moderate",
  response: "{\"claim_verdicts\": [...]}",
  source_run_id: "run-pr11",
  quality_score: "accepted",
  created_at: NOW,
  provenance: PROVENANCE,
};
test("SftSample: positive validates", () => {
  const result = validateSftSample(SFT_OK);
  if (!result.valid) console.error(result.errors);
  expect(result.valid).toBe(true);
});
test("SftSample: quality_score=partially_accepted ACCEPTED (--include-partial path)", () => {
  // Phase 4 update: partial allowed at schema layer; CLI gate decides.
  const result = validateSftSample({ ...SFT_OK, quality_score: "partially_accepted" });
  expect(result.valid).toBe(true);
});
test("SftSample: quality_score=rejected REJECTED (spec non-negotiable, no leak)", () => {
  const result = validateSftSample({ ...SFT_OK, quality_score: "rejected" });
  expect(result.valid).toBe(false);
  if (!result.valid) {
    expect(result.errors.some(msg => msg.includes("quality_score"))).toBe(true);
  }
});
test("SftSample: quality_score=needs_human_review REJECTED (no leak)", () => {
  const result = validateSftSample({ ...SFT_OK, quality_score: "needs_human_review" });
  expect(result.valid).toBe(false);
});
test("SftSample: missing context rejected (must be string, even if empty)", () => {
  const copy: Record<string, unknown> = { ...SFT_OK };
  delete copy.context;
  const result = validateSftSample(copy);
  expect(result.valid).toBe(false);
  if (!result.valid) {
    expect(result.errors.some(msg => msg.includes("context"))).toBe(true);
  }
});
test("SftSample: empty-string context allowed", () => {
  const result = validateSftSample({ ...SFT_OK, context: "" });
  expect(result.valid).toBe(true);
});
test("SftSample: empty response rejected (no empty pairs)", () => {
  const result = validateSftSample({ ...SFT_OK, response: "" });
  expect(result.valid).toBe(false);
  if (!result.valid) {
    expect(result.errors.some(msg => msg.includes("response"))).toBe(true);
  }
});
test("SftSample: whitespace-only instruction rejected", () => {
  const result = validateSftSample({ ...SFT_OK, instruction: " \t\n " });
  expect(result.valid).toBe(false);
  if (!result.valid) {
    expect(result.errors.some(msg => msg.includes("instruction"))).toBe(true);
  }
});
// ─── PreferenceSample ────────────────────────────────────────────────
// Valid chosen/rejected pair; negatives break one pairing invariant each.
const PREF_OK = {
  schema_version: PREFERENCE_SAMPLE_SCHEMA_VERSION,
  id: "pref-task-x-001",
  prompt: "Verify claim: 'all 3 services running on matrix-test'",
  chosen: "{\"backed\": true, \"evidence\": \"systemctl status confirms 3 active\"}",
  rejected: "{\"backed\": true, \"evidence\": \"the README says so\"}",
  reason: "chosen cites runtime evidence, rejected cites doc claim only",
  chosen_run_id: "run-A",
  rejected_run_id: "run-B",
  created_at: NOW,
  provenance: PROVENANCE,
};
test("PreferenceSample: positive validates", () => {
  const result = validatePreferenceSample(PREF_OK);
  if (!result.valid) console.error(result.errors);
  expect(result.valid).toBe(true);
});
test("PreferenceSample: chosen == rejected rejected (no self-pairing)", () => {
  const result = validatePreferenceSample({ ...PREF_OK, chosen: "x", rejected: "x" });
  expect(result.valid).toBe(false);
  if (!result.valid) {
    expect(result.errors.some(msg => msg.includes("chosen and rejected"))).toBe(true);
  }
});
test("PreferenceSample: chosen_run_id == rejected_run_id rejected (no self-disagreement)", () => {
  const result = validatePreferenceSample({ ...PREF_OK, chosen_run_id: "run-A", rejected_run_id: "run-A" });
  expect(result.valid).toBe(false);
  if (!result.valid) {
    expect(result.errors.some(msg => msg.includes("chosen_run_id"))).toBe(true);
  }
});
test("PreferenceSample: empty reason rejected (every preference needs WHY)", () => {
  const result = validatePreferenceSample({ ...PREF_OK, reason: " " });
  expect(result.valid).toBe(false);
  if (!result.valid) {
    expect(result.errors.some(msg => msg.includes("reason"))).toBe(true);
  }
});

View File

@ -0,0 +1,86 @@
// ScoredRun — output of the deterministic Success Scorer (Phase 3).
// Spec mandates 4 categories with explicit reasons; we add scorer
// versioning so a future scorer change is detectable in historical data.
import {
ValidationResult, requireString, requireIsoTimestamp, requireProvenance, requireStringArray, requireNumber,
} from "./types";
// Exact-match guard: validateScoredRun rejects rows whose schema_version differs.
export const SCORED_RUN_SCHEMA_VERSION = 1;
// The four spec-mandated outcome categories. `as const` preserves the
// literal types so ScoreCategory below is a string-literal union.
export const SCORE_CATEGORIES = ["accepted", "partially_accepted", "rejected", "needs_human_review"] as const;
export type ScoreCategory = (typeof SCORE_CATEGORIES)[number];
// One scored run — joins back to its EvidenceRecord via the two FK fields.
export interface ScoredRun {
  schema_version: number; // must equal SCORED_RUN_SCHEMA_VERSION
  evidence_run_id: string; // FK to EvidenceRecord.run_id
  evidence_task_id: string; // FK to EvidenceRecord.task_id
  category: ScoreCategory;
  reasons: string[]; // human-readable, e.g. ["cargo_green=true", "anchor_grounding<0.7"]; validator requires at least one
  scored_at: string; // ISO 8601
  scorer_version: string; // e.g. "v1.0.0" — bumped on scorer code change
  // Sub-scores that the scorer collapsed into the category. Persisted
  // so a downstream UI can show "why" without re-running the scorer.
  sub_scores?: {
    cargo_green?: boolean;
    anchor_grounding?: number; // validator enforces [0, 1] when present
    schema_valid?: boolean;
    pathway_replay_succeeded?: boolean;
    observer_verdict?: "accept" | "reject" | "cycle";
    [key: string]: unknown; // forward-compat: unrecognized sub-scores pass through unvalidated
  };
  provenance: {
    source_file: string;
    line_offset?: number;
    sig_hash: string;
    recorded_at: string;
  };
}
export function validateScoredRun(input: unknown): ValidationResult<ScoredRun> {
  // Validate one ScoredRun row. Collects every error (no early exit
  // after the shape gate) so callers can surface all problems at once.
  const errors: string[] = [];
  if (typeof input !== "object" || input === null) {
    return { valid: false, errors: ["expected object"] };
  }
  const rec = input as Record<string, unknown>;
  let allOk = true;
  if (rec.schema_version !== SCORED_RUN_SCHEMA_VERSION) {
    errors.push(`schema_version: expected ${SCORED_RUN_SCHEMA_VERSION}, got ${JSON.stringify(rec.schema_version)}`);
    allOk = false;
  }
  // Required scalars/arrays — helpers push their own error strings.
  if (!requireString(rec.evidence_run_id, "evidence_run_id", errors)) allOk = false;
  if (!requireString(rec.evidence_task_id, "evidence_task_id", errors)) allOk = false;
  if (!requireIsoTimestamp(rec.scored_at, "scored_at", errors)) allOk = false;
  if (!requireString(rec.scorer_version, "scorer_version", errors)) allOk = false;
  if (!requireStringArray(rec.reasons, "reasons", errors)) allOk = false;
  if (Array.isArray(rec.reasons) && rec.reasons.length === 0) {
    errors.push("reasons: must be non-empty (every score must have at least one reason)");
    allOk = false;
  }
  if (!SCORE_CATEGORIES.includes(rec.category as ScoreCategory)) {
    errors.push(`category: must be one of ${SCORE_CATEGORIES.join("|")}, got ${JSON.stringify(rec.category)}`);
    allOk = false;
  }
  if (!requireProvenance(rec.provenance, "provenance", errors)) allOk = false;
  // Optional sub_scores: only the keys we understand are checked; any
  // extra keys pass through (the interface has an index signature).
  if (rec.sub_scores !== undefined) {
    if (typeof rec.sub_scores !== "object" || rec.sub_scores === null) {
      errors.push("sub_scores: expected object when present");
      allOk = false;
    } else {
      const sub = rec.sub_scores as Record<string, unknown>;
      if (sub.anchor_grounding !== undefined) {
        if (!requireNumber(sub.anchor_grounding, "sub_scores.anchor_grounding", errors)) {
          allOk = false;
        } else {
          const grounding = sub.anchor_grounding as number;
          if (grounding < 0 || grounding > 1) {
            errors.push("sub_scores.anchor_grounding: must be in [0, 1]");
            allOk = false;
          }
        }
      }
      if (sub.observer_verdict !== undefined && !["accept", "reject", "cycle"].includes(sub.observer_verdict as string)) {
        errors.push("sub_scores.observer_verdict: must be accept|reject|cycle");
        allOk = false;
      }
    }
  }
  return allOk
    ? { valid: true, value: rec as unknown as ScoredRun }
    : { valid: false, errors };
}

View File

@ -0,0 +1,65 @@
// ScratchpadSummary — structured normalization of a tree-split or
// long-running scratchpad. Distinct from EvidenceRecord because a
// scratchpad accumulates across many calls; this schema captures the
// state at a checkpoint moment.
import {
ValidationResult, requireString, requireIsoTimestamp, requireProvenance, requireStringArray,
} from "./types";
// Exact-match guard for the validator below.
export const SCRATCHPAD_SCHEMA_VERSION = 1;
// One checkpoint snapshot of a long-running scratchpad, normalized into
// step lists + decisions so consumers don't have to re-parse prose.
export interface ScratchpadSummary {
  schema_version: number; // must equal SCRATCHPAD_SCHEMA_VERSION
  run_id: string;
  current_objective: string; // what the run is currently driving at
  completed_steps: string[];
  failed_steps: string[];
  pending_steps: string[];
  important_paths: string[]; // file paths the scratchpad references
  decisions: string[]; // architectural/scope decisions made
  unresolved_questions: string[];
  validation_status: "pass" | "fail" | "partial" | "pending";
  next_command?: string; // recommendation for next action
  source_scratchpad_hash: string; // sha256 of the full source scratchpad text — diff detection
  summarized_at: string; // ISO 8601
  provenance: { source_file: string; line_offset?: number; sig_hash: string; recorded_at: string };
}
const STATUS = ["pass", "fail", "partial", "pending"];
export function validateScratchpadSummary(input: unknown): ValidationResult<ScratchpadSummary> {
  // Validate one checkpoint summary; accumulates every error rather
  // than stopping at the first.
  const errors: string[] = [];
  if (typeof input !== "object" || input === null) return { valid: false, errors: ["expected object"] };
  const rec = input as Record<string, unknown>;
  let allOk = true;
  if (rec.schema_version !== SCRATCHPAD_SCHEMA_VERSION) {
    errors.push(`schema_version: expected ${SCRATCHPAD_SCHEMA_VERSION}, got ${JSON.stringify(rec.schema_version)}`);
    allOk = false;
  }
  if (!requireString(rec.run_id, "run_id", errors)) allOk = false;
  if (!requireString(rec.current_objective, "current_objective", errors)) allOk = false;
  if (!requireIsoTimestamp(rec.summarized_at, "summarized_at", errors)) allOk = false;
  if (typeof rec.source_scratchpad_hash !== "string" || !/^[0-9a-f]{64}$/.test(rec.source_scratchpad_hash as string)) {
    errors.push("source_scratchpad_hash: must be hex sha256");
    allOk = false;
  }
  // The six list fields share one rule: array of strings.
  const listFields = [
    "completed_steps", "failed_steps", "pending_steps",
    "important_paths", "decisions", "unresolved_questions",
  ] as const;
  for (const name of listFields) {
    if (!requireStringArray(rec[name], name, errors)) allOk = false;
  }
  if (!STATUS.includes(rec.validation_status as string)) {
    errors.push(`validation_status: must be one of ${STATUS.join("|")}`);
    allOk = false;
  }
  if (rec.next_command !== undefined && typeof rec.next_command !== "string") {
    errors.push("next_command: expected string when present");
    allOk = false;
  }
  if (!requireProvenance(rec.provenance, "provenance", errors)) allOk = false;
  return allOk
    ? { valid: true, value: rec as unknown as ScratchpadSummary }
    : { valid: false, errors };
}

View File

@ -0,0 +1,69 @@
// SftSample — entry in exports/sft/instruction_response.jsonl. Spec
// non-negotiable: ONLY accepted runs, never partial/rejected/needs_human.
// Validator enforces that invariant — exporters can't bypass.
import {
ValidationResult, requireString, requireIsoTimestamp, requireProvenance, requireNumber,
} from "./types";
// Exact-match guard for validateSftSample.
export const SFT_SAMPLE_SCHEMA_VERSION = 1;
// SFT default: only `accepted` ships. With --include-partial CLI flag,
// `partially_accepted` becomes legal. `rejected` and `needs_human_review`
// NEVER ship to SFT — that's the contamination firewall.
export const SFT_QUALITY_SCORES = ["accepted", "partially_accepted"] as const;
export type SftQualityScore = (typeof SFT_QUALITY_SCORES)[number];
// One instruction/response training pair, as written to
// exports/sft/instruction_response.jsonl.
export interface SftSample {
  schema_version: number; // must equal SFT_SAMPLE_SCHEMA_VERSION
  id: string;
  instruction: string; // the prompt / user message; validator rejects whitespace-only
  context: string; // retrieved context that was visible (empty string allowed; null/undefined not)
  response: string; // the model output that was accepted; validator rejects whitespace-only
  source_run_id: string;
  quality_score: SftQualityScore;
  created_at: string;
  provenance: { source_file: string; line_offset?: number; sig_hash: string; recorded_at: string };
}
export function validateSftSample(input: unknown): ValidationResult<SftSample> {
  // Validate one SFT export row, enforcing the contamination firewall:
  // only scores listed in SFT_QUALITY_SCORES may ever reach SFT.
  const errors: string[] = [];
  if (typeof input !== "object" || input === null) return { valid: false, errors: ["expected object"] };
  const rec = input as Record<string, unknown>;
  let allOk = true;
  if (rec.schema_version !== SFT_SAMPLE_SCHEMA_VERSION) {
    errors.push(`schema_version: expected ${SFT_SAMPLE_SCHEMA_VERSION}, got ${JSON.stringify(rec.schema_version)}`);
    allOk = false;
  }
  // Required non-empty strings, checked in the canonical field order.
  for (const key of ["id", "instruction", "response", "source_run_id"] as const) {
    if (!requireString(rec[key], key, errors)) allOk = false;
  }
  if (!requireIsoTimestamp(rec.created_at, "created_at", errors)) allOk = false;
  if (!requireProvenance(rec.provenance, "provenance", errors)) allOk = false;
  // Empty pair guard — whitespace-only is as bad as missing.
  const blank = (s: unknown): boolean => typeof s === "string" && s.trim().length === 0;
  if (blank(rec.instruction)) {
    errors.push("instruction: must be non-whitespace (no empty pairs)");
    allOk = false;
  }
  if (blank(rec.response)) {
    errors.push("response: must be non-whitespace (no empty pairs)");
    allOk = false;
  }
  // Context is required-string but empty is allowed (some SFT samples
  // are pure instruction→response with no retrieval context).
  if (typeof rec.context !== "string") {
    errors.push("context: expected string (use empty string for no-context samples)");
    allOk = false;
  }
  // The non-negotiable: anything outside SFT_QUALITY_SCORES is a leak.
  if (!SFT_QUALITY_SCORES.includes(rec.quality_score as SftQualityScore)) {
    errors.push(`quality_score: must be one of ${SFT_QUALITY_SCORES.join("|")} (no rejected/needs_human leak into SFT — spec non-negotiable). Got ${JSON.stringify(rec.quality_score)}`);
    allOk = false;
  }
  return allOk
    ? { valid: true, value: rec as unknown as SftSample }
    : { valid: false, errors };
}

View File

@ -0,0 +1,190 @@
// stage_receipt.ts — forensic-grade per-stage receipt.
//
// Distinct from auditor/schemas/distillation/receipt.ts (Phase 1):
// - Phase 1 Receipt is per-script invocation, format inherited from
// the early auditor wiring
// - StageReceipt (THIS file) matches the now.md Phase 5 spec exactly
// and is the canonical artifact for pipeline observability
//
// Every pipeline stage (collect, score, export-rag, export-sft,
// export-preference, future extract-playbooks/index) emits ONE
// StageReceipt per run. Receipts are joined by `run_id` (shared
// across all stages of a single `run-all` invocation) so a future
// query can aggregate across the whole pipeline.
import {
ValidationResult, requireString, requireNumber, requireIsoTimestamp, requireSha256,
requireStringArray,
} from "./types";
// Exact-match guard for validateStageReceipt.
export const STAGE_RECEIPT_SCHEMA_VERSION = 1;
export const STAGE_NAMES = [
  "collect", // build_evidence_index — materialize source jsonls → EvidenceRecord
  "score", // score_runs — EvidenceRecord → ScoredRun
  "export-rag", // exports/rag/playbooks.jsonl
  "export-sft", // exports/sft/instruction_response.jsonl
  "export-preference",// exports/preference/chosen_rejected.jsonl
  // Reserved for future stages — accept them in the schema so a stage
  // can be added without bumping schema_version.
  "extract-playbooks",
  "index",
] as const;
export type StageName = (typeof STAGE_NAMES)[number];
// One input or output artifact touched by a stage.
export interface StageFileRef {
  path: string; // relative to repo root
  sha256: string; // 64-char hex
  bytes?: number;
  record_count?: number; // line count for jsonl, when meaningful
}
// Aggregate view of everything a stage read (inputs) or wrote (outputs).
export interface StageIO {
  files: StageFileRef[];
  record_count: number;
  hash: string; // 64-char hex — aggregate over all file hashes (sorted)
}
// Row-disposition counters for one stage run.
export interface StageStats {
  accepted: number; // rows that ended up in the stage's output
  rejected: number; // explicit category=rejected (Score), invalid pairs (Preference), etc.
  quarantined: number; // routed to exports/quarantine/* with structured reason
  skipped: number; // parse failures, schema violations at write time
}
// Explicit pass/fail record for the stage's own validation step.
export interface StageValidation {
  passed: boolean; // explicit boolean — never inferred (spec non-negotiable)
  errors: string[];
  warnings: string[];
}
// The receipt itself — exactly one per stage per pipeline run.
export interface StageReceipt {
  schema_version: number;
  run_id: string; // shared across all stages of one pipeline run
  stage: StageName;
  timestamp: string; // ISO 8601 — stage start
  git_commit: string; // 40-char hex
  inputs: StageIO;
  outputs: StageIO;
  stats: StageStats;
  validation: StageValidation;
  duration_ms: number;
}
function validateStageIO(v: unknown, field: string, errors: string[]): boolean {
  // Validate one StageIO sub-object (inputs or outputs). `field` prefixes
  // every error so the caller's messages stay unambiguous.
  if (typeof v !== "object" || v === null) {
    errors.push(`${field}: expected object`);
    return false;
  }
  const io = v as Record<string, unknown>;
  let allOk = true;
  if (!Array.isArray(io.files)) {
    errors.push(`${field}.files: expected array`);
    allOk = false;
  } else {
    (io.files as unknown[]).forEach((entry, i) => {
      if (typeof entry !== "object" || entry === null) {
        errors.push(`${field}.files[${i}]: expected object`);
        allOk = false;
        return;
      }
      const fileRef = entry as Record<string, unknown>;
      if (!requireString(fileRef.path, `${field}.files[${i}].path`, errors)) allOk = false;
      if (!requireSha256(fileRef.sha256, `${field}.files[${i}].sha256`, errors)) allOk = false;
      if (fileRef.bytes !== undefined && typeof fileRef.bytes !== "number") {
        errors.push(`${field}.files[${i}].bytes: expected number when present`);
        allOk = false;
      }
      if (fileRef.record_count !== undefined && typeof fileRef.record_count !== "number") {
        errors.push(`${field}.files[${i}].record_count: expected number when present`);
        allOk = false;
      }
    });
  }
  if (!requireNumber(io.record_count, `${field}.record_count`, errors)) allOk = false;
  if (!requireSha256(io.hash, `${field}.hash`, errors)) allOk = false;
  return allOk;
}
export function validateStageReceipt(input: unknown): ValidationResult<StageReceipt> {
const errors: string[] = [];
if (typeof input !== "object" || input === null) {
return { valid: false, errors: ["expected object"] };
}
const r = input as Record<string, unknown>;
let ok = true;
if (r.schema_version !== STAGE_RECEIPT_SCHEMA_VERSION) {
errors.push(`schema_version: expected ${STAGE_RECEIPT_SCHEMA_VERSION}, got ${JSON.stringify(r.schema_version)}`);
ok = false;
}
ok = requireString(r.run_id, "run_id", errors) && ok;
if (typeof r.run_id === "string" && r.run_id.length < 8) {
errors.push("run_id: too short — expect uuid-like");
ok = false;
}
if (typeof r.stage !== "string" || !STAGE_NAMES.includes(r.stage as StageName)) {
errors.push(`stage: must be one of ${STAGE_NAMES.join("|")}`);
ok = false;
}
ok = requireIsoTimestamp(r.timestamp, "timestamp", errors) && ok;
if (typeof r.git_commit !== "string" || !/^[0-9a-f]{40}$/.test(r.git_commit as string)) {
errors.push("git_commit: must be 40-char hex");
ok = false;
}
if (typeof r.duration_ms !== "number") {
errors.push("duration_ms: expected number");
ok = false;
}
if (typeof r.inputs !== "object" || r.inputs === null) {
errors.push("inputs: expected object");
ok = false;
} else {
ok = validateStageIO(r.inputs, "inputs", errors) && ok;
}
if (typeof r.outputs !== "object" || r.outputs === null) {
errors.push("outputs: expected object");
ok = false;
} else {
ok = validateStageIO(r.outputs, "outputs", errors) && ok;
}
if (typeof r.stats !== "object" || r.stats === null) {
errors.push("stats: expected object");
ok = false;
} else {
const s = r.stats as Record<string, unknown>;
for (const k of ["accepted", "rejected", "quarantined", "skipped"]) {
if (typeof s[k] !== "number") { errors.push(`stats.${k}: expected number`); ok = false; }
}
}
if (typeof r.validation !== "object" || r.validation === null) {
errors.push("validation: expected object");
ok = false;
} else {
const v = r.validation as Record<string, unknown>;
if (typeof v.passed !== "boolean") {
errors.push("validation.passed: must be boolean (explicit, never inferred)");
ok = false;
}
if (!Array.isArray(v.errors)) { errors.push("validation.errors: expected array"); ok = false; }
if (!Array.isArray(v.warnings)) { errors.push("validation.warnings: expected array"); ok = false; }
if (Array.isArray(v.errors)) ok = requireStringArray(v.errors, "validation.errors", errors) && ok;
if (Array.isArray(v.warnings)) ok = requireStringArray(v.warnings, "validation.warnings", errors) && ok;
}
if (!ok) return { valid: false, errors };
return { valid: true, value: r as unknown as StageReceipt };
}
// Compute the canonical aggregate hash over a list of file refs.
// Sorted by path so order-of-iteration doesn't drift the hash. Each
// entry contributes "<path>|<sha256>|<record_count>", so two files with
// identical content but different paths still produce distinct digests
// (real difference = real hash difference). Input array is not mutated.
export async function aggregateIoHash(files: StageFileRef[]): Promise<string> {
  const digestInput = files
    .slice()
    .sort((a, b) => a.path.localeCompare(b.path))
    .map((f) => `${f.path}|${f.sha256}|${f.record_count ?? 0}`)
    .join("\n");
  const hasher = new Bun.CryptoHasher("sha256");
  hasher.update(digestInput);
  return hasher.digest("hex");
}

View File

@ -0,0 +1,141 @@
// Shared types for distillation schemas. Hand-rolled validators (no Zod
// dependency) — bun:test runs them; runtime cost is one tiny function
// per record. Pattern: each schema exports `validate(x): ValidationResult`
// returning `{valid: true, value}` or `{valid: false, errors}`.
//
// Why hand-rolled: the auditor + scrum + observer pipelines emit JSONL
// rows in shapes that already work; we want to ENFORCE those shapes
// without adding a 100KB dependency or rewriting producers. The
// validators codify what we already produce.
//
// Naming: schemas live as nouns (`EvidenceRecord`), validators as
// `validate<Noun>`. Each schema file exports both the type and the
// validator.
// Provenance link: where a distilled row came from and how duplicates
// are detected. Attached to every schema in this package.
export interface Provenance {
  // Source JSONL (or other) path, relative to /home/profit/lakehouse so
  // receipts stay reproducible across deploys with the same repo layout.
  source_file: string;
  // Optional byte offset / line number into the source file, for a
  // future "open the source row" UI. Single-row JSON sources omit it.
  line_offset?: number;
  // SHA-256 of the canonical JSON of the source row (sorted keys, no
  // whitespace) — the dedup key: re-running distillation over the same
  // source yields an identical sig_hash.
  sig_hash: string;
  // ISO 8601 moment this provenance link was recorded (usually when the
  // unified Evidence Index ran) — distinct from the source row's own
  // timestamp, which lives on the EvidenceRecord itself.
  recorded_at: string;
}
// Result of every schema validator: success carries the narrowed value,
// failure carries ALL collected errors (not just the first).
export type ValidationResult<T> =
  | { valid: true; value: T }
  | { valid: false; errors: string[] };
// Shared field-level helpers. Centralized so naming + error message
// format stay consistent across schemas. Each pushes its own error(s)
// and doubles as a type guard for the caller.
export function requireString(v: unknown, field: string, errors: string[]): v is string {
  // Non-empty string gate.
  if (typeof v === "string") {
    if (v.length > 0) return true;
    errors.push(`${field}: must be non-empty`);
    return false;
  }
  errors.push(`${field}: expected string, got ${typeof v}`);
  return false;
}
export function requireNumber(v: unknown, field: string, errors: string[]): v is number {
  // Finite number gate — NaN and ±Infinity are rejected.
  const finite = typeof v === "number" && Number.isFinite(v);
  if (!finite) errors.push(`${field}: expected finite number, got ${typeof v}`);
  return finite;
}
export function requireIsoTimestamp(v: unknown, field: string, errors: string[]): v is string {
  if (!requireString(v, field, errors)) return false;
  // Permissive ISO 8601: YYYY-MM-DDTHH:MM:SS(.fraction)?(Z|±HH:MM)?
  const isoPattern = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:?\d{2})?$/;
  if (isoPattern.test(v)) return true;
  errors.push(`${field}: not a valid ISO 8601 timestamp: ${v.slice(0, 60)}`);
  return false;
}
export function requireSha256(v: unknown, field: string, errors: string[]): v is string {
  // Lowercase 64-char hex digest.
  if (!requireString(v, field, errors)) return false;
  if (/^[0-9a-f]{64}$/.test(v)) return true;
  errors.push(`${field}: not a valid hex sha256: ${v.slice(0, 80)}`);
  return false;
}
export function requireProvenance(v: unknown, field: string, errors: string[]): v is Provenance {
  // Structural check of the Provenance shape; delegates per-field rules
  // to the scalar helpers above.
  if (typeof v !== "object" || v === null) {
    errors.push(`${field}: expected object, got ${v === null ? "null" : typeof v}`);
    return false;
  }
  const p = v as Record<string, unknown>;
  let allOk = true;
  if (!requireString(p.source_file, `${field}.source_file`, errors)) allOk = false;
  if (!requireSha256(p.sig_hash, `${field}.sig_hash`, errors)) allOk = false;
  if (!requireIsoTimestamp(p.recorded_at, `${field}.recorded_at`, errors)) allOk = false;
  if (p.line_offset !== undefined && typeof p.line_offset !== "number") {
    errors.push(`${field}.line_offset: expected number when present`);
    allOk = false;
  }
  return allOk;
}
export function requireStringArray(v: unknown, field: string, errors: string[]): v is string[] {
  // Array-of-strings gate; reports the first offending index only.
  if (!Array.isArray(v)) {
    errors.push(`${field}: expected array, got ${typeof v}`);
    return false;
  }
  for (const [i, item] of v.entries()) {
    if (typeof item !== "string") {
      errors.push(`${field}[${i}]: expected string, got ${typeof item}`);
      return false;
    }
  }
  return true;
}
// Compute the canonical sha256 used for sig_hash. Sorts keys so the
// hash is stable regardless of producer's serialization order. Uses
// Bun.CryptoHasher (sync, fast) rather than node:crypto — matches the
// rest of the auditor.
export async function canonicalSha256(obj: unknown): Promise<string> {
  const json = JSON.stringify(orderKeys(obj));
  const hasher = new Bun.CryptoHasher("sha256");
  hasher.update(json);
  return hasher.digest("hex");
}
// Recursively rebuild objects with their keys in sorted order; arrays
// keep element order, primitives (and null) pass through untouched.
function orderKeys(v: unknown): unknown {
  if (v === null || typeof v !== "object") return v;
  if (Array.isArray(v)) return v.map(orderKeys);
  const rebuilt: Record<string, unknown> = {};
  for (const key of Object.keys(v as object).sort()) {
    rebuilt[key] = orderKeys((v as Record<string, unknown>)[key]);
  }
  return rebuilt;
}

View File

@ -2,7 +2,7 @@
// if something can't be verified from a check, it goes into `evidence` // if something can't be verified from a check, it goes into `evidence`
// so the verdict is inspectable, not a black box. // so the verdict is inspectable, not a black box.
export type CheckKind = "static" | "dynamic" | "inference" | "kb_query"; export type CheckKind = "static" | "dynamic" | "inference" | "kb_query" | "kimi_architect";
export type Severity = "info" | "warn" | "block"; export type Severity = "info" | "warn" | "block";

View File

@ -13,7 +13,12 @@ import { readFile } from "node:fs/promises";
import { createHash } from "node:crypto"; import { createHash } from "node:crypto";
import type { Gap, Proposal } from "./types.ts"; import type { Gap, Proposal } from "./types.ts";
const SIDECAR_URL = process.env.LH_SIDECAR_URL ?? "http://localhost:3200"; // Phase 44 migration (2026-04-27): bot/propose.ts now flows through
// the gateway's /v1/chat instead of hitting the sidecar's /generate
// directly. /v1/usage tracks the call, Langfuse traces it, observer
// sees it. Same upstream model (CLOUD_MODEL gpt-oss:120b on
// Ollama Cloud) — gateway just owns the routing.
const GATEWAY_URL = process.env.LH_GATEWAY_URL ?? "http://localhost:3100";
const REPO_ROOT = "/home/profit/lakehouse"; const REPO_ROOT = "/home/profit/lakehouse";
const PRD_PATH = `${REPO_ROOT}/docs/PRD.md`; const PRD_PATH = `${REPO_ROOT}/docs/PRD.md`;
const CLOUD_MODEL = process.env.LH_BOT_MODEL ?? "gpt-oss:120b"; const CLOUD_MODEL = process.env.LH_BOT_MODEL ?? "gpt-oss:120b";
@ -72,13 +77,16 @@ export async function generateProposal(gap: Gap, historySummary: string = ""): P
sections.push("Propose a small change that addresses this gap. Respond with the JSON object only."); sections.push("Propose a small change that addresses this gap. Respond with the JSON object only.");
const userPrompt = sections.join("\n"); const userPrompt = sections.join("\n");
const r = await fetch(`${SIDECAR_URL}/generate`, { const r = await fetch(`${GATEWAY_URL}/v1/chat`, {
method: "POST", method: "POST",
headers: { "content-type": "application/json" }, headers: { "content-type": "application/json" },
body: JSON.stringify({ body: JSON.stringify({
model: CLOUD_MODEL, model: CLOUD_MODEL,
system: SYSTEM_PROMPT, provider: "ollama_cloud",
prompt: userPrompt, messages: [
{ role: "system", content: SYSTEM_PROMPT },
{ role: "user", content: userPrompt },
],
temperature: 0.2, temperature: 0.2,
max_tokens: MAX_TOKENS, max_tokens: MAX_TOKENS,
think: false, think: false,
@ -86,10 +94,10 @@ export async function generateProposal(gap: Gap, historySummary: string = ""): P
signal: AbortSignal.timeout(180000), // cloud T3 can be slow — 3 min signal: AbortSignal.timeout(180000), // cloud T3 can be slow — 3 min
}); });
if (!r.ok) { if (!r.ok) {
throw new Error(`sidecar ${r.status}: ${await r.text()}`); throw new Error(`gateway /v1/chat ${r.status}: ${await r.text()}`);
} }
const j = await r.json() as any; const j = await r.json() as any;
const raw: string = j.text ?? j.response ?? ""; const raw: string = j?.choices?.[0]?.message?.content ?? "";
const usage = j.usage ?? {}; const usage = j.usage ?? {};
const tokens = (usage.prompt_tokens ?? 0) + (usage.completion_tokens ?? 0); const tokens = (usage.prompt_tokens ?? 0) + (usage.completion_tokens ?? 0);

86
config/modes.toml Normal file
View File

@ -0,0 +1,86 @@
# Mode router config — task_class → mode mapping
#
# `preferred_mode` is the first choice for a task class; `fallback_modes`
# get tried in order if the preferred one isn't available (LLM Team can
# return Unknown mode for some, OR the matrix has stronger signal for a
# fallback). `default_model` seeds the mode runner's model field if the
# caller doesn't override.
#
# Modes are dispatched against LLM Team UI (localhost:5000/api/run) for
# now; future Rust-native runners will short-circuit before the proxy.
# See crates/gateway/src/v1/mode.rs for the dispatch path.
[[task_class]]
name = "scrum_review"
# 2026-04-26 pass5 variance test (5 reps × 4 conditions, grok-4.1-fast,
# pathway_memory.rs): composed corpus LOST 5/5 vs isolation (Δ 1.8
# grounded findings, p=0.031). See docs/MODE_RUNNER_TUNING_PLAN.md.
# Default is now isolation — bug fingerprints + adversarial framing +
# file content carries strong models without matrix noise. The
# `codereview_lakehouse` matrix path remains available via force_mode
# (auto-downgrades to isolation on strong models — see the
# is_strong_model gate in crates/gateway/src/v1/mode.rs).
preferred_mode = "codereview_isolation"
fallback_modes = ["codereview_lakehouse", "codereview", "consensus", "ladder"]
default_model = "qwen3-coder:480b"
# Corpora kept defined so experimental modes (codereview_matrix_only,
# pass2/pass5 sweeps) and weak-model rescue rungs can still pull them.
# scrum_findings_v1 is built but EXCLUDED — bake-off showed 24% OOB
# line citations from cross-file drift, only safe with same-file gating.
matrix_corpus = ["lakehouse_arch_v1", "lakehouse_symbols_v1"]
[[task_class]]
name = "contract_analysis"
preferred_mode = "deep_analysis"
fallback_modes = ["research", "extract"]
default_model = "kimi-k2:1t"
# NOTE(review): `matrix_corpus` is an array under scrum_review but a bare
# string here and in the task classes below — confirm the config loader
# normalizes both shapes before relying on this table.
matrix_corpus = "chicago_permits_v1"
[[task_class]]
name = "staffing_inference"
# Staffing-domain native enrichment runner — Pass 4 (2026-04-26).
# Same composer architecture as codereview_lakehouse but with staffing
# framing + workers corpus. Validates that the modes-as-prompt-molders
# pattern generalizes beyond code review.
preferred_mode = "staffing_inference_lakehouse"
fallback_modes = ["ladder", "consensus", "pipeline"]
default_model = "openai/gpt-oss-120b:free"
matrix_corpus = "workers_500k_v8"
[[task_class]]
name = "fact_extract"
preferred_mode = "extract"
fallback_modes = ["distill"]
default_model = "qwen2.5"
matrix_corpus = "kb_team_runs_v1"
[[task_class]]
name = "doc_drift_check"
preferred_mode = "drift"
fallback_modes = ["validator"]
default_model = "gpt-oss:120b"
matrix_corpus = "distilled_factual_v20260423095819"
[[task_class]]
name = "pr_audit"
# Auditor's claim-vs-diff verification mode (2026-04-26 rebuild).
# Replaces the auditor's hand-rolled inference check with the mode-runner
# composer: pathway memory (PR-level patterns) + lakehouse_answers_v1
# corpus (prior accepted reviews + observer escalations) + adversarial
# JSON-shaped framing. Default model is paid Ollama Cloud kimi-k2:1t for
# strong claim-grounding; tie-breaker via auditor-side env override.
preferred_mode = "pr_audit"
fallback_modes = ["consensus", "ladder"]
# kimi-k2:1t broken upstream 2026-04-27 (Ollama Cloud 500 ISE, multi-hour
# sustained outage verified by repeated probes). deepseek-v3.1:671b is
# the drop-in substitute — proven working end-to-end through pr_audit
# during Phase 5 distillation acceptance testing.
default_model = "deepseek-v3.1:671b"
matrix_corpus = "lakehouse_answers_v1"
# Fallback when task_class isn't in the table — useful for ad-hoc calls
# during development that don't yet have a mapped mode.
[default]
preferred_mode = "pipeline"
fallback_modes = ["consensus", "ladder"]
default_model = "qwen3.5:latest"

97
config/providers.toml Normal file
View File

@ -0,0 +1,97 @@
# Phase 39: Provider Registry
#
# Per-provider base_url, auth scheme, and default model. The gateway's
# /v1/chat dispatcher reads this file at boot to populate its provider
# table. Secrets (API keys) come from /etc/lakehouse/secrets.toml or
# environment variables — NEVER inline a key here.
#
# Adding a new provider:
# 1. New [[provider]] block with name, base_url, auth, default_model
# 2. Matching adapter at crates/aibridge/src/providers/<name>.rs
# implementing the ProviderAdapter trait (chat + embed + unload)
# 3. Route arm in crates/gateway/src/v1/mod.rs matching on `name`
# 4. Model-prefix routing hint in resolve_provider() if the provider
# uses an "<name>/..." model prefix (e.g. "openrouter/...")
[[provider]]
name = "ollama"
base_url = "http://localhost:3200"
auth = "none"
default_model = "qwen3.5:latest"
# Hot-path local inference. No bearer needed — Python sidecar on
# localhost handles the Ollama API. Model names are bare
# (e.g. "qwen3.5:latest", not "ollama/qwen3.5:latest").
[[provider]]
name = "ollama_cloud"
base_url = "https://ollama.com"
auth = "bearer"
auth_env = "OLLAMA_CLOUD_KEY"
default_model = "gpt-oss:120b"
# Cloud-tier Ollama. Key resolved from OLLAMA_CLOUD_KEY env at gateway
# boot. Model-prefix routing: "cloud/<model>" auto-routes here
# (see gateway::v1::resolve_provider).
[[provider]]
name = "openrouter"
base_url = "https://openrouter.ai/api/v1"
auth = "bearer"
auth_env = "OPENROUTER_API_KEY"
auth_fallback_files = ["/home/profit/.env", "/root/llm_team_config.json"]
default_model = "openai/gpt-oss-120b:free"
# Multi-provider gateway. Covers Anthropic, Google, OpenAI, MiniMax,
# Qwen, Gemma, etc. Key resolved via crates/gateway/src/v1/openrouter.rs
# resolve_openrouter_key() — env first, then fallback files.
# Model-prefix routing: "openrouter/<vendor>/<model>" auto-routes here,
# prefix stripped before upstream call.
[[provider]]
name = "opencode"
base_url = "https://opencode.ai/zen/v1"
# Unified endpoint — covers BOTH Zen (pay-per-token Anthropic/OpenAI/
# Gemini frontier) AND Go (flat-sub Kimi/GLM/DeepSeek/Qwen/Minimax).
# Upstream bills per-model: Zen models hit Zen balance, Go models hit
# Go subscription cap. /zen/go/v1 is the Go-only sub-path (rejects
# Zen models), kept for reference but not used by this provider.
auth = "bearer"
auth_env = "OPENCODE_API_KEY"
default_model = "claude-opus-4-7"
# OpenCode (Zen + GO unified endpoint). One sk-* key reaches Claude
# Opus 4.7, GPT-5.5-pro, Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM,
# Qwen, plus 4 free-tier models. OpenAI-compatible Chat Completions
# at /v1/chat/completions. Model-prefix routing: "opencode/<name>"
# auto-routes here, prefix stripped before upstream call.
# Key file: /etc/lakehouse/opencode.env (loaded via systemd EnvironmentFile).
# Model catalog: curl -H "Authorization: Bearer ..." https://opencode.ai/zen/v1/models
# Note: /zen/go/v1 is the GO-only sub-path (Kimi/GLM/DeepSeek tier);
# /zen/v1 covers everything including Anthropic (which /zen/go/v1 rejects).
[[provider]]
name = "kimi"
base_url = "https://api.kimi.com/coding/v1"
auth = "bearer"
auth_env = "KIMI_API_KEY"
default_model = "kimi-for-coding"
# Direct Kimi For Coding provider. `api.kimi.com` is a SEPARATE account
# system from `api.moonshot.ai` and `api.moonshot.cn` — keys are NOT
# interchangeable. Used when Ollama Cloud's `kimi-k2:1t` is upstream-
# broken and OpenRouter's `moonshotai/kimi-k2.6` is rate-limited.
# Model id: `kimi-for-coding` (kimi-k2.6 underneath).
# Key file: /etc/lakehouse/kimi.env (loaded via systemd EnvironmentFile).
# Model-prefix routing: "kimi/<model>" auto-routes here, prefix stripped.
# Planned (Phase 40 long-horizon — adapters not yet shipped):
#
# [[provider]]
# name = "gemini"
# base_url = "https://generativelanguage.googleapis.com/v1beta"
# auth = "api_key_query"
# auth_env = "GEMINI_API_KEY"
# default_model = "gemini-2.0-flash"
#
# [[provider]]
# name = "claude"
# base_url = "https://api.anthropic.com/v1"
# auth = "x_api_key"
# auth_env = "ANTHROPIC_API_KEY"
# default_model = "claude-3-5-sonnet-latest"

View File

@ -3,10 +3,26 @@ use serde::{Deserialize, Serialize};
use std::time::Duration; use std::time::Duration;
/// HTTP client for the Python AI sidecar. /// HTTP client for the Python AI sidecar.
///
/// `generate()` has two transport modes:
/// - When `gateway_url` is None (default), it posts to
/// `${base_url}/generate` (sidecar direct).
/// - When `gateway_url` is `Some(url)`, it posts to
/// `${url}/v1/chat` with `provider="ollama"` so the call appears
/// in `/v1/usage` and Langfuse traces.
///
/// `embed()`, `rerank()`, and admin methods always go direct to the
/// sidecar — no `/v1` equivalent yet, no point round-tripping.
///
/// Phase 44 part 2 (2026-04-27): the gateway URL is wired in by
/// callers that want observability (vectord modules); it's left
/// unset by callers that ARE the gateway internals (avoids self-loops
/// + redundant hops).
#[derive(Clone)] #[derive(Clone)]
pub struct AiClient { pub struct AiClient {
client: Client, client: Client,
base_url: String, base_url: String,
gateway_url: Option<String>,
} }
// -- Request/Response types -- // -- Request/Response types --
@ -86,9 +102,22 @@ impl AiClient {
Self { Self {
client, client,
base_url: base_url.trim_end_matches('/').to_string(), base_url: base_url.trim_end_matches('/').to_string(),
gateway_url: None,
} }
} }
/// Same as `new`, but every `generate()` is routed through
/// `${gateway_url}/v1/chat` (provider=ollama) for observability.
/// Use this for callers OUTSIDE the gateway. Inside the gateway
/// itself, prefer `new()` — calling /v1/chat from /v1/chat works
/// (no infinite loop, ollama_arm doesn't use AiClient) but adds
/// a wasted localhost hop.
pub fn new_with_gateway(base_url: &str, gateway_url: &str) -> Self {
let mut c = Self::new(base_url);
c.gateway_url = Some(gateway_url.trim_end_matches('/').to_string());
c
}
pub async fn health(&self) -> Result<serde_json::Value, String> { pub async fn health(&self) -> Result<serde_json::Value, String> {
let resp = self.client let resp = self.client
.get(format!("{}/health", self.base_url)) .get(format!("{}/health", self.base_url))
@ -114,6 +143,13 @@ impl AiClient {
} }
pub async fn generate(&self, req: GenerateRequest) -> Result<GenerateResponse, String> { pub async fn generate(&self, req: GenerateRequest) -> Result<GenerateResponse, String> {
if let Some(gw) = self.gateway_url.as_deref() {
return self.generate_via_gateway(gw, req).await;
}
// Direct-sidecar legacy path. Used by gateway internals (so
// ollama_arm can call sidecar without a self-loop) and by
// any consumer that wants raw transport without /v1/usage
// accounting.
let resp = self.client let resp = self.client
.post(format!("{}/generate", self.base_url)) .post(format!("{}/generate", self.base_url))
.json(&req) .json(&req)
@ -128,6 +164,59 @@ impl AiClient {
resp.json().await.map_err(|e| format!("generate parse error: {e}")) resp.json().await.map_err(|e| format!("generate parse error: {e}"))
} }
/// Phase 44 part 2: route generate() through the gateway's /v1/chat
/// with provider="ollama" so the call lands in /v1/usage + Langfuse.
/// Translates between the sidecar GenerateRequest/Response shape and
/// the OpenAI-compat chat shape on the wire.
async fn generate_via_gateway(&self, gateway_url: &str, req: GenerateRequest) -> Result<GenerateResponse, String> {
    // OpenAI-style message list: optional system turn, then the user prompt.
    let mut messages = Vec::with_capacity(2);
    if let Some(sys) = &req.system {
        messages.push(serde_json::json!({"role": "system", "content": sys}));
    }
    messages.push(serde_json::json!({"role": "user", "content": req.prompt}));

    let mut body = serde_json::json!({
        "messages": messages,
        "provider": "ollama",
    });
    // Optional knobs are only serialized when set, so the gateway's
    // own defaults keep applying for absent fields.
    for (key, value) in [
        ("model", req.model.as_ref().map(|m| serde_json::json!(m))),
        ("temperature", req.temperature.map(|t| serde_json::json!(t))),
        ("max_tokens", req.max_tokens.map(|mt| serde_json::json!(mt))),
        ("think", req.think.map(|th| serde_json::json!(th))),
    ] {
        if let Some(v) = value {
            body[key] = v;
        }
    }

    let resp = self.client
        .post(format!("{}/v1/chat", gateway_url))
        .json(&body)
        .send()
        .await
        .map_err(|e| format!("/v1/chat request failed: {e}"))?;
    if !resp.status().is_success() {
        let text = resp.text().await.unwrap_or_default();
        return Err(format!("/v1/chat error: {text}"));
    }
    let reply: serde_json::Value = resp.json().await
        .map_err(|e| format!("/v1/chat parse error: {e}"))?;

    // Map the chat-completions reply back to the sidecar shape.
    let content = reply
        .pointer("/choices/0/message/content")
        .and_then(serde_json::Value::as_str)
        .unwrap_or("");
    // Prefer the model echoed by the gateway; fall back to the one we
    // requested (or empty when neither is known).
    let model_name = match reply.get("model").and_then(|v| v.as_str()) {
        Some(m) => m,
        None => req.model.as_deref().unwrap_or(""),
    };
    Ok(GenerateResponse {
        text: content.to_string(),
        model: model_name.to_string(),
        tokens_evaluated: reply.pointer("/usage/prompt_tokens").and_then(serde_json::Value::as_u64),
        tokens_generated: reply.pointer("/usage/completion_tokens").and_then(serde_json::Value::as_u64),
    })
}
pub async fn rerank(&self, req: RerankRequest) -> Result<RerankResponse, String> { pub async fn rerank(&self, req: RerankRequest) -> Result<RerankResponse, String> {
let resp = self.client let resp = self.client
.post(format!("{}/rerank", self.base_url)) .post(format!("{}/rerank", self.base_url))

View File

@ -13,11 +13,9 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::sync::OnceLock; use std::sync::OnceLock;
/// Rough token count. `chars / 4` ceiling. See module docs for why // `estimate_tokens` moved to `shared::model_matrix::ModelMatrix::estimate_tokens`
/// this heuristic is sufficient. // (cdc24d8). All callers migrated; the deprecated wrapper that stood in its
pub fn estimate_tokens(text: &str) -> usize { // place has been removed since it had zero external consumers.
(text.chars().count() + 3) / 4
}
/// Phase 21 — per-model context windows, mirroring the TS table in /// Phase 21 — per-model context windows, mirroring the TS table in
/// `tests/multi-agent/agent.ts`. Anchored on each model's documented /// `tests/multi-agent/agent.ts`. Anchored on each model's documented
@ -84,8 +82,8 @@ pub fn assert_context_budget(
let window = context_window_for(model); let window = context_window_for(model);
let safety = opts.safety_margin.unwrap_or(DEFAULT_SAFETY_MARGIN); let safety = opts.safety_margin.unwrap_or(DEFAULT_SAFETY_MARGIN);
let max_tokens = opts.max_tokens.unwrap_or(DEFAULT_MAX_TOKENS); let max_tokens = opts.max_tokens.unwrap_or(DEFAULT_MAX_TOKENS);
let sys_tokens = opts.system.map(estimate_tokens).unwrap_or(0); let sys_tokens = opts.system.map(shared::model_matrix::ModelMatrix::estimate_tokens).unwrap_or(0);
let estimated = estimate_tokens(prompt) + sys_tokens + max_tokens; let estimated = shared::model_matrix::ModelMatrix::estimate_tokens(prompt) + sys_tokens + max_tokens;
let remaining = window as i64 - estimated as i64 - safety as i64; let remaining = window as i64 - estimated as i64 - safety as i64;
let check = BudgetCheck { estimated, window, remaining }; let check = BudgetCheck { estimated, window, remaining };
if remaining < 0 && !opts.bypass { if remaining < 0 && !opts.bypass {
@ -109,14 +107,10 @@ pub fn overflow_message(model: &str, check: &BudgetCheck, over_by: usize, safety
mod tests { mod tests {
use super::*; use super::*;
#[test] // Deprecated-function behavior is now canonically tested in
fn estimate_tokens_ceiling_divides_by_four() { // crates/shared/src/model_matrix.rs. This test was the legacy
assert_eq!(estimate_tokens(""), 0); // pin that preceded the migration; delete when the deprecated
assert_eq!(estimate_tokens("abc"), 1); // 3 → ceil(3/4) = 1 // wrapper itself goes (see the #[deprecated] attribute).
assert_eq!(estimate_tokens("abcd"), 1); // 4 → ceil(4/4) = 1
assert_eq!(estimate_tokens("abcde"), 2); // 5 → ceil(5/4) = 2
assert_eq!(estimate_tokens(&"x".repeat(400)), 100);
}
#[test] #[test]
fn context_window_known_and_fallback() { fn context_window_known_and_fallback() {
@ -179,7 +173,7 @@ mod tests {
).unwrap(); ).unwrap();
assert!(with_sys.estimated > without_sys.estimated, assert!(with_sys.estimated > without_sys.estimated,
"system prompt should raise estimate"); "system prompt should raise estimate");
assert_eq!(with_sys.estimated - without_sys.estimated, estimate_tokens(&sys)); assert_eq!(with_sys.estimated - without_sys.estimated, shared::model_matrix::ModelMatrix::estimate_tokens(&sys));
} }
#[test] #[test]

View File

@ -138,6 +138,17 @@ pub struct ContinuableOutcome {
pub empty_retries: usize, pub empty_retries: usize,
pub continuations: usize, pub continuations: usize,
pub final_complete: bool, pub final_complete: bool,
/// Sum of `prompt_tokens` across every generator call made to
/// produce this outcome — including empty retries and continuations.
/// Lets callers (gateway execution loop, observability) stamp
/// accurate per-task usage without second-guessing the retry fan-out.
pub prompt_tokens: u32,
/// Sum of `completion_tokens` across every generator call.
pub completion_tokens: u32,
/// Total number of generator calls. `1 + empty_retries +
/// continuations` in the normal case; the field is explicit so
/// callers don't have to re-derive it.
pub calls: u32,
} }
fn make_request(opts: &ContinuableOpts, prompt: String, current_max: u32) -> GenerateRequest { fn make_request(opts: &ContinuableOpts, prompt: String, current_max: u32) -> GenerateRequest {
@ -175,11 +186,20 @@ pub async fn generate_continuable<G: TextGenerator>(
let mut combined = String::new(); let mut combined = String::new();
let mut empty_retries = 0usize; let mut empty_retries = 0usize;
let mut continuations = 0usize; let mut continuations = 0usize;
let mut prompt_tokens: u32 = 0;
let mut completion_tokens: u32 = 0;
let mut calls: u32 = 0;
// Phase 21(a) — empty-response backoff loop. // Phase 21(a) — empty-response backoff loop.
for retry in 0..opts.max_empty_retries { for retry in 0..opts.max_empty_retries {
let req = make_request(opts, prompt.to_string(), current_max); let req = make_request(opts, prompt.to_string(), current_max);
let resp = generator.generate_text(req).await?; let resp = generator.generate_text(req).await?;
calls += 1;
// u32::try_from(..).unwrap_or(u32::MAX) clamps to u32::MAX instead of
// silently truncating bits when tokens_evaluated/_generated comes back
// as a u64 > 4 billion (try_from itself errors on overflow; the
// unwrap_or supplies the saturation). Caught 2026-04-27 by Opus self-audit.
prompt_tokens = prompt_tokens.saturating_add(u32::try_from(resp.tokens_evaluated.unwrap_or(0)).unwrap_or(u32::MAX));
completion_tokens = completion_tokens.saturating_add(u32::try_from(resp.tokens_generated.unwrap_or(0)).unwrap_or(u32::MAX));
if !resp.text.trim().is_empty() { if !resp.text.trim().is_empty() {
combined = resp.text; combined = resp.text;
break; break;
@ -188,9 +208,7 @@ pub async fn generate_continuable<G: TextGenerator>(
current_max = (current_max.saturating_mul(2)).min(opts.budget_cap); current_max = (current_max.saturating_mul(2)).min(opts.budget_cap);
} }
// Phase 21(b) — structural-completion continuation loop. Runs on // Phase 21(b) — structural-completion continuation loop.
// the truncated-non-empty case; empty + exhausted retries falls
// through with empty combined and final_complete=false.
for _ in 0..opts.max_continuations { for _ in 0..opts.max_continuations {
if is_structurally_complete(&combined, opts.shape) { if is_structurally_complete(&combined, opts.shape) {
return Ok(ContinuableOutcome { return Ok(ContinuableOutcome {
@ -198,17 +216,22 @@ pub async fn generate_continuable<G: TextGenerator>(
empty_retries, empty_retries,
continuations, continuations,
final_complete: true, final_complete: true,
prompt_tokens,
completion_tokens,
calls,
}); });
} }
if combined.trim().is_empty() { if combined.trim().is_empty() {
// Nothing to continue from — continuing "" is identical to // Nothing to continue from — continuing "" is identical to
// the initial call and would loop. Bail so the caller sees // the initial call and would loop.
// the failure rather than burning N extra calls.
break; break;
} }
let cont_prompt = continuation_prompt(prompt, &combined); let cont_prompt = continuation_prompt(prompt, &combined);
let req = make_request(opts, cont_prompt, current_max.min(opts.budget_cap)); let req = make_request(opts, cont_prompt, current_max.min(opts.budget_cap));
let resp = generator.generate_text(req).await?; let resp = generator.generate_text(req).await?;
calls += 1;
prompt_tokens = prompt_tokens.saturating_add(u32::try_from(resp.tokens_evaluated.unwrap_or(0)).unwrap_or(u32::MAX));
completion_tokens = completion_tokens.saturating_add(u32::try_from(resp.tokens_generated.unwrap_or(0)).unwrap_or(u32::MAX));
combined.push_str(&resp.text); combined.push_str(&resp.text);
continuations += 1; continuations += 1;
} }
@ -219,6 +242,9 @@ pub async fn generate_continuable<G: TextGenerator>(
empty_retries, empty_retries,
continuations, continuations,
final_complete, final_complete,
prompt_tokens,
completion_tokens,
calls,
}) })
} }

View File

@ -40,12 +40,14 @@ struct OpenRouterChoice {
} }
#[derive(Deserialize)] #[derive(Deserialize)]
#[allow(dead_code)]
struct OpenRouterMessageOut { struct OpenRouterMessageOut {
role: String, role: String,
content: String, content: String,
} }
#[derive(Deserialize)] #[derive(Deserialize)]
#[allow(dead_code)]
struct OpenRouterUsage { struct OpenRouterUsage {
prompt_tokens: Option<u32>, prompt_tokens: Option<u32>,
completion_tokens: Option<u32>, completion_tokens: Option<u32>,

View File

@ -1,5 +1,4 @@
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::collections::HashMap;
#[derive(Clone, Debug, Deserialize, Serialize)] #[derive(Clone, Debug, Deserialize, Serialize)]
pub struct RoutingRule { pub struct RoutingRule {
@ -71,15 +70,19 @@ pub struct RouteDecision {
} }
fn glob_match(pattern: &str, name: &str) -> bool { fn glob_match(pattern: &str, name: &str) -> bool {
if pattern.contains('*') { if !pattern.contains('*') { return pattern == name; }
let parts: Vec<&str> = pattern.split('*').collect(); let parts: Vec<&str> = pattern.split('*').collect();
if parts.len() == 2 { // Multi-* support: first must be prefix, last must be suffix, each
return name.starts_with(parts[0]) && name.ends_with(parts[1]); // interior piece must appear in order. Fixes the iter-9 finding
} else if parts.len() == 1 { // where gpt-*-large* silently fell through to an exact-match path.
return name.starts_with(parts[0]) || name.ends_with(parts[1]); // Also removes the dead `parts.len() == 1` branch that accessed
} // parts[1] and would panic if ever reached (unreachable today
} // since split('*') on a string containing '*' always yields ≥2).
pattern == name if !name.starts_with(parts[0]) || !name.ends_with(parts.last().unwrap()) { return false; }
let mut cursor = parts[0].len();
parts[1..parts.len() - 1].iter().all(|mid| {
name[cursor..].find(mid).map(|pos| { cursor += pos + mid.len(); true }).unwrap_or(false)
})
} }
impl Default for RoutingRule { impl Default for RoutingRule {
@ -92,3 +95,25 @@ impl Default for RoutingRule {
} }
} }
} }
#[cfg(test)]
mod glob_match_tests {
    use super::glob_match;

    #[test]
    fn exact_match() {
        assert!(glob_match("gpt-oss:120b", "gpt-oss:120b"));
    }

    #[test]
    fn exact_mismatch() {
        assert!(!glob_match("a", "b"));
    }

    #[test]
    fn leading_wildcard() {
        assert!(glob_match("*:120b", "gpt-oss:120b"));
    }

    #[test]
    fn trailing_wildcard() {
        assert!(glob_match("gpt-oss:*", "gpt-oss:120b"));
    }

    #[test]
    fn bare_wildcard() {
        assert!(glob_match("*", "anything"));
    }

    #[test]
    fn multi_wildcard_in_order() {
        assert!(glob_match("gpt-*-oss-*", "gpt-4-oss-120b"));
    }

    #[test]
    fn multi_wildcard_wrong_order() {
        assert!(!glob_match("b*a*", "abba"));
    }

    /// Regression: the pre-rewrite impl carried an unreachable
    /// `parts.len() == 1` branch that indexed parts[1] — a latent
    /// panic. split('*') on a pattern containing '*' always yields
    /// >= 2 parts, and the rewrite handles every N-part case
    /// explicitly.
    #[test]
    fn multi_wildcard_panic_safety() {
        assert!(glob_match("a*b*c", "abc"));
        assert!(glob_match("a*b*c", "axxxbxxxc"));
        assert!(!glob_match("a*b*c", "xxxbxxx"));
    }
}

View File

@ -19,8 +19,9 @@
//! we bubble the error up rather than silently truncating. That's the //! we bubble the error up rather than silently truncating. That's the
//! whole point of Phase 21. //! whole point of Phase 21.
use crate::context::{assert_context_budget, BudgetOpts, estimate_tokens, overflow_message, use crate::context::{assert_context_budget, BudgetOpts, overflow_message,
DEFAULT_MAX_TOKENS, DEFAULT_SAFETY_MARGIN}; DEFAULT_MAX_TOKENS, DEFAULT_SAFETY_MARGIN};
use shared::model_matrix::ModelMatrix;
use crate::continuation::{generate_continuable, ContinuableOpts, ResponseShape, TextGenerator}; use crate::continuation::{generate_continuable, ContinuableOpts, ResponseShape, TextGenerator};
/// Callback signatures — caller supplies closures that stitch the /// Callback signatures — caller supplies closures that stitch the
@ -80,12 +81,12 @@ pub struct TreeSplitResult {
/// by `\n— shard N/M digest —\n` so we can find the first one and /// by `\n— shard N/M digest —\n` so we can find the first one and
/// chop everything before its successor. /// chop everything before its successor.
fn truncate_scratchpad(scratchpad: &mut String, budget_tokens: usize) -> bool { fn truncate_scratchpad(scratchpad: &mut String, budget_tokens: usize) -> bool {
if estimate_tokens(scratchpad) <= budget_tokens { return false; } if ModelMatrix::estimate_tokens(scratchpad) <= budget_tokens { return false; }
// Find the second delimiter — everything before it gets dropped. // Find the second delimiter — everything before it gets dropped.
const DELIM_PREFIX: &str = "\n— shard "; const DELIM_PREFIX: &str = "\n— shard ";
let mut cursor = 0; let mut cursor = 0;
let mut truncated = false; let mut truncated = false;
while estimate_tokens(&scratchpad[cursor..]) > budget_tokens { while ModelMatrix::estimate_tokens(&scratchpad[cursor..]) > budget_tokens {
// Skip past a leading delimiter (if we're sitting on one from // Skip past a leading delimiter (if we're sitting on one from
// a previous iteration), then find the next. // a previous iteration), then find the next.
let search_from = cursor + if scratchpad[cursor..].starts_with(DELIM_PREFIX) { let search_from = cursor + if scratchpad[cursor..].starts_with(DELIM_PREFIX) {
@ -278,7 +279,7 @@ mod tests {
// Scratchpad should still fit roughly within the budget // Scratchpad should still fit roughly within the budget
// (post-truncation); the estimator uses chars/4 so the bound // (post-truncation); the estimator uses chars/4 so the bound
// is ~budget*4 chars. Give some slack for the delimiter. // is ~budget*4 chars. Give some slack for the delimiter.
let scratchpad_tokens = estimate_tokens(&result.scratchpad); let scratchpad_tokens = ModelMatrix::estimate_tokens(&result.scratchpad);
assert!(scratchpad_tokens <= opts.scratchpad_budget * 2, assert!(scratchpad_tokens <= opts.scratchpad_budget * 2,
"scratchpad {} tokens vs budget {}", scratchpad_tokens, opts.scratchpad_budget); "scratchpad {} tokens vs budget {}", scratchpad_tokens, opts.scratchpad_budget);
} }

View File

@ -12,6 +12,8 @@ aibridge = { path = "../aibridge" }
ingestd = { path = "../ingestd" } ingestd = { path = "../ingestd" }
vectord = { path = "../vectord" } vectord = { path = "../vectord" }
journald = { path = "../journald" } journald = { path = "../journald" }
truth = { path = "../truth" }
validator = { path = "../validator" }
tokio = { workspace = true } tokio = { workspace = true }
axum = { workspace = true } axum = { workspace = true }
serde = { workspace = true } serde = { workspace = true }
@ -29,3 +31,4 @@ tracing-opentelemetry = { workspace = true }
arrow = { workspace = true } arrow = { workspace = true }
chrono = { workspace = true } chrono = { workspace = true }
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] } reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
toml = { workspace = true }

View File

@ -93,7 +93,7 @@ impl AccessControl {
self.roles.write().await.insert(role.agent_name.clone(), role); self.roles.write().await.insert(role.agent_name.clone(), role);
} }
/// Get an agent's role. /// Get an agent's role. Called by `GET /access/roles/{agent}`.
pub async fn get_role(&self, agent: &str) -> Option<AgentRole> { pub async fn get_role(&self, agent: &str) -> Option<AgentRole> {
self.roles.read().await.get(agent).cloned() self.roles.read().await.get(agent).cloned()
} }
@ -113,6 +113,7 @@ impl AccessControl {
} }
/// Determine which fields should be masked for an agent. /// Determine which fields should be masked for an agent.
#[allow(dead_code)]
pub async fn masked_fields( pub async fn masked_fields(
&self, &self,
agent: &str, agent: &str,
@ -138,6 +139,7 @@ impl AccessControl {
} }
/// Log a query for audit. /// Log a query for audit.
#[allow(dead_code)]
pub async fn log_query(&self, audit: QueryAudit) { pub async fn log_query(&self, audit: QueryAudit) {
self.audit_log.write().await.push(audit); self.audit_log.write().await.push(audit);
} }
@ -149,6 +151,9 @@ impl AccessControl {
log[start..].iter().rev().cloned().collect() log[start..].iter().rev().cloned().collect()
} }
/// Reports whether access-control enforcement is active.
/// Called by `GET /access/enabled` — ops tooling / dashboards poll
/// this to confirm the auth posture of the running gateway.
pub fn is_enabled(&self) -> bool { pub fn is_enabled(&self) -> bool {
self.enabled self.enabled
} }

View File

@ -1,6 +1,6 @@
use axum::{ use axum::{
Json, Router, Json, Router,
extract::{Query, State}, extract::{Path, Query, State},
http::StatusCode, http::StatusCode,
response::IntoResponse, response::IntoResponse,
routing::{get, post}, routing::{get, post},
@ -13,6 +13,12 @@ pub fn router(ac: AccessControl) -> Router {
Router::new() Router::new()
.route("/roles", get(list_roles)) .route("/roles", get(list_roles))
.route("/roles", post(set_role)) .route("/roles", post(set_role))
// Scrum iter 11 / P13-001 finding: get_role was #[allow(dead_code)]
// because nothing called it — dead until exposed. Route activates it.
// Returns 404 when the agent isn't registered so clients can
// distinguish "missing role" from "access denied."
.route("/roles/{agent}", get(get_role))
.route("/enabled", get(enabled_status))
.route("/audit", get(query_audit)) .route("/audit", get(query_audit))
.route("/check", post(check_access)) .route("/check", post(check_access))
.with_state(ac) .with_state(ac)
@ -60,3 +66,17 @@ async fn check_access(
"allowed": allowed, "allowed": allowed,
})) }))
} }
/// `GET /access/roles/{agent}` — look up a single agent's role.
/// Responds 404 with a human-readable body when no role is registered,
/// so clients can tell "missing role" apart from "access denied".
async fn get_role(
    State(ac): State<AccessControl>,
    Path(agent): Path<String>,
) -> impl IntoResponse {
    let role = ac.get_role(&agent).await;
    if let Some(role) = role {
        Ok(Json(role))
    } else {
        Err((StatusCode::NOT_FOUND, format!("no role registered for agent '{agent}'")))
    }
}
/// `GET /access/enabled` — report whether access-control enforcement
/// is active on this gateway instance.
async fn enabled_status(State(ac): State<AccessControl>) -> impl IntoResponse {
    let enabled = ac.is_enabled();
    Json(serde_json::json!({ "enabled": enabled }))
}

View File

@ -5,30 +5,51 @@ use axum::{
response::Response, response::Response,
}; };
/// API key auth middleware. Checks X-API-Key header against configured key. // API key auth middleware. Checks X-API-Key header against configured key.
// Fixed P5-001 (2026-04-23): previously #[allow(dead_code)] — the function
// existed but was never layered onto the router, so [auth] enabled=true
// silently enforced nothing. Now wired via from_fn_with_state in main.rs.
pub async fn api_key_auth( pub async fn api_key_auth(
axum::extract::State(expected): axum::extract::State<ApiKey>,
request: Request, request: Request,
next: Next, next: Next,
) -> Result<Response, StatusCode> { ) -> Result<Response, StatusCode> {
// Get the expected key from the request extensions (set by the layer) // /health stays public (LB/systemd probes). Every other route is gated.
let expected_key = request.extensions().get::<ApiKey>().cloned(); if request.uri().path() == "/health" {
return Ok(next.run(request).await);
if let Some(expected) = expected_key {
let provided = request
.headers()
.get("x-api-key")
.and_then(|v| v.to_str().ok());
match provided {
Some(key) if key == expected.0 => {}
_ => {
tracing::warn!("unauthorized request: missing or invalid API key");
return Err(StatusCode::UNAUTHORIZED);
}
}
} }
Ok(next.run(request).await) let provided = request
.headers()
.get("x-api-key")
.and_then(|v| v.to_str().ok());
// Constant-time-ish eq on the raw bytes; good enough for a shared-secret
// X-API-Key. Timing-attack resistance matters less here than it would
// for an HMAC check; adopt the `subtle` crate if the key-space grows.
match provided {
Some(key) if eq_ct(key.as_bytes(), expected.0.as_bytes()) => {
Ok(next.run(request).await)
}
_ => {
tracing::warn!(
path = %request.uri().path(),
"unauthorized request: missing or invalid API key",
);
Err(StatusCode::UNAUTHORIZED)
}
}
}
/// Constant-time byte-slice equality: ORs together the XOR of every
/// byte pair so comparison time doesn't depend on where the first
/// mismatch occurs. A length mismatch returns early — the key length
/// is not treated as a secret here.
fn eq_ct(a: &[u8], b: &[u8]) -> bool {
    a.len() == b.len()
        && a.iter()
            .zip(b.iter())
            .fold(0u8, |acc, (x, y)| acc | (x ^ y))
            == 0
}
/// Wrapper type for the API key, stored in request extensions. /// Wrapper type for the API key, stored in request extensions.

View File

@ -0,0 +1,388 @@
//! KB context loader — reads recent signal from `data/_kb/*.jsonl` for
//! a given sig_hash + task_class and returns a compact summary.
//!
//! This is the "pipe to the overviewer" from the 2026-04-23 session:
//! the overseer tier (T3, gpt-oss:120b) consumes this context before
//! generating a correction, so its suggestions are informed by
//! historical cost / latency / outcome / prior-correction patterns
//! across ALL profiles that have run this task class — not just the
//! single current loop.
//!
//! Hot-swap profiles read the SAME pool. When a profile activates and
//! starts iterating, its KB context is the shared surface — one
//! profile's learning becomes every profile's starting point.
//!
//! Best-effort throughout: missing files, corrupt rows, empty
//! directories all produce an empty KbContext. The overseer works
//! fine with no history; we just can't seed it then.
use serde::Serialize;
use std::path::Path;
use tokio::io::AsyncBufReadExt;
/// Compact summary returned to the overseer. Bounded size — recent
/// outcomes + corrections plus rolled-up rates. Goal is to fit in a
/// prompt without eating the overseer's context budget.
#[derive(Debug, Clone, Default, Serialize)]
pub struct KbContext {
    // Signature hash this context was built for (primary match key).
    pub sig_hash: String,
    // Task class used as the secondary / fallback match key.
    pub task_class: String,
    // Most-recent outcomes, capped at RECENT_OUTCOME_LIMIT: exact
    // sig_hash matches first, task_class matches as filler.
    pub recent_outcomes: Vec<OutcomeSummary>,
    // Recent overseer corrections (sig_hash preferred, task_class
    // fallback), capped at RECENT_CORRECTION_LIMIT.
    pub recent_corrections: Vec<CorrectionSummary>,
    // Fraction of ok=true rows over the aggregate window; None when
    // there is no matching history.
    pub success_rate: Option<f64>,
    // Mean `turns` per outcome over the window; None when no history.
    pub avg_turns: Option<f64>,
    // Mean `usage.latency_ms` over the window; None when no history.
    pub avg_latency_ms: Option<u64>,
    // Number of outcome rows that fed the aggregates (0 = no history).
    pub total_observed: u32,
}
/// One outcome row from `outcomes.jsonl`, trimmed to the fields the
/// overseer prompt needs.
#[derive(Debug, Clone, Serialize)]
pub struct OutcomeSummary {
    // Row timestamp, passed through as a string from the JSONL row.
    pub created_at: String,
    // Whether the run succeeded.
    pub ok: bool,
    // Outcome polarity label from the row.
    // NOTE(review): exact value set not visible from this module —
    // confirm against the outcomes writer.
    pub polarity: String,
    // Turn count consumed by the run.
    pub turns: u32,
    // End-to-end latency in milliseconds.
    pub latency_ms: u64,
    // Total tokens used across the run.
    pub total_tokens: u64,
    // Error string carried by the row, when present.
    pub error: Option<String>,
}
/// One correction row from `overseer_corrections.jsonl`, trimmed for
/// the overseer prompt.
#[derive(Debug, Clone, Serialize)]
pub struct CorrectionSummary {
    // Row timestamp, passed through as a string from the JSONL row.
    pub created_at: String,
    // Why the overseer issued the correction.
    pub reason: String,
    pub correction_preview: String, // first 300 chars of the correction text
    // Loop turn at which the correction was applied.
    pub applied_at_turn: u32,
}
// Default KB file locations, relative to the service working directory.
const OUTCOMES_PATH: &str = "data/_kb/outcomes.jsonl";
const CORRECTIONS_PATH: &str = "data/_kb/overseer_corrections.jsonl";
// Caps on what goes into the prompt-facing summary lists.
const RECENT_OUTCOME_LIMIT: usize = 5;
const RECENT_CORRECTION_LIMIT: usize = 3;
// Row window for the rolled-up success/turn/latency aggregates.
const AGGREGATE_WINDOW: usize = 50;
impl KbContext {
/// Convenience wrapper: build context from the default KB paths
/// (`OUTCOMES_PATH` / `CORRECTIONS_PATH`).
pub async fn load_for(sig_hash: &str, task_class: &str) -> Self {
    let outcomes = Path::new(OUTCOMES_PATH);
    let corrections = Path::new(CORRECTIONS_PATH);
    Self::load_from(sig_hash, task_class, outcomes, corrections).await
}
/// Path-taking variant — tests inject tmp files without touching
/// the real KB directory (same pattern as append_outcomes_row_at).
pub async fn load_from(
sig_hash: &str,
task_class: &str,
outcomes_path: &Path,
corrections_path: &Path,
) -> Self {
let mut ctx = KbContext {
sig_hash: sig_hash.to_string(),
task_class: task_class.to_string(),
..Default::default()
};
// Scan outcomes — matches on sig_hash primary, task_class
// secondary (so different geos for the same task_class still
// contribute to aggregate rates even though they won't make
// the top-5 recent). The bounded window keeps scan cost
// linear in file size — we're reading tail only.
let outcome_rows = tail_matching(
outcomes_path, AGGREGATE_WINDOW * 4,
|row| {
let row_sig = row.get("sig_hash").and_then(|v| v.as_str()).unwrap_or("");
let row_tc = row.get("task_class").and_then(|v| v.as_str()).unwrap_or("");
row_sig == sig_hash || row_tc == task_class
},
).await;
// Recent outcomes: exact sig_hash match first (strongest
// signal), then task_class fallback up to the limit.
let mut exact: Vec<OutcomeSummary> = Vec::new();
let mut loose: Vec<OutcomeSummary> = Vec::new();
for row in &outcome_rows {
let row_sig = row.get("sig_hash").and_then(|v| v.as_str()).unwrap_or("");
let summary = summarize_outcome(row);
if row_sig == sig_hash { exact.push(summary); }
else { loose.push(summary); }
}
ctx.recent_outcomes = exact.into_iter().rev().take(RECENT_OUTCOME_LIMIT).collect();
if ctx.recent_outcomes.len() < RECENT_OUTCOME_LIMIT {
let need = RECENT_OUTCOME_LIMIT - ctx.recent_outcomes.len();
ctx.recent_outcomes.extend(loose.into_iter().rev().take(need));
}
// Aggregate rates across the full matched window (both
// sig_hash and task_class matches — gives a stable rate even
// on sparse sig_hash history).
let window = outcome_rows.iter().rev().take(AGGREGATE_WINDOW);
let mut ok_count = 0u32;
let mut total = 0u32;
let mut turn_sum = 0u32;
let mut latency_sum = 0u64;
for row in window {
total += 1;
if row.get("ok").and_then(|v| v.as_bool()).unwrap_or(false) { ok_count += 1; }
turn_sum += row.get("turns").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
latency_sum += row.get("usage")
.and_then(|u| u.get("latency_ms"))
.and_then(|v| v.as_u64()).unwrap_or(0);
}
if total > 0 {
ctx.total_observed = total;
ctx.success_rate = Some(ok_count as f64 / total as f64);
ctx.avg_turns = Some(turn_sum as f64 / total as f64);
ctx.avg_latency_ms = Some(latency_sum / total as u64);
}
// Overseer corrections. Prefer sig_hash match; fall back to
// task_class. The overseer reading its OWN prior corrections
// is the main point — if the last 3 attempts produced
// corrections X, Y, Z, the new correction should acknowledge
// those patterns rather than suggest X for the fourth time.
let correction_rows = tail_matching(
corrections_path, RECENT_CORRECTION_LIMIT * 4,
|row| {
let row_sig = row.get("sig_hash").and_then(|v| v.as_str()).unwrap_or("");
let row_tc = row.get("task_class").and_then(|v| v.as_str()).unwrap_or("");
row_sig == sig_hash || row_tc == task_class
},
).await;
let mut c_exact: Vec<CorrectionSummary> = Vec::new();
let mut c_loose: Vec<CorrectionSummary> = Vec::new();
for row in &correction_rows {
let row_sig = row.get("sig_hash").and_then(|v| v.as_str()).unwrap_or("");
let summary = summarize_correction(row);
if row_sig == sig_hash { c_exact.push(summary); }
else { c_loose.push(summary); }
}
ctx.recent_corrections = c_exact.into_iter().rev().take(RECENT_CORRECTION_LIMIT).collect();
if ctx.recent_corrections.len() < RECENT_CORRECTION_LIMIT {
let need = RECENT_CORRECTION_LIMIT - ctx.recent_corrections.len();
ctx.recent_corrections.extend(c_loose.into_iter().rev().take(need));
}
ctx
}
/// Compact string form for the overseer prompt. Deterministic
/// ordering + bounded length so prompt caching stays stable
/// across iterations on the same task.
pub fn to_prompt_section(&self) -> String {
let mut s = String::new();
s.push_str("## Knowledge Base Context\n");
if let (Some(rate), Some(turns), Some(lat)) = (self.success_rate, self.avg_turns, self.avg_latency_ms) {
s.push_str(&format!(
"Across {} prior similar runs: success_rate={:.1}%, avg_turns={:.1}, avg_latency_ms={}\n",
self.total_observed, rate * 100.0, turns, lat,
));
} else {
s.push_str("No prior similar runs recorded.\n");
}
if !self.recent_outcomes.is_empty() {
s.push_str(&format!("\nRecent {} outcomes:\n", self.recent_outcomes.len()));
for o in &self.recent_outcomes {
let err = o.error.as_deref().map(|e| format!("{}", truncate(e, 80))).unwrap_or_default();
s.push_str(&format!(
" [{}] ok={} turns={} tokens={} lat={}ms{}\n",
&o.created_at[..19.min(o.created_at.len())],
o.ok, o.turns, o.total_tokens, o.latency_ms, err,
));
}
}
if !self.recent_corrections.is_empty() {
s.push_str(&format!("\nRecent {} overseer corrections (yours — don't repeat):\n", self.recent_corrections.len()));
for c in &self.recent_corrections {
s.push_str(&format!(
" [{}] turn={} reason={} correction={}\n",
&c.created_at[..19.min(c.created_at.len())],
c.applied_at_turn,
truncate(&c.reason, 40),
truncate(&c.correction_preview, 200),
));
}
}
s
}
}
/// Distill one outcomes.jsonl row into the prompt-facing summary.
/// Absent or wrongly-typed fields fall back to empty/zero defaults.
fn summarize_outcome(row: &serde_json::Value) -> OutcomeSummary {
    // Accessor closures keep the field mapping below readable.
    let str_field = |key: &str| row.get(key).and_then(|v| v.as_str()).unwrap_or("").to_string();
    let usage_u64 = |key: &str| {
        row.get("usage")
            .and_then(|u| u.get(key))
            .and_then(|v| v.as_u64())
            .unwrap_or(0)
    };
    OutcomeSummary {
        created_at: str_field("created_at"),
        ok: row.get("ok").and_then(|v| v.as_bool()).unwrap_or(false),
        polarity: str_field("polarity"),
        turns: row.get("turns").and_then(|v| v.as_u64()).unwrap_or(0) as u32,
        latency_ms: usage_u64("latency_ms"),
        total_tokens: usage_u64("total_tokens"),
        error: row.get("error").and_then(|v| v.as_str()).map(String::from),
    }
}
/// Distill one overseer_corrections.jsonl row into the prompt-facing
/// summary. The correction text is capped at 300 bytes via truncate().
fn summarize_correction(row: &serde_json::Value) -> CorrectionSummary {
    let text_field = |key: &str| row.get(key).and_then(|v| v.as_str()).unwrap_or("");
    let turn = row
        .get("applied_at_turn")
        .and_then(|v| v.as_u64())
        .unwrap_or(0) as u32;
    CorrectionSummary {
        created_at: text_field("created_at").to_string(),
        reason: text_field("reason").to_string(),
        correction_preview: truncate(text_field("correction"), 300),
        applied_at_turn: turn,
    }
}
/// Return at most the first `n` BYTES of `s`.
///
/// Bug fix: the previous `&s[..n]` byte-slice panics when `n` falls
/// inside a multi-byte UTF-8 character — correction/error text is
/// free-form model output, so non-ASCII is realistic. We now walk the
/// cut point back to the nearest char boundary; output is unchanged
/// for any input where the old code didn't panic.
fn truncate(s: &str, n: usize) -> String {
    if s.len() <= n {
        return s.to_string();
    }
    // Back up (at most 3 steps — UTF-8 chars are ≤ 4 bytes) until the
    // cut lands on a character boundary.
    let mut cut = n;
    while cut > 0 && !s.is_char_boundary(cut) {
        cut -= 1;
    }
    s[..cut].to_string()
}
/// Read a JSONL file, returning at most `limit` matching rows — the
/// most recent ones, in file order. A missing file yields an empty
/// vec; unparseable lines are skipped. Implementation note: we scan
/// the whole file and keep a sliding window of the last `limit`
/// matches. True reverse-seek tail iteration is real engineering work
/// and the file grows only at ingest rate; revisit when it bites.
async fn tail_matching<F>(
    path: &Path,
    limit: usize,
    filter: F,
) -> Vec<serde_json::Value>
where
    F: Fn(&serde_json::Value) -> bool,
{
    let file = match tokio::fs::File::open(path).await {
        Ok(f) => f,
        // Missing/unreadable file is a normal cold-start state.
        Err(_) => return Vec::new(),
    };
    let mut lines = tokio::io::BufReader::new(file).lines();
    let mut window: Vec<serde_json::Value> = Vec::new();
    while let Ok(Some(raw)) = lines.next_line().await {
        let row = match serde_json::from_str::<serde_json::Value>(&raw) {
            Ok(v) => v,
            // Corrupt line — skip silently, same contract as before.
            Err(_) => continue,
        };
        if !filter(&row) {
            continue;
        }
        window.push(row);
        // Slide the window: evict the oldest match so only the most
        // recent `limit` survive the scan.
        if window.len() > limit {
            window.remove(0);
        }
    }
    window
}
#[cfg(test)]
mod tests {
    use super::*;
    use tokio::io::AsyncWriteExt;
    /// Write `rows` as JSONL to `path`, creating parent dirs and
    /// truncating any existing file.
    async fn write_fixture(path: &Path, rows: Vec<serde_json::Value>) {
        if let Some(dir) = path.parent() {
            tokio::fs::create_dir_all(dir).await.unwrap();
        }
        let mut f = tokio::fs::OpenOptions::new()
            .create(true).write(true).truncate(true).open(path).await.unwrap();
        for r in rows {
            let mut line = serde_json::to_string(&r).unwrap();
            line.push('\n');
            f.write_all(line.as_bytes()).await.unwrap();
        }
    }
    /// Unique temp path per (process, nanosecond, name) so parallel
    /// test runs never collide on fixtures.
    fn tmp_path(name: &str) -> std::path::PathBuf {
        let nanos = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_nanos();
        std::env::temp_dir().join(format!("lh_kb_ctx_{}_{}_{}", std::process::id(), nanos, name))
    }
    #[tokio::test]
    async fn empty_files_produce_empty_context() {
        // Paths are never created — exercises the missing-file branch
        // of tail_matching.
        let op = tmp_path("outcomes.jsonl");
        let cp = tmp_path("corrections.jsonl");
        let ctx = KbContext::load_from("sig123", "staffing.fill", &op, &cp).await;
        assert!(ctx.recent_outcomes.is_empty());
        assert!(ctx.recent_corrections.is_empty());
        assert!(ctx.success_rate.is_none());
        assert_eq!(ctx.total_observed, 0);
    }
    #[tokio::test]
    async fn exact_sig_hash_matches_take_priority() {
        let op = tmp_path("outcomes.jsonl");
        let cp = tmp_path("corrections.jsonl");
        write_fixture(&op, vec![
            // Other sig_hash, same task_class — loose match
            serde_json::json!({
                "sig_hash": "other", "task_class": "staffing.fill",
                "ok": false, "polarity": "failure_pattern", "turns": 1,
                "usage": {"latency_ms": 1000, "total_tokens": 100},
                "created_at": "2026-04-22T10:00:00Z",
            }),
            // Exact sig_hash — should lead
            serde_json::json!({
                "sig_hash": "sig123", "task_class": "staffing.fill",
                "ok": true, "polarity": "success_confirmation", "turns": 3,
                "usage": {"latency_ms": 2000, "total_tokens": 500},
                "created_at": "2026-04-23T10:00:00Z",
            }),
        ]).await;
        write_fixture(&cp, vec![]).await;
        let ctx = KbContext::load_from("sig123", "staffing.fill", &op, &cp).await;
        assert_eq!(ctx.recent_outcomes.len(), 2);
        assert_eq!(ctx.recent_outcomes[0].created_at, "2026-04-23T10:00:00Z");
        assert_eq!(ctx.recent_outcomes[0].ok, true);
        // Both rows land in the aggregate window: 1 ok out of 2.
        assert_eq!(ctx.total_observed, 2);
        assert!((ctx.success_rate.unwrap() - 0.5).abs() < 0.001);
    }
    #[tokio::test]
    async fn corrupt_rows_are_skipped() {
        let op = tmp_path("outcomes.jsonl");
        let cp = tmp_path("corrections.jsonl");
        // Mix valid + invalid — invalid should be silently skipped.
        if let Some(dir) = op.parent() { tokio::fs::create_dir_all(dir).await.unwrap(); }
        tokio::fs::write(&op, "not json\n{\"sig_hash\":\"sig1\",\"task_class\":\"tc\",\"ok\":true,\"turns\":1,\"usage\":{}}\ngarbage\n").await.unwrap();
        write_fixture(&cp, vec![]).await;
        let ctx = KbContext::load_from("sig1", "tc", &op, &cp).await;
        assert_eq!(ctx.recent_outcomes.len(), 1);
    }
    #[tokio::test]
    async fn corrections_preview_is_truncated() {
        let op = tmp_path("outcomes.jsonl");
        let cp = tmp_path("corrections.jsonl");
        let long = "x".repeat(500);
        write_fixture(&op, vec![]).await;
        write_fixture(&cp, vec![serde_json::json!({
            "sig_hash": "sig1", "task_class": "tc",
            "reason": "abort", "correction": long, "applied_at_turn": 3,
            "created_at": "2026-04-23T10:00:00Z",
        })]).await;
        let ctx = KbContext::load_from("sig1", "tc", &op, &cp).await;
        assert_eq!(ctx.recent_corrections.len(), 1);
        // truncate() caps the preview at 300 bytes (no ellipsis is
        // appended); 303 just leaves a little slack in the assertion.
        assert!(ctx.recent_corrections[0].correction_preview.len() <= 303);
    }
    #[test]
    fn prompt_section_is_stable_for_empty_context() {
        let ctx = KbContext::default();
        let s = ctx.to_prompt_section();
        assert!(s.contains("No prior similar runs recorded"));
    }
    #[test]
    fn prompt_section_reports_aggregate_rates() {
        let ctx = KbContext {
            total_observed: 10,
            success_rate: Some(0.7),
            avg_turns: Some(4.2),
            avg_latency_ms: Some(45000),
            ..Default::default()
        };
        let s = ctx.to_prompt_section();
        assert!(s.contains("success_rate=70.0%"));
        assert!(s.contains("avg_turns=4.2"));
        assert!(s.contains("avg_latency_ms=45000"));
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,7 @@
mod access; mod access;
mod access_service; mod access_service;
mod auth; mod auth;
mod execution_loop;
mod observability; mod observability;
mod tools; mod tools;
mod v1; mod v1;
@ -67,14 +68,62 @@ async fn main() {
let access = access::AccessControl::new(config.auth.enabled); let access = access::AccessControl::new(config.auth.enabled);
access.register_defaults().await; access.register_defaults().await;
// Phase 42 — file-backed truth rules. Probes the `truth/` directory
// at repo root (or $LAKEHOUSE_TRUTH_DIR override) and logs how many
// rules load. Current request paths still build their own stores
// via truth::default_truth_store() / truth::sql_query_guard_store();
// the composed-at-boot store gets plumbed through V1State in a
// follow-up. This boot probe catches parse errors + duplicate-ID
// collisions early rather than at first request.
{
let truth_dir = std::env::var("LAKEHOUSE_TRUTH_DIR")
.unwrap_or_else(|_| "/home/profit/lakehouse/truth".to_string());
if std::path::Path::new(&truth_dir).exists() {
let mut probe_store = truth::default_truth_store();
match truth::loader::load_from_dir(&mut probe_store, &truth_dir) {
Ok(n) => tracing::info!("truth: loaded {n} file-backed rule(s) from {truth_dir}"),
Err(e) => tracing::warn!("truth: failed to load rules from {truth_dir}: {e}"),
}
} else {
tracing::debug!("truth: no rule dir at {truth_dir}, skipping file-backed load");
}
}
// Workspace manager for agent-specific overlays // Workspace manager for agent-specific overlays
let workspace_mgr = queryd::workspace::WorkspaceManager::new(store.clone()); let workspace_mgr = queryd::workspace::WorkspaceManager::new(store.clone());
if let Err(e) = workspace_mgr.rebuild().await { if let Err(e) = workspace_mgr.rebuild().await {
tracing::warn!("workspace rebuild: {e}"); tracing::warn!("workspace rebuild: {e}");
} }
// AI sidecar client // AI sidecar clients — Phase 44 part 3 (2026-04-27).
let ai_client = aibridge::client::AiClient::new(&config.sidecar.url); //
// Two flavors of the same client:
// - `ai_client_direct` posts directly to ${sidecar}/generate. Used
// inside the gateway by V1State + the legacy /ai proxy. These
// call sites are themselves the implementation of /v1/chat
// (or its sidecar shim), so routing them through /v1/chat
// would self-loop.
// - `ai_client_observable` posts via ${gateway}/v1/chat with
// provider="ollama". Used by vectord modules (autotune agent,
// /vectors service) so their LLM calls land in /v1/usage and
// Langfuse traces. Adds one localhost HTTP hop per call (~ms);
// accepted for the observability gain.
//
// The gateway can call its own /v1/chat over localhost during
// boot's transient period because we don't fire any LLM calls
// until the listener is up — the observable client is just
// configured here, not exercised.
let ai_client_direct = aibridge::client::AiClient::new(&config.sidecar.url);
let gateway_self_url = format!("http://{}:{}", config.gateway.host, config.gateway.port);
let ai_client_observable = aibridge::client::AiClient::new_with_gateway(
&config.sidecar.url,
&gateway_self_url,
);
// Backwards-compat alias for the (many) existing references in this file.
// Defaults to direct so the existing wiring (V1State, /ai proxy)
// keeps its non-self-loop transport. New vectord wiring below
// explicitly uses ai_client_observable.
let ai_client = ai_client_direct.clone();
// Vector service components — built before the router because both the // Vector service components — built before the router because both the
// /vectors service AND ingestd need the agent handle to enqueue triggers. // /vectors service AND ingestd need the agent handle to enqueue triggers.
@ -92,6 +141,12 @@ async fn main() {
// operators call POST /vectors/playbook_memory/rebuild to populate. // operators call POST /vectors/playbook_memory/rebuild to populate.
let pbm = vectord::playbook_memory::PlaybookMemory::new(store.clone()); let pbm = vectord::playbook_memory::PlaybookMemory::new(store.clone());
let _ = pbm.load_from_storage().await; let _ = pbm.load_from_storage().await;
// Pathway memory — consensus-designed sidecar for full-context
// backtracking + hot-swap of successful review pathways. Same
// load-on-boot pattern as playbook_memory: empty state is fine,
// operators start populating via scrum_master_pipeline.ts.
let pwm = vectord::pathway_memory::PathwayMemory::new(store.clone());
let _ = pwm.load_from_storage().await;
// Phase 16.2: spawn the autotune agent. When config.agent.enabled=false // Phase 16.2: spawn the autotune agent. When config.agent.enabled=false
// this returns a handle that drops triggers silently — no surprise load. // this returns a handle that drops triggers silently — no surprise load.
@ -106,7 +161,9 @@ async fn main() {
agent_cfg, agent_cfg,
vectord::agent::AgentDeps { vectord::agent::AgentDeps {
store: store.clone(), store: store.clone(),
ai_client: ai_client.clone(), // Observable: autotune agent's LLM calls go through
// /v1/chat for /v1/usage + Langfuse visibility.
ai_client: ai_client_observable.clone(),
catalog: registry.clone(), catalog: registry.clone(),
index_registry: index_reg.clone(), index_registry: index_reg.clone(),
hnsw_store: hnsw.clone(), hnsw_store: hnsw.clone(),
@ -153,10 +210,17 @@ async fn main() {
agent_handle: agent_handle.clone(), agent_handle: agent_handle.clone(),
index_registry: index_reg.clone(), index_registry: index_reg.clone(),
schedules: sched_store, schedules: sched_store,
// P9-001 fix 2026-04-23: journal reference flows into ingest so
// successful uploads emit a record_ingest event. Journal is Clone
// (Arc<RwLock> inside) so the /journal nest below still sees the
// same buffer + persistence.
journal: Some(journal.clone()),
})) }))
.nest("/vectors", vectord::service::router(vectord::service::VectorState { .nest("/vectors", vectord::service::router(vectord::service::VectorState {
store: store.clone(), store: store.clone(),
ai_client: ai_client.clone(), // Observable: /vectors service's LLM calls (RAG, summary,
// playbook synthesis, etc.) flow through /v1/chat.
ai_client: ai_client_observable.clone(),
job_tracker: vectord::jobs::JobTracker::new(), job_tracker: vectord::jobs::JobTracker::new(),
index_registry: index_reg.clone(), index_registry: index_reg.clone(),
hnsw_store: hnsw, hnsw_store: hnsw,
@ -172,17 +236,19 @@ async fn main() {
bucket_registry.clone(), index_reg.clone(), bucket_registry.clone(), index_reg.clone(),
), ),
playbook_memory: pbm, playbook_memory: pbm,
pathway_memory: pwm,
embed_semaphore: std::sync::Arc::new(tokio::sync::Semaphore::new(1)), embed_semaphore: std::sync::Arc::new(tokio::sync::Semaphore::new(1)),
})) }))
.nest("/workspaces", queryd::workspace_service::router(workspace_mgr)) .nest("/workspaces", queryd::workspace_service::router(workspace_mgr))
.nest("/journal", journald::service::router(journal)) .nest("/journal", journald::service::router(journal))
.nest("/access", access_service::router(access)) .nest("/access", access_service::router(access))
.nest("/tools", tools::service::router({ .nest("/tools", tools::service::router({
let tool_reg = tools::registry::ToolRegistry::new_with_defaults(); let tool_reg = tools::registry::ToolRegistry::new();
tool_reg.register_defaults().await; tool_reg.register_defaults().await;
tools::ToolState { tools::ToolState {
registry: tool_reg, registry: tool_reg,
query_fn: tools::QueryExecutor::new(engine.clone()), query_fn: tools::QueryExecutor::new(engine.clone()),
truth: std::sync::Arc::new(truth::sql_query_guard_store()),
} }
})) }))
// Phase 38 — Universal API skeleton. Thin OpenAI-compatible // Phase 38 — Universal API skeleton. Thin OpenAI-compatible
@ -204,6 +270,86 @@ async fn main() {
} }
k k
}, },
openrouter_key: {
// 2026-04-24 free-tier rescue rung for iter 5+. Shares
// the LLM Team UI's OPENROUTER_API_KEY so both systems
// draw from one quota.
let k = v1::openrouter::resolve_openrouter_key();
if k.is_some() {
tracing::info!("v1: OpenRouter key loaded — /v1/chat provider=openrouter enabled");
} else {
tracing::warn!("v1: no OpenRouter key — openrouter rescue rung will 503");
}
k
},
gemini_key: {
// Phase 40 provider. GEMINI_API_KEY in env or .env.
let k = v1::gemini::resolve_gemini_key();
if k.is_some() {
tracing::info!("v1: Gemini key loaded — /v1/chat provider=gemini enabled");
} else {
tracing::debug!("v1: no Gemini key — provider=gemini will 503");
}
k
},
claude_key: {
// Phase 40 provider. ANTHROPIC_API_KEY in env or .env.
let k = v1::claude::resolve_claude_key();
if k.is_some() {
tracing::info!("v1: Claude key loaded — /v1/chat provider=claude enabled");
} else {
tracing::debug!("v1: no Claude key — provider=claude will 503");
}
k
},
kimi_key: {
// Direct Kimi For Coding (api.kimi.com) — bypasses the
// broken-upstream kimi-k2:1t and OpenRouter rate caps.
// Key from /etc/lakehouse/kimi.env (KIMI_API_KEY=sk-kimi-…).
let k = v1::kimi::resolve_kimi_key();
if k.is_some() {
tracing::info!("v1: Kimi key loaded — /v1/chat provider=kimi enabled (model=kimi-for-coding)");
} else {
tracing::debug!("v1: no Kimi key — provider=kimi will 503");
}
k
},
opencode_key: {
// OpenCode GO multi-vendor gateway — Claude Opus 4.7,
// GPT-5.5-pro, Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM,
// Qwen + free-tier. Key from /etc/lakehouse/opencode.env.
let k = v1::opencode::resolve_opencode_key();
if k.is_some() {
tracing::info!("v1: OpenCode key loaded — /v1/chat provider=opencode enabled (40 models)");
} else {
tracing::debug!("v1: no OpenCode key — provider=opencode will 503");
}
k
},
validate_workers: {
// Load workers_500k.parquet snapshot for /v1/validate.
// Path overridable via LH_WORKERS_PARQUET env. Missing
// file is non-fatal — validators run schema/PII checks
// unaffected; only worker-existence checks fail clean.
let path_str = std::env::var("LH_WORKERS_PARQUET")
.unwrap_or_else(|_| "/home/profit/lakehouse/data/datasets/workers_500k.parquet".into());
let path = std::path::Path::new(&path_str);
if path.exists() {
match validator::staffing::parquet_lookup::load_workers_parquet(path) {
Ok(lookup) => {
tracing::info!("v1: workers parquet loaded from {} — /v1/validate worker-existence checks enabled", path_str);
lookup
}
Err(e) => {
tracing::warn!("v1: workers parquet at {} unreadable ({e}) — /v1/validate worker-existence checks will fail Consistency", path_str);
std::sync::Arc::new(validator::InMemoryWorkerLookup::new())
}
}
} else {
tracing::warn!("v1: workers parquet at {} not found — /v1/validate worker-existence checks will fail Consistency", path_str);
std::sync::Arc::new(validator::InMemoryWorkerLookup::new())
}
},
// Phase 40 early deliverable — Langfuse trace emitter. // Phase 40 early deliverable — Langfuse trace emitter.
// Defaults match mcp-server/tracing.ts conventions so // Defaults match mcp-server/tracing.ts conventions so
// gateway traces land in the same staffing project. // gateway traces land in the same staffing project.
@ -218,14 +364,19 @@ async fn main() {
}, },
})); }));
// Auth middleware (if enabled) // Auth middleware (if enabled) — P5-001 fix 2026-04-23:
// previously only inserted the ApiKey as an extension and never layered
// the middleware, so auth.enabled=true enforced nothing. Now wraps the
// router with from_fn_with_state, which calls api_key_auth on every
// request. /health is exempted inside the middleware (LB probes).
if config.auth.enabled { if config.auth.enabled {
if let Some(ref key) = config.auth.api_key { if let Some(ref key) = config.auth.api_key {
tracing::info!("API key auth enabled"); tracing::info!("API key auth enabled — enforcing on all routes except /health");
let api_key = auth::ApiKey(key.clone()); let api_key = auth::ApiKey(key.clone());
app = app.layer(axum::Extension(api_key)); app = app.layer(axum::middleware::from_fn_with_state(
// Note: auth middleware applied per-route in production api_key,
// For now, the ApiKey extension is available for handlers to check auth::api_key_auth,
));
} else { } else {
tracing::warn!("auth enabled but no api_key set — all requests allowed"); tracing::warn!("auth enabled but no api_key set — all requests allowed");
} }

View File

@ -3,12 +3,18 @@ pub mod service;
use queryd::context::QueryEngine; use queryd::context::QueryEngine;
use arrow::json::writer::{JsonArray, Writer as JsonWriter}; use arrow::json::writer::{JsonArray, Writer as JsonWriter};
use std::sync::Arc;
use truth::TruthStore;
/// State for the tool system. /// State for the tool system.
#[derive(Clone)] #[derive(Clone)]
pub struct ToolState { pub struct ToolState {
pub registry: registry::ToolRegistry, pub registry: registry::ToolRegistry,
pub query_fn: QueryExecutor, pub query_fn: QueryExecutor,
/// SQL guard (shared with queryd). Mirrors the queryd /sql truth
/// gate from P42-002 (9cc0ceb) — tools also execute model-
/// originated SQL, need the same destructive-verb block.
pub truth: Arc<TruthStore>,
} }
/// Wraps QueryEngine to provide a simple execute interface for tools. /// Wraps QueryEngine to provide a simple execute interface for tools.

View File

@ -67,24 +67,14 @@ pub struct ToolRegistry {
} }
impl ToolRegistry { impl ToolRegistry {
/// Build an empty registry. Callers in an async context should follow
/// this with `.register_defaults().await` if they want the built-in
/// staffing tools pre-installed — main.rs does exactly that.
pub fn new() -> Self { pub fn new() -> Self {
let registry = Self { Self {
tools: Arc::new(RwLock::new(HashMap::new())), tools: Arc::new(RwLock::new(HashMap::new())),
audit_log: Arc::new(RwLock::new(Vec::new())), audit_log: Arc::new(RwLock::new(Vec::new())),
}; }
// Register built-in staffing tools
tokio::task::block_in_place(|| {
tokio::runtime::Handle::current().block_on(registry.register_defaults())
});
registry
}
pub fn new_with_defaults() -> Self {
let registry = Self {
tools: Arc::new(RwLock::new(HashMap::new())),
audit_log: Arc::new(RwLock::new(Vec::new())),
};
registry
} }
/// Register default staffing tools. /// Register default staffing tools.

View File

@ -7,7 +7,7 @@ use axum::{
}; };
use serde::Deserialize; use serde::Deserialize;
use super::registry::{Permission, ToolInvocation, ToolRegistry}; use super::registry::{ToolInvocation, ToolRegistry};
use crate::tools::ToolState; use crate::tools::ToolState;
pub fn router(state: ToolState) -> Router { pub fn router(state: ToolState) -> Router {
@ -92,6 +92,32 @@ async fn call_tool(
} }
}; };
// Truth gate — same contract as queryd /sql (P42-002). Rejects
// destructive verbs + empty SQL. Scrum iter 11 CF-1 + CF-2 on this
// file: tools executed model-provided SQL parameters without any
// validation. Close the gap here so the parallel surface has the
// same safety floor as queryd.
let ctx = serde_json::json!({ "sql": sql });
for outcome in state.truth.evaluate("sql_query", &ctx) {
if outcome.passed {
if let truth::RuleAction::Reject { message } | truth::RuleAction::Block { message } = &outcome.action {
tracing::warn!("tool {name}: SQL blocked by truth gate ({}): {message}", outcome.rule_id);
state.registry.log_invocation(ToolInvocation {
id: format!("inv-{}", chrono::Utc::now().timestamp_millis()),
tool_name: name.clone(),
agent: req.agent.clone(),
params: req.params.clone(),
permission: tool.permission.clone(),
timestamp: chrono::Utc::now(),
success: false,
error: Some(format!("truth gate: {message}")),
rows_returned: None,
}).await;
return Err((StatusCode::FORBIDDEN, message.clone()));
}
}
}
// Execute via query engine // Execute via query engine
let result = state.query_fn.execute(&sql).await; let result = state.query_fn.execute(&sql).await;

View File

@ -0,0 +1,222 @@
//! Claude (Anthropic) adapter.
//!
//! POST `https://api.anthropic.com/v1/messages`. Auth via `x-api-key`
//! header (not bearer) + required `anthropic-version` header. Payload
//! is NOT OpenAI-compatible — response text lives at
//! `content[0].text`. Phase 40 deliverable. System prompts travel in
//! a top-level `system` field, separate from the `messages` array.
use std::time::Duration;
use serde::{Deserialize, Serialize};
use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};
/// Anthropic API root; requests go to `{base}/messages`.
const CLAUDE_BASE_URL: &str = "https://api.anthropic.com/v1";
/// Value for the required `anthropic-version` request header.
const CLAUDE_API_VERSION: &str = "2023-06-01";
/// Per-request client timeout, seconds.
const CLAUDE_TIMEOUT_SECS: u64 = 180;
/// Locate the Anthropic API key: the ANTHROPIC_API_KEY env var wins,
/// then the first non-empty `ANTHROPIC_API_KEY=` line found in
/// /home/profit/.env or /root/.env. Returns None when nothing usable
/// is found.
pub fn resolve_claude_key() -> Option<String> {
    if let Ok(from_env) = std::env::var("ANTHROPIC_API_KEY") {
        let trimmed = from_env.trim();
        if !trimmed.is_empty() {
            return Some(trimmed.to_string());
        }
    }
    for env_file in ["/home/profit/.env", "/root/.env"] {
        let Ok(contents) = std::fs::read_to_string(env_file) else { continue };
        for line in contents.lines() {
            let Some(value) = line.strip_prefix("ANTHROPIC_API_KEY=") else { continue };
            // Strip whitespace then a layer of double or single quotes.
            let cleaned = value.trim().trim_matches('"').trim_matches('\'');
            if !cleaned.is_empty() {
                return Some(cleaned.to_string());
            }
        }
    }
    None
}
/// POST one completion to the Anthropic Messages API and map the
/// result into the OpenAI-compatible `ChatResponse` shape.
///
/// * `key` — raw API key, sent via the `x-api-key` header.
/// * `req` — OpenAI-style request; system-role messages are hoisted
///   into Anthropic's top-level `system` field, every other role is
///   normalized to "user"/"assistant".
///
/// Errors are returned as human-readable strings: client build
/// failure, network failure, non-2xx status (body included), or an
/// unparseable response body.
pub async fn chat(
    key: &str,
    req: &ChatRequest,
) -> Result<ChatResponse, String> {
    // Strip the "claude/" prefix if the caller used the namespaced form.
    let model = req.model.strip_prefix("claude/").unwrap_or(&req.model).to_string();
    // Anthropic carries system prompts outside the messages array.
    // Concatenate any system-role messages into a single system string;
    // keep user + assistant messages in `messages`.
    let mut system_parts: Vec<String> = Vec::new();
    let mut msgs: Vec<AnMessage> = Vec::new();
    for m in &req.messages {
        if m.role == "system" {
            system_parts.push(m.text());
        } else {
            // Anthropic expects strictly "user" or "assistant"; anything
            // else we normalize to "user".
            let role = if m.role == "assistant" { "assistant" } else { "user" };
            msgs.push(AnMessage { role: role.to_string(), content: m.text() });
        }
    }
    let system = if system_parts.is_empty() {
        None
    } else {
        Some(system_parts.join("\n\n"))
    };
    // max_tokens is mandatory for Anthropic; defaults mirror the other
    // adapters' fallbacks (800 tokens, temperature 0.3).
    let body = AnChatBody {
        model: model.clone(),
        messages: msgs,
        max_tokens: req.max_tokens.unwrap_or(800),
        temperature: req.temperature.unwrap_or(0.3),
        system,
    };
    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(CLAUDE_TIMEOUT_SECS))
        .build()
        .map_err(|e| format!("build client: {e}"))?;
    // Latency window covers send → parsed body.
    let t0 = std::time::Instant::now();
    let resp = client
        .post(format!("{}/messages", CLAUDE_BASE_URL))
        .header("x-api-key", key)
        .header("anthropic-version", CLAUDE_API_VERSION)
        .json(&body)
        .send()
        .await
        .map_err(|e| format!("api.anthropic.com unreachable: {e}"))?;
    let status = resp.status();
    if !status.is_success() {
        let body = resp.text().await.unwrap_or_else(|_| "?".into());
        return Err(format!("claude {}: {}", status, body));
    }
    let parsed: AnChatResponse = resp.json().await
        .map_err(|e| format!("invalid claude response: {e}"))?;
    let latency_ms = t0.elapsed().as_millis();
    // First "text" content block is the answer; tool-use or other
    // block types are ignored, empty string when none present.
    let text = parsed.content.into_iter()
        .find(|b| b.block_type == "text")
        .map(|b| b.text)
        .unwrap_or_default();
    // Token counts: prefer the API's usage block; otherwise estimate
    // with the ~4-chars-per-token heuristic, rounded up.
    let prompt_tokens = parsed.usage.as_ref().map(|u| u.input_tokens).unwrap_or_else(|| {
        let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
        ((chars + 3) / 4) as u32
    });
    let completion_tokens = parsed.usage.as_ref().map(|u| u.output_tokens).unwrap_or_else(|| {
        ((text.chars().count() + 3) / 4) as u32
    });
    tracing::info!(
        target: "v1.chat",
        provider = "claude",
        model = %model,
        prompt_tokens,
        completion_tokens,
        latency_ms = latency_ms as u64,
        "claude chat completed",
    );
    // Map back to the OpenAI-compatible envelope. Anthropic's
    // stop_reason is passed through verbatim when present.
    Ok(ChatResponse {
        id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
        object: "chat.completion",
        created: chrono::Utc::now().timestamp(),
        model,
        choices: vec![Choice {
            index: 0,
            message: Message::new_text("assistant", text),
            finish_reason: parsed.stop_reason.unwrap_or_else(|| "stop".into()),
        }],
        usage: UsageBlock {
            prompt_tokens,
            completion_tokens,
            total_tokens: prompt_tokens + completion_tokens,
        },
    })
}
// -- Anthropic Messages API wire shapes --
/// Request body for POST {CLAUDE_BASE_URL}/messages.
#[derive(Serialize)]
struct AnChatBody {
    model: String,
    messages: Vec<AnMessage>,
    max_tokens: u32,
    temperature: f64,
    // Top-level system prompt; omitted from the JSON entirely when
    // None (Anthropic rejects null here).
    #[serde(skip_serializing_if = "Option::is_none")]
    system: Option<String>,
}
/// One conversation turn; role is "user" or "assistant" only.
#[derive(Serialize)]
struct AnMessage { role: String, content: String }
/// The subset of the Messages response we consume.
#[derive(Deserialize)]
struct AnChatResponse {
    content: Vec<AnContentBlock>,
    #[serde(default, rename = "stop_reason")]
    stop_reason: Option<String>,
    #[serde(default)]
    usage: Option<AnUsage>,
}
/// One content block; chat() uses only blocks whose type == "text".
#[derive(Deserialize)]
struct AnContentBlock {
    #[serde(rename = "type")]
    block_type: String,
    #[serde(default)]
    text: String,
}
/// Token accounting as reported by the API.
#[derive(Deserialize)]
struct AnUsage { input_tokens: u32, output_tokens: u32 }
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn resolve_claude_key_does_not_panic() {
        // Smoke test only — result depends on env/.env files.
        let _ = resolve_claude_key();
    }
    #[test]
    fn chat_body_serializes_with_separate_system() {
        let body = AnChatBody {
            model: "claude-3-5-sonnet-latest".into(),
            messages: vec![
                AnMessage { role: "user".into(), content: "hi".into() },
            ],
            max_tokens: 800,
            temperature: 0.3,
            system: Some("You are helpful.".into()),
        };
        let json = serde_json::to_string(&body).unwrap();
        // System must be a top-level field, not folded into messages.
        assert!(json.contains("\"system\":\"You are helpful.\""));
        assert!(json.contains("\"messages\""));
        assert!(json.contains("\"max_tokens\":800"));
    }
    #[test]
    fn body_omits_system_when_none() {
        let body = AnChatBody {
            model: "claude-3-5-sonnet-latest".into(),
            messages: vec![AnMessage { role: "user".into(), content: "hi".into() }],
            max_tokens: 800,
            temperature: 0.3,
            system: None,
        };
        let json = serde_json::to_string(&body).unwrap();
        assert!(!json.contains("\"system\""), "system field should be skipped when None: {json}");
    }
    #[test]
    fn model_prefix_strip_preserves_bare_names() {
        // Mirrors the strip_prefix("claude/") line in chat().
        let cases = [
            ("claude/claude-3-5-sonnet-latest", "claude-3-5-sonnet-latest"),
            ("claude-3-5-sonnet-latest", "claude-3-5-sonnet-latest"),
        ];
        for (input, expected) in cases {
            let out = input.strip_prefix("claude/").unwrap_or(input);
            assert_eq!(out, expected);
        }
    }
}

View File

@ -0,0 +1,230 @@
//! Gemini adapter — Google's Generative Language API.
//!
//! POST `https://generativelanguage.googleapis.com/v1beta/models/
//! {model}:generateContent?key=<API_KEY>`. Auth via query-string key
//! (not bearer). Payload shape is NOT OpenAI-compatible — we map
//! messages → contents + parts, extract response from `candidates[0]
//! .content.parts[0].text`. Phase 40 deliverable; gate: `/v1/chat`
//! with a prefixed or explicit gemini model returns normally.
use std::time::Duration;
use serde::{Deserialize, Serialize};
use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};
const GEMINI_BASE_URL: &str = "https://generativelanguage.googleapis.com/v1beta";
const GEMINI_TIMEOUT_SECS: u64 = 180;
/// Locate the Gemini API key. Priority: the `GEMINI_API_KEY` env var,
/// then a `GEMINI_API_KEY=` line in /home/profit/.env or /root/.env
/// (first file, first non-empty match wins). Values are trimmed and
/// stripped of surrounding quotes; blank values are skipped. Returns
/// None when nothing usable is found.
pub fn resolve_gemini_key() -> Option<String> {
    // 1. Environment variable, ignoring blank/whitespace-only values.
    match std::env::var("GEMINI_API_KEY") {
        Ok(v) if !v.trim().is_empty() => return Some(v.trim().to_string()),
        _ => {}
    }
    // 2. Dotenv-style files, scanned in order.
    ["/home/profit/.env", "/root/.env"]
        .iter()
        .filter_map(|path| std::fs::read_to_string(path).ok())
        .flat_map(|raw| {
            raw.lines()
                .filter_map(|line| line.strip_prefix("GEMINI_API_KEY="))
                .map(|rest| rest.trim().trim_matches('"').trim_matches('\'').to_string())
                .collect::<Vec<_>>()
        })
        .find(|key| !key.is_empty())
}
/// POST `{model}:generateContent` on the Generative Language API and
/// normalize the reply into the gateway's OpenAI-compatible
/// `ChatResponse`.
///
/// * `key` — Google API key, sent as a query-string parameter (this
///   API does not use bearer auth).
/// * `req` — incoming gateway request; a `gemini/` model prefix is
///   stripped so the upstream sees the bare model id.
///
/// Errors are stringly-typed: unreachable host, non-2xx status (body
/// included), unparseable response, or an empty candidates array.
pub async fn chat(
    key: &str,
    req: &ChatRequest,
) -> Result<ChatResponse, String> {
    // Strip the "gemini/" prefix if the caller used the namespaced form.
    let model = req.model.strip_prefix("gemini/").unwrap_or(&req.model).to_string();
    // Gemini splits system prompt from conversation differently.
    // Simplest working mapping: system messages become "user" turns,
    // assistant turns become "model", each as its own contents entry.
    // Covers the common single-turn case the scrum pipeline uses.
    let mut contents: Vec<GmContent> = Vec::new();
    for m in &req.messages {
        let role = match m.role.as_str() {
            "system" | "user" => "user",
            _ => "model",
        };
        contents.push(GmContent {
            role: role.to_string(),
            parts: vec![GmPart { text: m.text() }],
        });
    }
    let body = GmChatBody {
        contents,
        generation_config: GmGenerationConfig {
            temperature: req.temperature.unwrap_or(0.3),
            max_output_tokens: req.max_tokens.unwrap_or(800),
        },
    };
    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(GEMINI_TIMEOUT_SECS))
        .build()
        .map_err(|e| format!("build client: {e}"))?;
    // Query-string key auth, per the v1beta generateContent endpoint.
    let url = format!("{}/models/{}:generateContent?key={}", GEMINI_BASE_URL, model, key);
    let t0 = std::time::Instant::now();
    let resp = client
        .post(&url)
        .json(&body)
        .send()
        .await
        .map_err(|e| format!("generativelanguage.googleapis.com unreachable: {e}"))?;
    let status = resp.status();
    if !status.is_success() {
        let body = resp.text().await.unwrap_or_else(|_| "?".into());
        return Err(format!("gemini {}: {}", status, body));
    }
    let parsed: GmChatResponse = resp.json().await
        .map_err(|e| format!("invalid gemini response: {e}"))?;
    let latency_ms = t0.elapsed().as_millis();
    let candidate = parsed.candidates.into_iter().next()
        .ok_or_else(|| "gemini returned no candidates".to_string())?;
    // FIX: concatenate ALL parts of the first candidate. Gemini splits
    // long outputs across multiple parts; the previous parts[0]-only
    // read silently truncated everything after the first part.
    let text: String = candidate.content.parts.into_iter()
        .map(|p| p.text)
        .collect();
    // Token counts: prefer usageMetadata when present; otherwise a
    // ~4-chars-per-token estimate so /v1/usage still accumulates.
    let prompt_tokens = parsed.usage_metadata.as_ref()
        .map(|u| u.prompt_token_count)
        .unwrap_or_else(|| {
            let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
            ((chars + 3) / 4) as u32
        });
    let completion_tokens = parsed.usage_metadata.as_ref()
        .map(|u| u.candidates_token_count)
        .unwrap_or_else(|| ((text.chars().count() + 3) / 4) as u32);
    tracing::info!(
        target: "v1.chat",
        provider = "gemini",
        model = %model,
        prompt_tokens,
        completion_tokens,
        latency_ms = latency_ms as u64,
        "gemini chat completed",
    );
    Ok(ChatResponse {
        id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
        object: "chat.completion",
        created: chrono::Utc::now().timestamp(),
        model,
        choices: vec![Choice {
            index: 0,
            message: Message::new_text("assistant", text),
            finish_reason: candidate.finish_reason.unwrap_or_else(|| "stop".into()),
        }],
        usage: UsageBlock {
            prompt_tokens,
            completion_tokens,
            total_tokens: prompt_tokens + completion_tokens,
        },
    })
}
// -- Gemini wire shapes --
// These structs mirror the generateContent JSON exactly; the serde
// renames/camelCase attributes ARE the wire contract — do not rename.

/// POST body for `{model}:generateContent`.
#[derive(Serialize)]
struct GmChatBody {
    contents: Vec<GmContent>,
    #[serde(rename = "generationConfig")]
    generation_config: GmGenerationConfig,
}

/// One conversation turn; role is "user" or "model".
#[derive(Serialize)]
struct GmContent {
    role: String,
    parts: Vec<GmPart>,
}

/// Text fragment within a turn.
#[derive(Serialize)]
struct GmPart { text: String }

/// Sampling knobs; serialized camelCase per the Gemini API.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct GmGenerationConfig {
    temperature: f64,
    max_output_tokens: u32,
}

/// Response envelope — only the fields this adapter reads.
#[derive(Deserialize)]
struct GmChatResponse {
    candidates: Vec<GmCandidate>,
    /// Token accounting; Option so a missing block doesn't fail the parse.
    #[serde(default, rename = "usageMetadata")]
    usage_metadata: Option<GmUsage>,
}

/// One generated candidate; `chat` reads only the first.
#[derive(Deserialize)]
struct GmCandidate {
    content: GmContentResp,
    #[serde(default, rename = "finishReason")]
    finish_reason: Option<String>,
}

/// Candidate content: an array of text parts.
#[derive(Deserialize)]
struct GmContentResp { parts: Vec<GmPartResp> }

/// Response part; `default` keeps the parse lenient on non-text parts.
#[derive(Deserialize)]
struct GmPartResp { #[serde(default)] text: String }

/// usageMetadata counts (camelCase on the wire).
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct GmUsage {
    prompt_token_count: u32,
    candidates_token_count: u32,
}

#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test: key resolution reads env + dotfiles; must never panic.
    #[test]
    fn resolve_gemini_key_does_not_panic() {
        let _ = resolve_gemini_key();
    }

    // Serialized body must match Gemini's contents/parts shape with a
    // camelCase generation config — NOT the OpenAI messages shape.
    #[test]
    fn chat_body_serializes_to_gemini_shape() {
        let body = GmChatBody {
            contents: vec![
                GmContent {
                    role: "user".into(),
                    parts: vec![GmPart { text: "hello".into() }],
                },
            ],
            generation_config: GmGenerationConfig {
                temperature: 0.3,
                max_output_tokens: 800,
            },
        };
        let json = serde_json::to_string(&body).unwrap();
        assert!(json.contains("\"contents\""));
        assert!(json.contains("\"parts\""));
        // camelCase per Gemini API
        assert!(json.contains("\"generationConfig\""));
        assert!(json.contains("\"maxOutputTokens\":800"));
    }

    // The gemini/ namespace prefix strips; bare names pass through.
    #[test]
    fn model_prefix_strip_preserves_bare_names() {
        let cases = [
            ("gemini/gemini-2.0-flash", "gemini-2.0-flash"),
            ("gemini-2.0-flash", "gemini-2.0-flash"),
        ];
        for (input, expected) in cases {
            let out = input.strip_prefix("gemini/").unwrap_or(input);
            assert_eq!(out, expected);
        }
    }
}

View File

@ -0,0 +1,313 @@
//! /v1/iterate — the Phase 43 PRD's "generate → validate → correct → retry" loop.
//!
//! Closes the "0→85% with iteration" thesis structurally. A caller
//! posts a prompt + artifact kind + validation context; the gateway:
//! 1. Generates a JSON artifact via /v1/chat (any provider/model)
//! 2. Extracts the JSON object from the model output
//! 3. Validates via /v1/validate (FillValidator / EmailValidator /
//! PlaybookValidator with the shared WorkerLookup)
//! 4. On ValidationError, appends the error to the prompt and
//! retries up to `max_iterations` (default 3)
//! 5. Returns the accepted artifact + Report on success, OR the
//! attempt history + final error on max-iter exhaustion
//!
//! Internal calls go via HTTP loopback to localhost:gateway_port so
//! the same /v1/usage tracking and Langfuse traces apply. A small
//! latency cost (~1-3ms per loopback hop) for clean separation of
//! concerns and observability.
//!
//! 2026-04-27 Phase 43 v3 part 3: this endpoint makes the iteration
//! loop a first-class lakehouse capability rather than a per-caller
//! re-implementation. Staffing executors, agent loops, and future
//! validators all reach the same code path.
use axum::{extract::State, http::StatusCode, response::IntoResponse, Json};
use serde::{Deserialize, Serialize};
const DEFAULT_MAX_ITERATIONS: u32 = 3;
const LOOPBACK_TIMEOUT_SECS: u64 = 240;
/// Request body for POST /v1/iterate.
#[derive(Deserialize)]
pub struct IterateRequest {
    /// "fill" | "email" | "playbook" — picks which validator runs.
    pub kind: String,
    /// The prompt to seed generation. Validation errors from prior
    /// attempts are appended on retry.
    pub prompt: String,
    /// Provider/model passed through to /v1/chat. e.g. "ollama_cloud"
    /// + "kimi-k2.6", or "opencode" + "claude-haiku-4-5".
    pub provider: String,
    pub model: String,
    /// Optional system prompt — sent to /v1/chat as the system message.
    #[serde(default)]
    pub system: Option<String>,
    /// Validation context (target_count, city, state, role, client_id
    /// for fills; candidate_id for emails). Forwarded to /v1/validate.
    #[serde(default)]
    pub context: Option<serde_json::Value>,
    /// Cap on iteration count. Defaults to 3 per the Phase 43 PRD.
    #[serde(default)]
    pub max_iterations: Option<u32>,
    /// Forwarded to /v1/chat. Defaults to 0.2 if unset.
    #[serde(default)]
    pub temperature: Option<f64>,
    /// Forwarded to /v1/chat. Defaults to 4096 if unset.
    #[serde(default)]
    pub max_tokens: Option<u32>,
}

/// One generate→validate round, recorded in the response history.
#[derive(Serialize)]
pub struct IterateAttempt {
    /// 0-based round index.
    pub iteration: u32,
    /// Raw model output, truncated to 2000 chars by the handler.
    pub raw: String,
    pub status: AttemptStatus,
}

/// Terminal status of one attempt. Serializes with an internal tag:
/// `{"kind": "no_json" | "validation_failed" | "accepted", ...}`.
#[derive(Serialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum AttemptStatus {
    /// Model output didn't contain extractable JSON.
    NoJson,
    /// JSON extracted but failed validation; carries the error.
    ValidationFailed { error: serde_json::Value },
    /// Validation passed (last attempt's terminal status).
    Accepted,
}

/// 200 body: the accepted artifact plus the validator's report.
#[derive(Serialize)]
pub struct IterateResponse {
    /// The JSON artifact that passed validation.
    pub artifact: serde_json::Value,
    /// The /v1/validate report, verbatim.
    pub validation: serde_json::Value,
    /// 1-based count of rounds actually run.
    pub iterations: u32,
    pub history: Vec<IterateAttempt>,
}

/// 422 body when all rounds are exhausted without acceptance.
#[derive(Serialize)]
pub struct IterateFailure {
    pub error: String,
    pub iterations: u32,
    pub history: Vec<IterateAttempt>,
}
/// POST /v1/iterate — the generate → validate → correct → retry loop.
///
/// Runs up to `max_iterations` rounds of: call /v1/chat with the
/// (possibly feedback-augmented) prompt, extract a JSON object from
/// the reply, validate via /v1/validate. The first accepted artifact
/// returns 200 with the full attempt history; exhaustion returns 422
/// with history + terminal error. A failed /v1/chat hop aborts
/// immediately with 502 (a broken provider is not a correctable
/// artifact, so no retry).
pub async fn iterate(
    State(state): State<super::V1State>,
    Json(req): Json<IterateRequest>,
) -> impl IntoResponse {
    // Clamp to at least one round; defaults per the Phase 43 PRD.
    let max_iter = req.max_iterations.unwrap_or(DEFAULT_MAX_ITERATIONS).max(1);
    let temperature = req.temperature.unwrap_or(0.2);
    let max_tokens = req.max_tokens.unwrap_or(4096);
    let mut history: Vec<IterateAttempt> = Vec::with_capacity(max_iter as usize);
    // Mutated between rounds: original prompt + latest failure feedback.
    let mut current_prompt = req.prompt.clone();
    let client = match reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(LOOPBACK_TIMEOUT_SECS))
        .build() {
        Ok(c) => c,
        Err(e) => return (StatusCode::INTERNAL_SERVER_ERROR, format!("client build: {e}")).into_response(),
    };
    // Self-loopback to the gateway port. Carries gateway internal
    // calls through /v1/chat + /v1/validate so /v1/usage tracks them.
    let gateway = "http://127.0.0.1:3100";
    for iteration in 0..max_iter {
        // ── Generate ──
        let mut messages = Vec::with_capacity(2);
        if let Some(sys) = &req.system {
            messages.push(serde_json::json!({"role": "system", "content": sys}));
        }
        messages.push(serde_json::json!({"role": "user", "content": current_prompt}));
        let chat_body = serde_json::json!({
            "messages": messages,
            "provider": req.provider,
            "model": req.model,
            "temperature": temperature,
            "max_tokens": max_tokens,
        });
        let raw = match call_chat(&client, gateway, &chat_body).await {
            Ok(r) => r,
            Err(e) => return (StatusCode::BAD_GATEWAY, format!("/v1/chat hop failed at iter {iteration}: {e}")).into_response(),
        };
        // ── Extract JSON ──
        let artifact = match extract_json(&raw) {
            Some(a) => a,
            None => {
                // History keeps only the first 2000 chars of raw output
                // to bound response size.
                history.push(IterateAttempt {
                    iteration,
                    raw: raw.chars().take(2000).collect(),
                    status: AttemptStatus::NoJson,
                });
                // Retry from the ORIGINAL prompt plus a format nudge —
                // feedback is not accumulated across rounds.
                current_prompt = format!(
                    "{}\n\nYour previous attempt did not contain a JSON object. Reply with ONLY a valid JSON object matching the requested artifact shape.",
                    req.prompt,
                );
                continue;
            }
        };
        // ── Validate ──
        let validate_body = serde_json::json!({
            "kind": req.kind,
            "artifact": artifact,
            "context": req.context.clone().unwrap_or(serde_json::Value::Null),
        });
        match call_validate(&client, gateway, &validate_body).await {
            Ok(report) => {
                history.push(IterateAttempt {
                    iteration,
                    raw: raw.chars().take(2000).collect(),
                    status: AttemptStatus::Accepted,
                });
                // Success: accepted artifact, validator report, and a
                // 1-based iteration count.
                return (StatusCode::OK, Json(IterateResponse {
                    artifact,
                    validation: report,
                    iterations: iteration + 1,
                    history,
                })).into_response();
            }
            Err(err) => {
                let err_summary = err.to_string();
                history.push(IterateAttempt {
                    iteration,
                    raw: raw.chars().take(2000).collect(),
                    status: AttemptStatus::ValidationFailed {
                        error: serde_json::to_value(&err_summary).unwrap_or(serde_json::Value::Null),
                    },
                });
                // Append validation feedback to prompt for next iter.
                // The model sees concrete failure mode + retries with
                // corrective context. This is the "observer correction"
                // in Phase 43 PRD shape, simplified — the validator
                // itself IS the observer for now.
                current_prompt = format!(
                    "{}\n\nPrior attempt failed validation:\n{}\n\nFix the specific issue above and respond with a corrected JSON object.",
                    req.prompt, err_summary,
                );
                continue;
            }
        }
    }
    // Every round exhausted without an accepted artifact.
    (StatusCode::UNPROCESSABLE_ENTITY, Json(IterateFailure {
        error: format!("max iterations reached ({max_iter}) without passing validation"),
        iterations: max_iter,
        history,
    })).into_response()
}
/// Loopback hop to the gateway's own /v1/chat. Returns the assistant
/// message text (empty string when the pointer path is absent); non-2xx
/// responses surface as Err with a 300-char body snippet.
async fn call_chat(client: &reqwest::Client, gateway: &str, body: &serde_json::Value) -> Result<String, String> {
    let url = format!("{gateway}/v1/chat");
    let reply = client.post(url)
        .json(body)
        .send()
        .await
        .map_err(|e| format!("chat hop: {e}"))?;
    let code = reply.status();
    if code.is_success() {
        let payload: serde_json::Value =
            reply.json().await.map_err(|e| format!("chat parse: {e}"))?;
        let content = payload
            .pointer("/choices/0/message/content")
            .and_then(serde_json::Value::as_str)
            .unwrap_or("");
        Ok(content.to_owned())
    } else {
        let detail = reply.text().await.unwrap_or_default();
        let snippet: String = detail.chars().take(300).collect();
        Err(format!("chat {}: {}", code, snippet))
    }
}
/// Loopback hop to /v1/validate. 2xx → Ok with the report JSON; any
/// other status → Err carrying the body serialized verbatim, so the
/// prompt-appending step gets the concrete failure detail.
async fn call_validate(client: &reqwest::Client, gateway: &str, body: &serde_json::Value) -> Result<serde_json::Value, String> {
    let reply = client
        .post(format!("{gateway}/v1/validate"))
        .json(body)
        .send()
        .await
        .map_err(|e| format!("validate hop: {e}"))?;
    let code = reply.status();
    let report: serde_json::Value = reply
        .json()
        .await
        .map_err(|e| format!("validate parse: {e}"))?;
    if !code.is_success() {
        // The /v1/validate endpoint returns a ValidationError JSON on
        // 422; surface its structure as a string for the retry prompt.
        return Err(serde_json::to_string(&report)
            .unwrap_or_else(|_| format!("validation {} (unparseable body)", code)));
    }
    Ok(report)
}
/// Extract the first JSON object from a model's output. Handles
/// fenced code blocks (```json ... ```), bare braces, and stray
/// prose around the JSON. Returns None on no extractable object.
///
/// Fixes over the original scanner:
/// * brace counting is now string-aware (with \" escape handling), so
///   an object whose string values contain `{`/`}` — e.g.
///   `{"a":"}"}` — is extracted correctly;
/// * a stray `}` before any `{` no longer drives the depth negative,
///   which previously made `} {"a":1}` return None.
fn extract_json(raw: &str) -> Option<serde_json::Value> {
    // Try fenced code blocks first; first valid JSON object wins.
    let mut s = raw;
    while let Some(start) = s.find("```") {
        let after = &s[start + 3..];
        // Skip the optional language-tag line (```json etc.).
        let body_start = after.find('\n').map(|n| n + 1).unwrap_or(0);
        let body = &after[body_start..];
        let end = match body.find("```") {
            Some(e) => e,
            None => break, // unterminated fence — fall through to brace scan
        };
        let candidate = body[..end].trim();
        if let Ok(v) = serde_json::from_str::<serde_json::Value>(candidate) {
            if v.is_object() { return Some(v); }
        }
        s = &body[end + 3..];
    }
    // Fall back to outermost {...} balance, ignoring braces that sit
    // inside JSON string literals.
    let bytes = raw.as_bytes();
    let mut depth = 0i32;
    let mut start: Option<usize> = None;
    let mut in_string = false;
    let mut escaped = false;
    for (i, &b) in bytes.iter().enumerate() {
        if in_string {
            // Inside a string literal: only track escape + close quote.
            if escaped { escaped = false; }
            else if b == b'\\' { escaped = true; }
            else if b == b'"' { in_string = false; }
            continue;
        }
        match b {
            // Quotes only matter once we're inside a candidate object;
            // quotes in surrounding prose are irrelevant.
            b'"' if start.is_some() => in_string = true,
            b'{' => {
                if start.is_none() { start = Some(i); }
                depth += 1;
            }
            b'}' => {
                // Guard: a stray closer before any opener is ignored
                // rather than pushing depth negative.
                if depth > 0 {
                    depth -= 1;
                    if depth == 0 {
                        if let Some(sidx) = start {
                            let slice = &raw[sidx..=i];
                            if let Ok(v) = serde_json::from_str::<serde_json::Value>(slice) {
                                if v.is_object() { return Some(v); }
                            }
                            // Invalid candidate — keep scanning forward.
                            start = None;
                        }
                    }
                }
            }
            _ => {}
        }
    }
    None
}
#[cfg(test)]
mod tests {
    use super::*;

    // Fenced ```json blocks are the preferred extraction path.
    #[test]
    fn extract_json_from_fenced_block() {
        let raw = "Here's my answer:\n```json\n{\"fills\": [{\"candidate_id\": \"W-1\"}]}\n```\nDone.";
        let v = extract_json(raw).unwrap();
        assert!(v.get("fills").is_some());
    }

    // Bare {...} inside prose falls back to the brace-balance scan.
    #[test]
    fn extract_json_from_bare_braces() {
        let raw = "Here you go: {\"fills\": [{\"candidate_id\": \"W-2\"}]}";
        let v = extract_json(raw).unwrap();
        assert!(v.get("fills").is_some());
    }

    // No object anywhere → None (the handler records AttemptStatus::NoJson).
    #[test]
    fn extract_json_returns_none_on_no_object() {
        assert!(extract_json("just prose, no json").is_none());
    }

    // When several objects appear, the first balanced one wins.
    #[test]
    fn extract_json_picks_first_balanced() {
        let raw = "{\"a\":1} then {\"b\":2}";
        let v = extract_json(raw).unwrap();
        assert_eq!(v.get("a").and_then(|v| v.as_i64()), Some(1));
    }
}

View File

@ -0,0 +1,227 @@
//! Kimi For Coding adapter — direct provider for `kimi-for-coding`
//! (kimi-k2.6 underneath). Used when Ollama Cloud's `kimi-k2:1t` is
//! returning sustained 5xx (broken upstream) and OpenRouter's
//! `moonshotai/kimi-k2.6` is rate-limited.
//!
//! Endpoint per `kimi.com/code/docs` and `moonshotai.github.io/kimi-cli`:
//! base_url: https://api.kimi.com/coding/v1
//! model id: kimi-for-coding
//! auth: Bearer sk-kimi-…
//! protocol: OpenAI Chat Completions compatible
//!
//! IMPORTANT: `api.kimi.com` is a separate account system from
//! `api.moonshot.ai` and `api.moonshot.cn`. Keys are NOT interchangeable.
//! This adapter is for `sk-kimi-*` keys provisioned via the Kimi
//! membership console only.
//!
//! Key sourcing priority:
//! 1. Env var `KIMI_API_KEY` (loaded from /etc/lakehouse/kimi.env via
//! systemd EnvironmentFile=)
//! 2. /etc/lakehouse/kimi.env directly (rescue path if env not loaded)
//!
//! First hit wins. Resolved once at gateway startup, stored on
//! `V1State.kimi_key`.
use std::time::Duration;
use serde::{Deserialize, Serialize};
use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};
const KIMI_BASE_URL: &str = "https://api.kimi.com/coding/v1";
// Default 600s — kimi-for-coding is a reasoning model; on large
// code-audit prompts (~50KB+ input + 8K output) it routinely needs
// 3-8 min to think + emit. Override with KIMI_TIMEOUT_SECS env var.
const KIMI_TIMEOUT_SECS_DEFAULT: u64 = 600;
/// Effective request timeout in seconds: a positive integer parsed from
/// the `KIMI_TIMEOUT_SECS` env var, else `KIMI_TIMEOUT_SECS_DEFAULT`.
/// Unset, non-numeric, and zero values all fall back to the default.
fn kimi_timeout_secs() -> u64 {
    if let Ok(raw) = std::env::var("KIMI_TIMEOUT_SECS") {
        if let Ok(secs) = raw.trim().parse::<u64>() {
            if secs > 0 {
                return secs;
            }
        }
    }
    KIMI_TIMEOUT_SECS_DEFAULT
}
/// Resolve the api.kimi.com bearer token. Priority: the `KIMI_API_KEY`
/// env var, then a `KIMI_API_KEY=` line in /etc/lakehouse/kimi.env
/// (first non-empty match wins). Values are trimmed and stripped of
/// surrounding quotes; None when nothing usable is found.
pub fn resolve_kimi_key() -> Option<String> {
    // 1. Environment variable, ignoring blank values.
    let from_env = std::env::var("KIMI_API_KEY")
        .ok()
        .map(|v| v.trim().to_string())
        .filter(|v| !v.is_empty());
    if from_env.is_some() {
        return from_env;
    }
    // 2. Rescue path: read the EnvironmentFile directly.
    let raw = std::fs::read_to_string("/etc/lakehouse/kimi.env").ok()?;
    raw.lines()
        .filter_map(|line| line.strip_prefix("KIMI_API_KEY="))
        .map(|rest| rest.trim().trim_matches('"').trim_matches('\'').to_string())
        .find(|key| !key.is_empty())
}
/// POST /chat/completions on api.kimi.com and normalize the reply to
/// the gateway's OpenAI-compatible `ChatResponse`.
///
/// * `key` — `sk-kimi-*` bearer token (Kimi membership console; NOT a
///   moonshot.ai / moonshot.cn key — see the module header).
/// * `req` — incoming gateway request; a `kimi/` model prefix is
///   stripped so the upstream sees the bare model id.
///
/// Errors are stringly-typed: unreachable host, non-2xx status (body
/// included), unparseable response, or an empty choices array.
pub async fn chat(
    key: &str,
    req: &ChatRequest,
) -> Result<ChatResponse, String> {
    // Strip the "kimi/" namespace prefix if the caller used it so the
    // upstream API sees the bare model id (e.g. "kimi-for-coding").
    let model = req.model.strip_prefix("kimi/").unwrap_or(&req.model).to_string();
    // Flatten content to a plain String. api.kimi.com is text-only on
    // the coding endpoint; the OpenAI multimodal array shape
    // ([{type:"text",text:"..."},{type:"image_url",...}]) returns 400.
    // Message::text() concats text-parts and drops non-text. Caught
    // 2026-04-27 by Kimi's self-audit (kimi.rs:137 — content as raw
    // serde_json::Value risked upstream rejection).
    let body = KimiChatBody {
        model: model.clone(),
        messages: req.messages.iter().map(|m| KimiMessage {
            role: m.role.clone(),
            content: serde_json::Value::String(m.text()),
        }).collect(),
        max_tokens: req.max_tokens.unwrap_or(800),
        temperature: req.temperature.unwrap_or(0.3),
        stream: false,
    };
    // Timeout is env-overridable (KIMI_TIMEOUT_SECS) — reasoning model,
    // long prompts routinely need minutes. See kimi_timeout_secs().
    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(kimi_timeout_secs()))
        .build()
        .map_err(|e| format!("build client: {e}"))?;
    let t0 = std::time::Instant::now();
    let resp = client
        .post(format!("{}/chat/completions", KIMI_BASE_URL))
        .bearer_auth(key)
        // api.kimi.com gates this endpoint by User-Agent — only sanctioned
        // coding agents (Claude Code, Kimi CLI, Roo Code, Kilo Code) get
        // through. Generic clients receive 403 access_terminated_error.
        // J accepted the TOS risk on 2026-04-27; revisit if Moonshot
        // tightens enforcement.
        .header("User-Agent", "claude-code/1.0.0")
        .json(&body)
        .send()
        .await
        .map_err(|e| format!("api.kimi.com unreachable: {e}"))?;
    let status = resp.status();
    if !status.is_success() {
        let body = resp.text().await.unwrap_or_else(|_| "?".into());
        return Err(format!("api.kimi.com {}: {}", status, body));
    }
    let parsed: KimiChatResponse = resp.json().await
        .map_err(|e| format!("invalid kimi response: {e}"))?;
    let latency_ms = t0.elapsed().as_millis();
    let choice = parsed.choices.into_iter().next()
        .ok_or_else(|| "kimi returned no choices".to_string())?;
    let text = choice.message.content;
    // Token counts: prefer the API's usage block; otherwise fall back
    // to a ~4-chars-per-token estimate so /v1/usage still accumulates.
    let prompt_tokens = parsed.usage.as_ref().map(|u| u.prompt_tokens).unwrap_or_else(|| {
        let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
        ((chars + 3) / 4) as u32
    });
    let completion_tokens = parsed.usage.as_ref().map(|u| u.completion_tokens).unwrap_or_else(|| {
        ((text.chars().count() + 3) / 4) as u32
    });
    tracing::info!(
        target: "v1.chat",
        provider = "kimi",
        model = %model,
        prompt_tokens,
        completion_tokens,
        latency_ms = latency_ms as u64,
        "kimi chat completed",
    );
    Ok(ChatResponse {
        id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
        object: "chat.completion",
        created: chrono::Utc::now().timestamp(),
        model,
        choices: vec![Choice {
            index: 0,
            message: Message { role: "assistant".into(), content: serde_json::Value::String(text) },
            finish_reason: choice.finish_reason.unwrap_or_else(|| "stop".into()),
        }],
        usage: UsageBlock {
            prompt_tokens,
            completion_tokens,
            total_tokens: prompt_tokens + completion_tokens,
        },
    })
}
// -- Kimi wire shapes (OpenAI-compatible) --
// These structs mirror the api.kimi.com chat/completions JSON; the
// field names ARE the wire contract — do not rename.

/// POST body for /chat/completions.
#[derive(Serialize)]
struct KimiChatBody {
    model: String,
    messages: Vec<KimiMessage>,
    max_tokens: u32,
    temperature: f64,
    /// Always false — this adapter does not stream.
    stream: bool,
}

/// One conversation turn. `content` is a serde_json::Value but the
/// adapter always sends Value::String (the endpoint is text-only).
#[derive(Serialize)]
struct KimiMessage { role: String, content: serde_json::Value }

/// Response envelope — only the fields this adapter reads.
#[derive(Deserialize)]
struct KimiChatResponse {
    choices: Vec<KimiChoice>,
    /// Token accounting; Option so a missing block doesn't fail the parse.
    #[serde(default)]
    usage: Option<KimiUsage>,
}

/// One completion choice; `chat` reads only the first.
#[derive(Deserialize)]
struct KimiChoice {
    message: KimiMessageResp,
    #[serde(default)]
    finish_reason: Option<String>,
}

/// Assistant message in the response; content is a plain string here.
#[derive(Deserialize)]
struct KimiMessageResp { content: String }

/// OpenAI-style token usage counts.
#[derive(Deserialize)]
struct KimiUsage { prompt_tokens: u32, completion_tokens: u32 }

#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test: key resolution reads env + a config file; must not panic.
    #[test]
    fn resolve_kimi_key_does_not_panic() {
        let _ = resolve_kimi_key();
    }

    // Serialized body must match the OpenAI chat/completions shape,
    // including the explicit stream:false.
    #[test]
    fn chat_body_serializes_to_openai_shape() {
        let body = KimiChatBody {
            model: "kimi-for-coding".into(),
            messages: vec![
                KimiMessage { role: "user".into(), content: "review this".into() },
            ],
            max_tokens: 800,
            temperature: 0.3,
            stream: false,
        };
        let json = serde_json::to_string(&body).unwrap();
        assert!(json.contains("\"model\":\"kimi-for-coding\""));
        assert!(json.contains("\"messages\""));
        assert!(json.contains("\"max_tokens\":800"));
        assert!(json.contains("\"stream\":false"));
    }

    // The kimi/ namespace prefix strips; bare names pass through.
    #[test]
    fn model_prefix_strip() {
        let cases = [
            ("kimi/kimi-for-coding", "kimi-for-coding"),
            ("kimi-for-coding", "kimi-for-coding"),
            ("kimi/kimi-k2.6", "kimi-k2.6"),
        ];
        for (input, expected) in cases {
            let out = input.strip_prefix("kimi/").unwrap_or(input);
            assert_eq!(out, expected, "{input} should become {expected}");
        }
    }
}

View File

@ -13,7 +13,17 @@
pub mod ollama; pub mod ollama;
pub mod ollama_cloud; pub mod ollama_cloud;
pub mod openrouter;
pub mod gemini;
pub mod claude;
pub mod kimi;
pub mod opencode;
pub mod validate;
pub mod iterate;
pub mod langfuse_trace; pub mod langfuse_trace;
pub mod mode;
pub mod respond;
pub mod truth;
use axum::{ use axum::{
Router, Router,
@ -24,7 +34,7 @@ use axum::{
Json, Json,
}; };
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::{collections::HashMap, sync::Arc}; use std::sync::Arc;
use tokio::sync::RwLock; use tokio::sync::RwLock;
#[derive(Clone)] #[derive(Clone)]
@ -34,6 +44,41 @@ pub struct V1State {
/// Ollama Cloud bearer token. Loaded at startup via /// Ollama Cloud bearer token. Loaded at startup via
/// `ollama_cloud::resolve_cloud_key()`. None = cloud routes 503. /// `ollama_cloud::resolve_cloud_key()`. None = cloud routes 503.
pub ollama_cloud_key: Option<String>, pub ollama_cloud_key: Option<String>,
/// OpenRouter bearer token — free-tier rescue rung. Loaded at
/// startup via `openrouter::resolve_openrouter_key()`. None means
/// provider="openrouter" calls 503 rather than attempt. Same key
/// sourcing as LLM Team UI so the two share one API quota.
pub openrouter_key: Option<String>,
/// Gemini API key (Google Generative Language). Loaded at startup
/// via `gemini::resolve_gemini_key()`. None = provider="gemini"
/// calls 503. Phase 40 deliverable.
pub gemini_key: Option<String>,
/// Anthropic Claude API key. Loaded at startup via
/// `claude::resolve_claude_key()`. None = provider="claude" calls
/// 503. Phase 40 deliverable.
pub claude_key: Option<String>,
/// Kimi For Coding (api.kimi.com) bearer token — direct provider
/// for `kimi-for-coding`. Used when Ollama Cloud's `kimi-k2:1t` is
/// upstream-broken. Loaded at startup via `kimi::resolve_kimi_key()`
/// from `KIMI_API_KEY` env or `/etc/lakehouse/kimi.env`. None =
/// provider="kimi" calls 503.
pub kimi_key: Option<String>,
/// OpenCode GO (opencode.ai) bearer token — multi-vendor curated
/// gateway. One sk-* key reaches Claude Opus 4.7, GPT-5.5-pro,
/// Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM, Qwen + free-tier.
/// Loaded at startup via `opencode::resolve_opencode_key()` from
/// `OPENCODE_API_KEY` env or `/etc/lakehouse/opencode.env`. None =
/// provider="opencode" calls 503.
pub opencode_key: Option<String>,
/// Shared WorkerLookup loaded once at startup from
/// workers_500k.parquet (path: LH_WORKERS_PARQUET env, default
/// data/datasets/workers_500k.parquet). Used by /v1/validate to
/// run FillValidator/EmailValidator with worker-existence checks.
/// Falls back to an empty InMemoryWorkerLookup if the file is
/// missing — validators still run schema/PII checks but every
/// worker-existence check fails (Consistency error), which is
/// the correct behavior when the roster isn't configured.
pub validate_workers: std::sync::Arc<dyn validator::WorkerLookup>,
/// Phase 40 early deliverable — Langfuse client. None = tracing /// Phase 40 early deliverable — Langfuse client. None = tracing
/// disabled (keys missing or container unreachable). Traces are /// disabled (keys missing or container unreachable). Traces are
/// fire-and-forget: never block the response path. /// fire-and-forget: never block the response path.
@ -61,20 +106,73 @@ pub struct ProviderUsage {
pub fn router(state: V1State) -> Router { pub fn router(state: V1State) -> Router {
Router::new() Router::new()
.route("/chat", post(chat)) .route("/chat", post(chat))
// Canonical OpenAI path alias — lets any client built on the
// openai SDK (pi-ai, langchain-js, etc.) treat the gateway as
// a drop-in middleware via OPENAI_BASE_URL=http://gw/v1 alone.
// Same handler as /chat; same OpenAI-compatible request shape.
.route("/chat/completions", post(chat))
.route("/respond", post(respond::respond))
.route("/usage", get(usage)) .route("/usage", get(usage))
.route("/sessions", get(sessions)) .route("/sessions", get(sessions))
.route("/context", get(truth::context))
.route("/mode", post(mode::route))
.route("/mode/list", get(mode::list))
.route("/mode/execute", post(mode::execute))
.route("/validate", post(validate::validate))
.route("/iterate", post(iterate::iterate))
.route("/health", get(health))
.with_state(state) .with_state(state)
} }
// -- Shared types (OpenAI-compatible) -- // -- Shared types (OpenAI-compatible) --
/// OpenAI-compatible message. `content` accepts either a plain string or
/// an array of content parts (the modern multimodal shape:
/// `[{type:"text", text:"..."}, {type:"image_url", ...}]`). We store as
/// `serde_json::Value` to preserve client shape on forward; downstream
/// providers can take it verbatim. `Message::text()` flattens for
/// places that need a plain string (Ollama prompt assembly, char
/// counts, the assistant's own response synthesis).
#[derive(Serialize, Deserialize, Clone, Debug)] #[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Message { pub struct Message {
pub role: String, pub role: String,
pub content: String, pub content: serde_json::Value,
} }
#[derive(Deserialize, Debug)] impl Message {
/// Construct a plain text message — the common shape for callers
/// that don't need multimodal content. Wraps the body in
/// `serde_json::Value::String` so downstream serializers see the
/// canonical OpenAI shape.
pub fn new_text(role: impl Into<String>, body: impl Into<String>) -> Self {
Self {
role: role.into(),
content: serde_json::Value::String(body.into()),
}
}
/// Flatten content to a plain string. Strings pass through; content-
/// part arrays concatenate the `text` fields with newlines and skip
/// non-text parts (images etc.) — Phase 38/39 callers are text-only,
/// real multimodal forwarding is queued.
pub fn text(&self) -> String {
match &self.content {
serde_json::Value::String(s) => s.clone(),
serde_json::Value::Array(parts) => {
let mut out = String::new();
for p in parts {
if let Some(t) = p.get("text").and_then(|v| v.as_str()) {
if !out.is_empty() { out.push('\n'); }
out.push_str(t);
}
}
out
}
other => other.to_string(),
}
}
}
#[derive(Deserialize, Debug, Clone)]
pub struct ChatRequest { pub struct ChatRequest {
pub model: String, pub model: String,
pub messages: Vec<Message>, pub messages: Vec<Message>,
@ -130,6 +228,137 @@ pub struct UsageBlock {
// -- Handlers -- // -- Handlers --
/// Phase 39: resolve (provider, effective_model) from a ChatRequest.
///
/// Explicit `req.provider` wins (model passed through untouched). If
/// absent, infer from a model-name prefix: "openrouter/…" → openrouter,
/// "cloud/…" → ollama_cloud, likewise gemini/claude/kimi/opencode —
/// the prefix is stripped. Any other slashed name routes to OpenRouter
/// verbatim; known vendor-bare names (gpt-*, o1/o3/o4*, claude-*,
/// grok-*) are namespaced for OpenRouter; everything else defaults to
/// "ollama".
///
/// The stripped model is what the upstream adapter expects:
/// OpenRouter's API wants "openai/gpt-4o-mini", not
/// "openrouter/openai/gpt-4o-mini".
fn resolve_provider(req: &ChatRequest) -> (String, String) {
    if let Some(p) = req.provider.as_deref() {
        return (p.to_ascii_lowercase(), req.model.clone());
    }
    // Namespaced prefixes: strip and route to the named adapter.
    const PREFIXES: [(&str, &str); 6] = [
        ("openrouter/", "openrouter"),
        ("cloud/", "ollama_cloud"),
        ("gemini/", "gemini"),
        ("claude/", "claude"),
        ("kimi/", "kimi"),
        ("opencode/", "opencode"),
    ];
    for (prefix, provider) in PREFIXES {
        if let Some(rest) = req.model.strip_prefix(prefix) {
            return (provider.to_string(), rest.to_string());
        }
    }
    // Bare `vendor/model` shape (e.g. `x-ai/grok-4.1-fast`,
    // `moonshotai/kimi-k2`, `openai/gpt-oss-120b:free`) → OpenRouter.
    // This makes the gateway a drop-in OpenAI-compatible middleware:
    // clients using the official `openai` SDK only set OPENAI_BASE_URL
    // + a model name and get correct upstream routing without needing
    // our custom `provider` field. Ollama models in J's stack use
    // `model:tag` form with NO slash (`qwen3.5:latest`, `kimi-k2:1t`),
    // so a slash here unambiguously means "namespaced provider/model".
    if req.model.contains('/') {
        return ("openrouter".to_string(), req.model.clone());
    }
    // Vendor-bare model names — no slash (guaranteed by the early
    // return above, so the old `!contains('/')` re-check was dead code
    // and is dropped) and no Ollama-style `:tag`. Tools like pi-ai
    // validate models against an OpenAI-style catalog (no namespace
    // prefix), so they send the bare name. Map to OpenRouter's
    // namespaced form by inferring the vendor from the leading token.
    // Falls through to ollama if no pattern matches.
    if !req.model.contains(':') {
        let m = req.model.as_str();
        // `o1`/`o3` cover the bare reasoning models with no dash
        // suffix; `o4-mini` was redundant with the `o4-` prefix match
        // and is dropped (behavior unchanged).
        if m.starts_with("gpt-") || m.starts_with("o1-") || m.starts_with("o3-") || m.starts_with("o4-") || m == "o1" || m == "o3" {
            return ("openrouter".to_string(), format!("openai/{}", m));
        }
        if m.starts_with("claude-") {
            return ("openrouter".to_string(), format!("anthropic/{}", m));
        }
        if m.starts_with("grok-") {
            return ("openrouter".to_string(), format!("x-ai/{}", m));
        }
    }
    ("ollama".to_string(), req.model.clone())
}
#[cfg(test)]
mod resolve_provider_tests {
    use super::*;

    /// Minimal request for routing tests — only `model` and `provider`
    /// are read by `resolve_provider`.
    fn mk_req(provider: Option<&str>, model: &str) -> ChatRequest {
        ChatRequest {
            model: model.into(),
            messages: Vec::new(),
            temperature: None,
            max_tokens: None,
            stream: None,
            think: None,
            provider: provider.map(str::to_string),
        }
    }

    /// Resolve and compare against the expected (provider, model) pair.
    fn check(provider: Option<&str>, model: &str, want_provider: &str, want_model: &str) {
        let got = resolve_provider(&mk_req(provider, model));
        assert_eq!(got, (want_provider.to_string(), want_model.to_string()));
    }

    #[test]
    fn explicit_provider_wins() {
        check(Some("openrouter"), "qwen3.5:latest", "openrouter", "qwen3.5:latest");
    }

    #[test]
    fn bare_model_defaults_to_ollama() {
        check(None, "qwen3.5:latest", "ollama", "qwen3.5:latest");
    }

    #[test]
    fn openrouter_prefix_infers_and_strips() {
        check(None, "openrouter/openai/gpt-4o-mini", "openrouter", "openai/gpt-4o-mini");
    }

    #[test]
    fn cloud_prefix_infers_and_strips() {
        check(None, "cloud/kimi-k2:1t", "ollama_cloud", "kimi-k2:1t");
    }

    #[test]
    fn explicit_provider_preserves_full_model_even_with_prefix() {
        // Caller supplied both provider and a prefixed model: trust
        // them verbatim — the adapter receives the full model string.
        check(
            Some("openrouter"),
            "openrouter/openai/gpt-4o-mini",
            "openrouter",
            "openrouter/openai/gpt-4o-mini",
        );
    }

    #[test]
    fn gemini_prefix_infers_and_strips() {
        check(None, "gemini/gemini-2.0-flash", "gemini", "gemini-2.0-flash");
    }

    #[test]
    fn claude_prefix_infers_and_strips() {
        check(None, "claude/claude-3-5-sonnet-latest", "claude", "claude-3-5-sonnet-latest");
    }

    #[test]
    fn kimi_prefix_infers_and_strips() {
        check(None, "kimi/kimi-for-coding", "kimi", "kimi-for-coding");
    }
}
async fn chat( async fn chat(
State(state): State<V1State>, State(state): State<V1State>,
Json(req): Json<ChatRequest>, Json(req): Json<ChatRequest>,
@ -141,13 +370,29 @@ async fn chat(
tracing::warn!("/v1/chat: stream=true requested but Phase 38 returns non-streaming"); tracing::warn!("/v1/chat: stream=true requested but Phase 38 returns non-streaming");
} }
let provider = req.provider.as_deref().unwrap_or("ollama").to_ascii_lowercase(); // Provider resolution: explicit `req.provider` wins; otherwise
// infer from a model-name prefix. Phase 39 PRD gate example:
// `model: "openrouter/openai/gpt-4o-mini"` → provider "openrouter",
// adapter gets the stripped "openai/gpt-4o-mini".
let (provider, effective_model) = resolve_provider(&req);
let start_time = chrono::Utc::now(); let start_time = chrono::Utc::now();
let start_instant = std::time::Instant::now(); let start_instant = std::time::Instant::now();
// If we stripped a prefix, clone req with the effective model so
// the adapter sees what the upstream provider expects (OpenRouter
// wants "openai/gpt-4o-mini", not "openrouter/openai/gpt-4o-mini").
let req_for_adapter: std::borrow::Cow<'_, ChatRequest> =
if effective_model == req.model {
std::borrow::Cow::Borrowed(&req)
} else {
let mut cloned = req.clone();
cloned.model = effective_model.clone();
std::borrow::Cow::Owned(cloned)
};
let (resp, used_provider) = match provider.as_str() { let (resp, used_provider) = match provider.as_str() {
"ollama" | "local" | "" => { "ollama" | "local" | "" => {
let r = ollama::chat(&state.ai_client, &req) let r = ollama::chat(&state.ai_client, &*req_for_adapter)
.await .await
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("ollama local: {e}")))?; .map_err(|e| (StatusCode::BAD_GATEWAY, format!("ollama local: {e}")))?;
(r, "ollama".to_string()) (r, "ollama".to_string())
@ -157,15 +402,79 @@ async fn chat(
StatusCode::SERVICE_UNAVAILABLE, StatusCode::SERVICE_UNAVAILABLE,
"OLLAMA_CLOUD_KEY not configured".to_string(), "OLLAMA_CLOUD_KEY not configured".to_string(),
))?; ))?;
let r = ollama_cloud::chat(key, &req) let r = ollama_cloud::chat(key, &*req_for_adapter)
.await .await
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("ollama cloud: {e}")))?; .map_err(|e| (StatusCode::BAD_GATEWAY, format!("ollama cloud: {e}")))?;
(r, "ollama_cloud".to_string()) (r, "ollama_cloud".to_string())
} }
"openrouter" | "openrouter_free" => {
// Free-tier rescue rung. Added 2026-04-24 after iter 5
// repeated Ollama Cloud 502s on kimi-k2:1t — OpenRouter
// gives a different provider backbone as fallback.
let key = state.openrouter_key.as_deref().ok_or((
StatusCode::SERVICE_UNAVAILABLE,
"OPENROUTER_API_KEY not configured".to_string(),
))?;
let r = openrouter::chat(key, &*req_for_adapter)
.await
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("openrouter: {e}")))?;
(r, "openrouter".to_string())
}
"gemini" => {
// Phase 40 provider adapter. Google Generative Language
// API via query-string key auth (not bearer).
let key = state.gemini_key.as_deref().ok_or((
StatusCode::SERVICE_UNAVAILABLE,
"GEMINI_API_KEY not configured".to_string(),
))?;
let r = gemini::chat(key, &*req_for_adapter)
.await
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("gemini: {e}")))?;
(r, "gemini".to_string())
}
"claude" | "anthropic" => {
// Phase 40 provider adapter. Anthropic Messages API via
// x-api-key header + anthropic-version:2023-06-01.
let key = state.claude_key.as_deref().ok_or((
StatusCode::SERVICE_UNAVAILABLE,
"ANTHROPIC_API_KEY not configured".to_string(),
))?;
let r = claude::chat(key, &*req_for_adapter)
.await
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("claude: {e}")))?;
(r, "claude".to_string())
}
"kimi" => {
// Direct Kimi For Coding provider — bypasses Ollama Cloud's
// upstream-broken kimi-k2:1t and OpenRouter's rate-limited
// moonshotai/kimi-k2.6. Uses sk-kimi-* keys from the Kimi
// membership console.
let key = state.kimi_key.as_deref().ok_or((
StatusCode::SERVICE_UNAVAILABLE,
"KIMI_API_KEY not configured".to_string(),
))?;
let r = kimi::chat(key, &*req_for_adapter)
.await
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("kimi: {e}")))?;
(r, "kimi".to_string())
}
"opencode" => {
// OpenCode GO multi-vendor gateway — Claude Opus 4.7,
// GPT-5.5-pro, Gemini 3.1-pro, Kimi K2.6, DeepSeek, GLM,
// Qwen, free-tier. OpenAI-compat at opencode.ai/zen/go/v1.
let key = state.opencode_key.as_deref().ok_or((
StatusCode::SERVICE_UNAVAILABLE,
"OPENCODE_API_KEY not configured".to_string(),
))?;
let r = opencode::chat(key, &*req_for_adapter)
.await
.map_err(|e| (StatusCode::BAD_GATEWAY, format!("opencode: {e}")))?;
(r, "opencode".to_string())
}
other => { other => {
return Err(( return Err((
StatusCode::BAD_REQUEST, StatusCode::BAD_REQUEST,
format!("unknown provider '{other}' — supported: ollama, ollama_cloud"), format!("unknown provider '{other}' — supported: ollama, ollama_cloud, openrouter, gemini, claude, kimi, opencode"),
)); ));
} }
}; };
@ -179,7 +488,7 @@ async fn chat(
// untouched. // untouched.
if let Some(lf) = &state.langfuse { if let Some(lf) = &state.langfuse {
let output = resp.choices.first() let output = resp.choices.first()
.map(|c| c.message.content.clone()) .map(|c| c.message.text())
.unwrap_or_default(); .unwrap_or_default();
lf.emit_chat(langfuse_trace::ChatTrace { lf.emit_chat(langfuse_trace::ChatTrace {
provider: used_provider.clone(), provider: used_provider.clone(),
@ -197,6 +506,46 @@ async fn chat(
}); });
} }
// Phase 40 part 2 — fire-and-forget /event to observer at :3800.
// Same ring-buffer that scrum + scenario events land in, so any
// tool-routed-through-our-gateway (Pi, Archon, openai SDK clients)
// shows up alongside scrum_master events for KB consolidation +
// pathway-memory + bug-fingerprint compounding. Best-effort:
// observer being down doesn't block the chat response.
{
let provider = used_provider.clone();
let model = resp.model.clone();
let prompt_tokens = resp.usage.prompt_tokens;
let completion_tokens = resp.usage.completion_tokens;
let success = true;
tokio::spawn(async move {
let body = serde_json::json!({
"endpoint": "/v1/chat",
"source": "v1.chat",
"event_kind": "chat_completion",
"input_summary": format!(
"{} {} prompt={}t",
provider, model, prompt_tokens
),
"output_summary": format!(
"completion={}t {}ms",
completion_tokens, latency_ms
),
"success": success,
"duration_ms": latency_ms,
});
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(2))
.build()
.unwrap_or_else(|_| reqwest::Client::new());
let _ = client
.post("http://localhost:3800/event")
.json(&body)
.send()
.await;
});
}
// Phase 40: per-provider usage tracking // Phase 40: per-provider usage tracking
{ {
let mut u = state.usage.write().await; let mut u = state.usage.write().await;
@ -220,6 +569,43 @@ async fn usage(State(state): State<V1State>) -> impl IntoResponse {
Json(snapshot) Json(snapshot)
} }
/// Production operational health endpoint.
///
/// `/v1/health` reports per-subsystem status as a JSON object so an
/// operator (or the lakehouse-auditor service, or a load balancer)
/// can verify the gateway is fully booted, has its provider keys
/// loaded, the worker roster is hot, and Langfuse is reachable.
/// Returns 200 always — fields are observed-state, not pass/fail
/// gates. A monitoring tool should evaluate the booleans + counts
/// against its own thresholds.
async fn health(State(state): State<V1State>) -> impl IntoResponse {
// Honest worker count via WorkerLookup::len. Production switchover
// verification: after swapping workers_500k.parquet → real Chicago
// data and restarting, this number should match the row count of
// the new file. 0 means the file was missing / unreadable / had a
// schema mismatch and the gateway booted with the empty fallback.
let workers_count = state.validate_workers.len();
let providers_configured = serde_json::json!({
"ollama_cloud": state.ollama_cloud_key.is_some(),
"openrouter": state.openrouter_key.is_some(),
"kimi": state.kimi_key.is_some(),
"opencode": state.opencode_key.is_some(),
"gemini": state.gemini_key.is_some(),
"claude": state.claude_key.is_some(),
});
let langfuse_configured = state.langfuse.is_some();
let usage_snapshot = state.usage.read().await.clone();
Json(serde_json::json!({
"status": "ok",
"workers_count": workers_count,
"workers_loaded": workers_count > 0,
"providers_configured": providers_configured,
"langfuse_configured": langfuse_configured,
"usage_total_requests": usage_snapshot.requests,
"usage_by_provider": usage_snapshot.by_provider.keys().collect::<Vec<_>>(),
}))
}
// Phase 38 is stateless — no session persistence yet. Return an empty // Phase 38 is stateless — no session persistence yet. Return an empty
// list in OpenAI-ish shape so clients that probe this endpoint don't // list in OpenAI-ish shape so clients that probe this endpoint don't
// 404. Real session state lands in Phase 41 with the profile-system // 404. Real session state lands in Phase 41 with the profile-system
@ -251,7 +637,7 @@ mod tests {
assert_eq!(r.model, "qwen3.5:latest"); assert_eq!(r.model, "qwen3.5:latest");
assert_eq!(r.messages.len(), 2); assert_eq!(r.messages.len(), 2);
assert_eq!(r.messages[0].role, "system"); assert_eq!(r.messages[0].role, "system");
assert_eq!(r.messages[1].content, "Hi"); assert_eq!(r.messages[1].text(), "Hi");
assert_eq!(r.temperature, Some(0.2)); assert_eq!(r.temperature, Some(0.2));
assert_eq!(r.max_tokens, Some(100)); assert_eq!(r.max_tokens, Some(100));
} }

File diff suppressed because it is too large Load Diff

View File

@ -60,10 +60,7 @@ pub async fn chat(client: &AiClient, req: &ChatRequest) -> Result<ChatResponse,
model: resp.model, model: resp.model,
choices: vec![Choice { choices: vec![Choice {
index: 0, index: 0,
message: Message { message: Message::new_text("assistant", resp.text),
role: "assistant".into(),
content: resp.text,
},
finish_reason: "stop".into(), finish_reason: "stop".into(),
}], }],
usage: UsageBlock { usage: UsageBlock {
@ -89,13 +86,14 @@ fn flatten_messages(messages: &[Message]) -> (String, String) {
let mut system = String::new(); let mut system = String::new();
let mut prompt = String::new(); let mut prompt = String::new();
for m in messages { for m in messages {
let body = m.text();
if m.role == "system" { if m.role == "system" {
if !system.is_empty() { system.push('\n'); } if !system.is_empty() { system.push('\n'); }
system.push_str(&m.content); system.push_str(&body);
} else { } else {
prompt.push_str(&m.role); prompt.push_str(&m.role);
prompt.push_str(": "); prompt.push_str(": ");
prompt.push_str(&m.content); prompt.push_str(&body);
prompt.push_str("\n\n"); prompt.push_str("\n\n");
} }
} }
@ -104,7 +102,7 @@ fn flatten_messages(messages: &[Message]) -> (String, String) {
} }
fn estimate_prompt_tokens(messages: &[Message]) -> u32 { fn estimate_prompt_tokens(messages: &[Message]) -> u32 {
let chars: usize = messages.iter().map(|m| m.content.chars().count()).sum(); let chars: usize = messages.iter().map(|m| m.text().chars().count()).sum();
((chars + 3) / 4) as u32 ((chars + 3) / 4) as u32
} }

View File

@ -88,7 +88,7 @@ pub async fn chat(
let text = parsed.response.unwrap_or_default(); let text = parsed.response.unwrap_or_default();
let prompt_tokens = parsed.prompt_eval_count.unwrap_or_else(|| { let prompt_tokens = parsed.prompt_eval_count.unwrap_or_else(|| {
let chars: usize = req.messages.iter().map(|m| m.content.chars().count()).sum(); let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
((chars + 3) / 4) as u32 ((chars + 3) / 4) as u32
}); });
let completion_tokens = parsed.eval_count.unwrap_or_else(|| { let completion_tokens = parsed.eval_count.unwrap_or_else(|| {
@ -112,7 +112,7 @@ pub async fn chat(
model: parsed.model.unwrap_or_else(|| req.model.clone()), model: parsed.model.unwrap_or_else(|| req.model.clone()),
choices: vec![Choice { choices: vec![Choice {
index: 0, index: 0,
message: Message { role: "assistant".into(), content: text }, message: Message::new_text("assistant", text),
finish_reason: "stop".into(), finish_reason: "stop".into(),
}], }],
usage: UsageBlock { usage: UsageBlock {

View File

@ -0,0 +1,228 @@
//! OpenCode GO adapter — multi-vendor curated gateway via opencode.ai/zen/go.
//!
//! One sk-* key reaches Claude Opus 4.7, GPT-5.5-pro, Gemini 3.1-pro,
//! Kimi K2.6, DeepSeek, GLM, Qwen, plus 4 free-tier models.
//! OpenAI-compatible Chat Completions; auth via Bearer.
//!
//! Why a separate adapter (vs reusing openrouter.rs):
//! - Different account, different key, different base_url
//! - No HTTP-Referer / X-Title headers (those are OpenRouter-specific)
//! - Future-proof for any opencode-only request shaping
//!
//! Key sourcing priority:
//! 1. Env var `OPENCODE_API_KEY` (loaded from /etc/lakehouse/opencode.env
//! via systemd EnvironmentFile=)
//! 2. /etc/lakehouse/opencode.env directly (rescue path if env missing)
//!
//! Resolved once at gateway startup, stored on `V1State.opencode_key`.
//! Model-prefix routing: "opencode/<model>" auto-routes here, prefix
//! stripped before upstream call.
use std::time::Duration;
use serde::{Deserialize, Serialize};
use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};
// /zen/v1 is the single OpenCode endpoint covering BOTH billing tiers:
// the Zen pay-per-token tier (Claude/GPT/Gemini frontier) AND the Go
// subscription tier (Kimi/GLM/DeepSeek/Qwen/Minimax/mimo). With one
// key holding both, opencode routes billing per model: Zen models
// charge the Zen balance, Go models charge the Go subscription cap.
//
// A Go-only sub-path (/zen/go/v1) also exists but rejects Zen models
// with "Model not supported" — the unified path is the right default.
const OPENCODE_BASE_URL: &str = "https://opencode.ai/zen/v1";
// 600s default — reasoning-heavy upstream models (Claude Opus,
// Kimi K2.6, GLM-5.1) legitimately take 3-5 min on big audit prompts.
// Override via the OPENCODE_TIMEOUT_SECS env var.
const OPENCODE_TIMEOUT_SECS_DEFAULT: u64 = 600;

/// Effective upstream timeout in seconds: a positive integer parsed
/// from `OPENCODE_TIMEOUT_SECS`, otherwise the 600s default. Unset,
/// unparseable, and zero values all fall back to the default.
fn opencode_timeout_secs() -> u64 {
    match std::env::var("OPENCODE_TIMEOUT_SECS") {
        Ok(raw) => match raw.trim().parse::<u64>() {
            Ok(n) if n > 0 => n,
            _ => OPENCODE_TIMEOUT_SECS_DEFAULT,
        },
        Err(_) => OPENCODE_TIMEOUT_SECS_DEFAULT,
    }
}
/// Locate the OpenCode API key. Priority: the `OPENCODE_API_KEY` env
/// var (whitespace-trimmed, ignored when blank), then a direct read of
/// /etc/lakehouse/opencode.env as the rescue path when the env var is
/// missing. Returns `None` when neither source yields a value.
pub fn resolve_opencode_key() -> Option<String> {
    match std::env::var("OPENCODE_API_KEY") {
        Ok(k) if !k.trim().is_empty() => return Some(k.trim().to_string()),
        _ => {}
    }
    let raw = std::fs::read_to_string("/etc/lakehouse/opencode.env").ok()?;
    raw.lines().find_map(|line| {
        let rest = line.strip_prefix("OPENCODE_API_KEY=")?;
        // Tolerate shell-style quoting around the value.
        let k = rest.trim().trim_matches('"').trim_matches('\'');
        (!k.is_empty()).then(|| k.to_string())
    })
}
pub async fn chat(
key: &str,
req: &ChatRequest,
) -> Result<ChatResponse, String> {
// Strip the "opencode/" namespace prefix so the upstream sees the
// bare model id (e.g. "claude-opus-4-7", "kimi-k2.6").
let model = req.model.strip_prefix("opencode/").unwrap_or(&req.model).to_string();
// Anthropic models on opencode reject `temperature` with a 400
// "temperature is deprecated for this model" error. Strip the
// field for claude-* and the new gpt-5.x reasoning lineages
// (Anthropic/OpenAI's reasoning models all moved away from temp).
// Other models keep the caller's value or default to 0.3.
let drop_temp = model.starts_with("claude-")
|| model.starts_with("gpt-5")
|| model.starts_with("o1")
|| model.starts_with("o3")
|| model.starts_with("o4");
let body = OCChatBody {
model: model.clone(),
messages: req.messages.iter().map(|m| OCMessage {
role: m.role.clone(),
content: m.content.clone(),
}).collect(),
// filter(|&n| n > 0) catches Some(0) — same trap that bit the
// Kimi adapter when callers passed empty-env-parsed-to-0.
max_tokens: req.max_tokens.filter(|&n| n > 0).unwrap_or(800),
temperature: if drop_temp { None } else { Some(req.temperature.unwrap_or(0.3)) },
stream: false,
};
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(opencode_timeout_secs()))
.build()
.map_err(|e| format!("build client: {e}"))?;
let t0 = std::time::Instant::now();
let resp = client
.post(format!("{}/chat/completions", OPENCODE_BASE_URL))
.bearer_auth(key)
.json(&body)
.send()
.await
.map_err(|e| format!("opencode.ai unreachable: {e}"))?;
let status = resp.status();
if !status.is_success() {
let body = resp.text().await.unwrap_or_else(|_| "?".into());
return Err(format!("opencode.ai {}: {}", status, body));
}
let parsed: OCChatResponse = resp.json().await
.map_err(|e| format!("invalid opencode response: {e}"))?;
let latency_ms = t0.elapsed().as_millis();
let choice = parsed.choices.into_iter().next()
.ok_or_else(|| "opencode returned no choices".to_string())?;
let text = choice.message.content;
let prompt_tokens = parsed.usage.as_ref().map(|u| u.prompt_tokens).unwrap_or_else(|| {
let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
((chars + 3) / 4) as u32
});
let completion_tokens = parsed.usage.as_ref().map(|u| u.completion_tokens).unwrap_or_else(|| {
((text.chars().count() + 3) / 4) as u32
});
tracing::info!(
target: "v1.chat",
provider = "opencode",
model = %model,
prompt_tokens,
completion_tokens,
latency_ms = latency_ms as u64,
"opencode chat completed",
);
Ok(ChatResponse {
id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
object: "chat.completion",
created: chrono::Utc::now().timestamp(),
model,
choices: vec![Choice {
index: 0,
message: Message { role: "assistant".into(), content: serde_json::Value::String(text) },
finish_reason: choice.finish_reason.unwrap_or_else(|| "stop".into()),
}],
usage: UsageBlock {
prompt_tokens,
completion_tokens,
total_tokens: prompt_tokens + completion_tokens,
},
})
}
// -- OpenCode wire shapes (OpenAI-compatible) --

/// Outbound request body for POST {OPENCODE_BASE_URL}/chat/completions.
#[derive(Serialize)]
struct OCChatBody {
    model: String,
    messages: Vec<OCMessage>,
    max_tokens: u32,
    // Omitted entirely (not sent as null) when None — required because
    // some upstream models reject the field outright (see `drop_temp`
    // in `chat`).
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f64>,
    stream: bool,
}
/// One chat turn. `content` is a raw JSON value so OpenAI multimodal
/// content-parts pass through unchanged.
#[derive(Serialize)]
struct OCMessage { role: String, content: serde_json::Value }
/// Upstream response envelope. `usage` is optional — token counts are
/// estimated in `chat` when the provider omits them.
#[derive(Deserialize)]
struct OCChatResponse {
    choices: Vec<OCChoice>,
    #[serde(default)]
    usage: Option<OCUsage>,
}
/// A single completion choice; `finish_reason` defaults to "stop" in
/// `chat` when absent.
#[derive(Deserialize)]
struct OCChoice {
    message: OCMessageResp,
    #[serde(default)]
    finish_reason: Option<String>,
}
/// Assistant reply body — plain text on the response side.
#[derive(Deserialize)]
struct OCMessageResp { content: String }
/// Upstream token accounting, when the provider reports it.
#[derive(Deserialize)]
struct OCUsage { prompt_tokens: u32, completion_tokens: u32 }
#[cfg(test)]
mod tests {
    use super::*;

    /// Key resolution touches env + disk whose state varies by host;
    /// only prove the call returns cleanly.
    #[test]
    fn resolve_opencode_key_does_not_panic() {
        let _ = resolve_opencode_key();
    }

    /// "opencode/" is stripped; unprefixed ids pass through untouched.
    #[test]
    fn model_prefix_strip() {
        for (input, expected) in [
            ("opencode/claude-opus-4-7", "claude-opus-4-7"),
            ("opencode/kimi-k2.6", "kimi-k2.6"),
            ("claude-opus-4-7", "claude-opus-4-7"),
        ] {
            assert_eq!(input.strip_prefix("opencode/").unwrap_or(input), expected);
        }
    }

    /// The trap: empty env -> Number("") -> 0 -> Some(0). The adapter
    /// must never send 0 upstream; it must fall back to 800.
    #[test]
    fn max_tokens_filters_zero() {
        let effective = |v: Option<u32>| v.filter(|&n| n > 0).unwrap_or(800);
        assert_eq!(effective(Some(0)), 800);
        assert_eq!(effective(Some(4096)), 4096);
        assert_eq!(effective(None), 800);
    }
}

View File

@ -0,0 +1,220 @@
//! OpenRouter adapter — free-tier rescue rung for /v1/chat.
//!
//! Direct HTTPS call to `https://openrouter.ai/api/v1/chat/completions`
//! with Bearer auth. Mirrors the OpenAI-compatible shape so the model
//! list can be expanded without code changes. Added 2026-04-24 after
//! iter 5 hit repeated Ollama Cloud 502s on kimi-k2:1t — OpenRouter
//! free-tier models give us a different provider backbone as fallback.
//!
//! Key sourcing priority:
//! 1. Env var `OPENROUTER_API_KEY`
//! 2. `/home/profit/.env` (LLM Team convention)
//! 3. `/root/llm_team_config.json` → providers.openrouter.api_key
//!
//! First hit wins. Key is resolved once at gateway startup and stored
//! on `V1State.openrouter_key`.
use std::time::Duration;
use serde::{Deserialize, Serialize};
use super::{ChatRequest, ChatResponse, Choice, Message, UsageBlock};
const OR_BASE_URL: &str = "https://openrouter.ai/api/v1";
const OR_TIMEOUT_SECS: u64 = 180;

/// Locate the OpenRouter API key. Priority: `OPENROUTER_API_KEY` env
/// var, then `OPENROUTER_API_KEY=` lines in /home/profit/.env or
/// /root/.env (LLM Team UI convention), then
/// /root/llm_team_config.json → providers.openrouter.api_key.
/// First non-empty hit wins; `None` when every source comes up dry.
pub fn resolve_openrouter_key() -> Option<String> {
    if let Ok(k) = std::env::var("OPENROUTER_API_KEY") {
        let trimmed = k.trim();
        if !trimmed.is_empty() {
            return Some(trimmed.to_string());
        }
    }
    // The LLM Team UI writes its key to ~/.env on the host user — read
    // the same files so the free-tier rescue path works without an
    // explicit systemd Environment= line.
    for path in ["/home/profit/.env", "/root/.env"] {
        let Ok(raw) = std::fs::read_to_string(path) else { continue };
        let hit = raw.lines().find_map(|line| {
            let rest = line.strip_prefix("OPENROUTER_API_KEY=")?;
            // Tolerate shell-style quoting around the value.
            let v = rest.trim().trim_matches('"').trim_matches('\'');
            (!v.is_empty()).then(|| v.to_string())
        });
        if hit.is_some() {
            return hit;
        }
    }
    let raw = std::fs::read_to_string("/root/llm_team_config.json").ok()?;
    let cfg: serde_json::Value = serde_json::from_str(&raw).ok()?;
    let k = cfg.pointer("/providers/openrouter/api_key")?.as_str()?.trim();
    (!k.is_empty()).then(|| k.to_string())
}
pub async fn chat(
key: &str,
req: &ChatRequest,
) -> Result<ChatResponse, String> {
// Strip the "openrouter/" prefix if the caller used the namespaced
// form so OpenRouter sees the raw model id (e.g. "openai/gpt-oss-120b:free").
let model = req.model.strip_prefix("openrouter/").unwrap_or(&req.model).to_string();
let body = ORChatBody {
model: model.clone(),
// Pass content through verbatim — preserves OpenAI's multimodal
// content-parts shape (`[{type:"text",text:"..."}, ...]`) so the
// upstream provider sees exactly what the client sent.
messages: req.messages.iter().map(|m| ORMessage {
role: m.role.clone(),
content: m.content.clone(),
}).collect(),
max_tokens: req.max_tokens.unwrap_or(800),
temperature: req.temperature.unwrap_or(0.3),
stream: false,
};
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(OR_TIMEOUT_SECS))
.build()
.map_err(|e| format!("build client: {e}"))?;
let t0 = std::time::Instant::now();
let resp = client
.post(format!("{}/chat/completions", OR_BASE_URL))
.bearer_auth(key)
// OpenRouter recommends Referer + Title for attribution; absent
// headers do not fail the call but help us see our traffic in
// their dashboard.
.header("HTTP-Referer", "https://vcp.devop.live")
.header("X-Title", "Lakehouse Scrum")
.json(&body)
.send()
.await
.map_err(|e| format!("openrouter.ai unreachable: {e}"))?;
let status = resp.status();
if !status.is_success() {
let body = resp.text().await.unwrap_or_else(|_| "?".into());
return Err(format!("openrouter.ai {}: {}", status, body));
}
let parsed: ORChatResponse = resp.json().await
.map_err(|e| format!("invalid openrouter response: {e}"))?;
let latency_ms = t0.elapsed().as_millis();
let choice = parsed.choices.into_iter().next()
.ok_or_else(|| "openrouter returned no choices".to_string())?;
let text = choice.message.content;
let prompt_tokens = parsed.usage.as_ref().map(|u| u.prompt_tokens).unwrap_or_else(|| {
let chars: usize = req.messages.iter().map(|m| m.text().chars().count()).sum();
((chars + 3) / 4) as u32
});
let completion_tokens = parsed.usage.as_ref().map(|u| u.completion_tokens).unwrap_or_else(|| {
((text.chars().count() + 3) / 4) as u32
});
tracing::info!(
target: "v1.chat",
provider = "openrouter",
model = %model,
prompt_tokens,
completion_tokens,
latency_ms = latency_ms as u64,
"openrouter chat completed",
);
Ok(ChatResponse {
id: format!("chatcmpl-{}", chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0)),
object: "chat.completion",
created: chrono::Utc::now().timestamp(),
model,
choices: vec![Choice {
index: 0,
message: Message { role: "assistant".into(), content: serde_json::Value::String(text) },
finish_reason: choice.finish_reason.unwrap_or_else(|| "stop".into()),
}],
usage: UsageBlock {
prompt_tokens,
completion_tokens,
total_tokens: prompt_tokens + completion_tokens,
},
})
}
// -- OpenRouter wire shapes (OpenAI-compatible) --

/// Outbound request body for POST {OR_BASE_URL}/chat/completions.
#[derive(Serialize)]
struct ORChatBody {
    model: String,
    messages: Vec<ORMessage>,
    max_tokens: u32,
    temperature: f64,
    stream: bool,
}
/// One chat turn. `content` is a raw JSON value so OpenAI multimodal
/// content-parts pass through unchanged.
#[derive(Serialize)]
struct ORMessage { role: String, content: serde_json::Value }
/// Upstream response envelope. `usage` is optional — token counts are
/// estimated in `chat` when the provider omits them.
#[derive(Deserialize)]
struct ORChatResponse {
    choices: Vec<ORChoice>,
    #[serde(default)]
    usage: Option<ORUsage>,
}
/// A single completion choice; `finish_reason` defaults to "stop" in
/// `chat` when absent.
#[derive(Deserialize)]
struct ORChoice {
    message: ORMessageResp,
    #[serde(default)]
    finish_reason: Option<String>,
}
/// Assistant reply body — plain text on the response side.
#[derive(Deserialize)]
struct ORMessageResp { content: String }
/// Upstream token accounting, when the provider reports it.
#[derive(Deserialize)]
struct ORUsage { prompt_tokens: u32, completion_tokens: u32 }
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test — all three key sources may or may not exist on this
    /// host; just confirm the resolver returns without panicking.
    #[test]
    fn resolve_openrouter_key_does_not_panic() {
        let _ = resolve_openrouter_key();
    }

    /// The serialized body must carry the OpenAI field names verbatim.
    #[test]
    fn chat_body_serializes_to_openai_shape() {
        let msg = ORMessage { role: "user".into(), content: "review this".into() };
        let body = ORChatBody {
            model: "openai/gpt-oss-120b:free".into(),
            messages: vec![msg],
            max_tokens: 800,
            temperature: 0.3,
            stream: false,
        };
        let json = serde_json::to_string(&body).unwrap();
        for needle in [
            "\"model\":\"openai/gpt-oss-120b:free\"",
            "\"messages\"",
            "\"max_tokens\":800",
            "\"stream\":false",
        ] {
            assert!(json.contains(needle), "serialized body missing {needle}");
        }
    }

    /// "openrouter/..." is stripped; already-bare model ids pass
    /// through untouched.
    #[test]
    fn model_prefix_strip_preserves_unprefixed() {
        let cases = [
            ("openrouter/openai/gpt-oss-120b:free", "openai/gpt-oss-120b:free"),
            ("openai/gpt-oss-120b:free", "openai/gpt-oss-120b:free"),
            ("google/gemma-3-27b-it:free", "google/gemma-3-27b-it:free"),
        ];
        for (input, expected) in cases {
            let out = input.strip_prefix("openrouter/").unwrap_or(input);
            assert_eq!(out, expected, "{input} should become {expected}");
        }
    }
}

View File

@ -0,0 +1,150 @@
//! `/v1/respond` — the **execution** API (distinct from `/v1/chat`, the
//! completion API).
//!
//! This is the consolidation move called out in the 2026-04-23 session:
//! lift the proven pipeline from `tests/multi-agent/orchestrator.ts`
//! (executor → reviewer → escalate → validate → seal playbook →
//! write-through to KB) into the gateway, so the production path has
//! the intelligence the tests already proved.
//!
//! `/v1/chat` stays a naive completion proxy for callers that want one.
//! `/v1/respond` is where the loop lives. Every orchestrator-style
//! caller migrates here and the TS harnesses become thin clients.
//!
//! This file holds the HTTP surface + request/response shapes. The loop
//! itself lives in `execution_loop::ExecutionLoop`.
use axum::{extract::State, http::StatusCode, Json};
use serde::{Deserialize, Serialize};
use super::V1State;
use crate::execution_loop::{ExecutionLoop, LogEntry, RespondOutcome};
/// A structured task — mirrors `TaskSpec` in `tests/multi-agent/agent.ts`.
/// Kept deliberately open so non-staffing task classes (code-gen,
/// DevOps-long-horizon) can land without a schema fight.
#[derive(Deserialize, Debug, Clone)]
pub struct RespondRequest {
    /// Task class — routes to the right truth rules + validator. For the
    /// staffing substrate: `staffing.fill`, `staffing.rescue`,
    /// `staffing.sms_draft`. Truth-layer lookup is a no-op until a rule
    /// set is registered for the class. Must be non-empty: the
    /// `/v1/respond` handler rejects "" with 400.
    pub task_class: String,
    /// Human-readable operation description — becomes the playbook
    /// `operation` field on seal, and the primary signal for
    /// playbook_memory embedding. Must be non-empty: the `/v1/respond`
    /// handler rejects "" with 400.
    pub operation: String,
    /// Free-form structured context (defaults to JSON null when
    /// omitted). Passed to the executor prompt and to the playbook
    /// seeder. Staffing tasks expect
    /// `{target_role, target_count, target_city, target_state, approach_hint}`
    /// but nothing here validates that — the validator crate will (Phase 43).
    #[serde(default)]
    pub spec: serde_json::Value,
    /// Executor model. Defaults to the hot-path local model if omitted.
    /// See orchestrator.ts:28 (`EXECUTOR_MODEL = "qwen3.5:latest"`).
    #[serde(default)]
    pub executor_model: Option<String>,
    /// Reviewer model. Defaults to the hot-path local reviewer.
    /// See orchestrator.ts:29 (`REVIEWER_MODEL = "qwen3:latest"`).
    #[serde(default)]
    pub reviewer_model: Option<String>,
    /// Hard cap on executor turns. Default matches orchestrator.ts:30
    /// (`MAX_TURNS = 12`). Cloud escalation counts as a turn.
    #[serde(default)]
    pub max_turns: Option<u32>,
}
/// Wire shape returned by `/v1/respond`.
#[derive(Serialize)]
pub struct RespondResponse {
    /// `ok` = consensus reached, playbook sealed. `failed` = loop ran
    /// out of turns or hit the drift cap. `blocked` = truth-layer
    /// veto (Phase 42 rule citation in `error`).
    pub status: &'static str,
    /// The final artifact — for staffing fills, `{fills: [{candidate_id, name}]}`.
    /// Empty on failure / block.
    pub artifact: serde_json::Value,
    /// Structured cross-turn log. Same shape as orchestrator.ts LogEntry
    /// so existing tooling (kb extractors, fact_extractor.ts) reads it
    /// without change.
    pub log: Vec<LogEntry>,
    /// Iteration count actually used. ≤ max_turns. Stamped on
    /// outcomes.jsonl per the indicator audit (2026-04-23).
    pub iterations: u32,
    /// Error message on non-ok status. Truth-rule citations land here
    /// when `status == "blocked"`.
    // NOTE(review): serde(default) only affects Deserialize and this
    // struct only derives Serialize — harmless, but could be dropped.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
}
/// POST `/v1/respond` — validate the task shape, run the executor →
/// reviewer consensus loop, and map its outcome onto the wire shape.
/// 400 on empty `operation`/`task_class`; 500 if the loop itself errors.
pub async fn respond(
    State(state): State<V1State>,
    Json(req): Json<RespondRequest>,
) -> Result<Json<RespondResponse>, (StatusCode, String)> {
    // Reject obviously-malformed tasks before spinning up the loop.
    for (value, msg) in [
        (&req.operation, "operation must be non-empty"),
        (&req.task_class, "task_class must be non-empty"),
    ] {
        if value.is_empty() {
            return Err((StatusCode::BAD_REQUEST, msg.to_string().into()));
        }
    }
    let mut runner = ExecutionLoop::new(state, req);
    let outcome = runner
        .run()
        .await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("execution loop: {e}")))?;
    let (status, error) = match &outcome {
        RespondOutcome::Ok { .. } => ("ok", None),
        RespondOutcome::Failed { reason, .. } => ("failed", Some(reason.clone())),
        RespondOutcome::Blocked { reason, .. } => ("blocked", Some(reason.clone())),
    };
    // artifact() borrows the outcome; into_log() consumes it — keep
    // this order.
    let artifact = outcome.artifact();
    let log = outcome.into_log();
    Ok(Json(RespondResponse {
        status,
        artifact,
        log,
        iterations: runner.turns_used(),
        error,
    }))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal payload: only the two required fields are present, so
    /// every optional field must land on its serde default.
    #[test]
    fn respond_request_parses_minimal() {
        let payload = r#"{
            "task_class": "staffing.fill",
            "operation": "fill: Welder x2 in Toledo, OH"
        }"#;
        let parsed: RespondRequest = serde_json::from_str(payload).unwrap();
        assert_eq!(parsed.task_class, "staffing.fill");
        assert!(parsed.executor_model.is_none());
        assert!(parsed.max_turns.is_none());
    }

    /// Fully-populated payload: overrides and the spec bag round-trip.
    #[test]
    fn respond_request_parses_full() {
        let payload = r#"{
            "task_class": "staffing.fill",
            "operation": "fill: Welder x2 in Toledo, OH",
            "spec": {"target_role": "Welder", "target_count": 2, "target_city": "Toledo", "target_state": "OH"},
            "executor_model": "qwen3.5:latest",
            "reviewer_model": "qwen3:latest",
            "max_turns": 12
        }"#;
        let parsed: RespondRequest = serde_json::from_str(payload).unwrap();
        assert_eq!(parsed.executor_model.as_deref(), Some("qwen3.5:latest"));
        assert_eq!(parsed.max_turns, Some(12));
        assert_eq!(parsed.spec["target_count"], 2);
    }
}

View File

@ -0,0 +1,47 @@
use serde::Serialize;
use truth::default_truth_store;
// Note: truth_router() was a stub wrapper around a single /context route
// that nothing called — v1/mod.rs wires get(truth::context) directly
// onto its own router. Removed 2026-04-24 along with its #[allow(dead_code)]
// attribute; the handler below is the real surface.
/// Response body for the truth-context endpoint: the task classes the
/// truth layer serves plus a flattened list of every rule bound to them.
#[derive(Serialize)]
pub struct ContextResponse {
    /// Task classes queried below — currently a hard-coded list, see
    /// `context()`.
    pub task_classes: Vec<String>,
    /// One entry per rule per task class, in task-class order.
    pub rules: Vec<RuleInfo>,
}

/// Wire-friendly projection of a truth rule: just the identity fields,
/// no condition/action payloads.
#[derive(Serialize)]
pub struct RuleInfo {
    pub id: String,
    pub task_class: String,
    pub description: String,
}
pub async fn context() -> axum::Json<ContextResponse> {
let store = default_truth_store();
let task_classes: Vec<String> = vec![
"staffing.fill".to_string(),
"staffing.rescue".to_string(),
"staffing.sms_draft".to_string(),
"staffing.any".to_string(),
];
let mut rules = Vec::new();
for tc in &task_classes {
for rule in store.get_rules(tc) {
rules.push(RuleInfo {
id: rule.id.clone(),
task_class: rule.task_class.clone(),
description: rule.description.clone(),
});
}
}
axum::Json(ContextResponse {
task_classes,
rules,
})
}

View File

@ -0,0 +1,82 @@
//! /v1/validate — gateway-side artifact validation endpoint.
//!
//! Phase 43 v3 part 2: makes the validator crate network-callable.
//! Any caller (scrum loop, test harness, future agent) can POST a
//! generated artifact and get back a Report (success) or
//! ValidationError (failure with structured field/reason).
//!
//! Request shape:
//! POST /v1/validate
//! {
//! "kind": "fill" | "email" | "playbook",
//! "artifact": { ... },
//! "context": { ... } // optional — folded into artifact._context
//! }
//!
//! Response on success: 200 + Report JSON
//! Response on failure: 422 + ValidationError JSON
//! Response on bad request: 400 + plain-text error
//!
//! The shared WorkerLookup is loaded once at gateway startup from
//! workers_500k.parquet (path configurable via LH_WORKERS_PARQUET
//! env, defaults to data/datasets/workers_500k.parquet). Falls back
//! to an empty InMemoryWorkerLookup if the file is missing — the
//! validators will still run schema/length/PII checks but worker-
//! existence checks will all fail (Consistency error), which is the
//! correct behavior when the roster isn't configured.
use axum::{extract::State, http::StatusCode, response::IntoResponse, Json};
use serde::Deserialize;
use validator::{
Artifact, Validator, ValidationError,
staffing::{
fill::FillValidator,
email::EmailValidator,
playbook::PlaybookValidator,
},
};
/// Request body for `POST /v1/validate`.
#[derive(Deserialize)]
pub struct ValidateRequest {
    /// `"fill" | "email" | "playbook"` — picks which validator runs.
    pub kind: String,
    /// The artifact JSON (free-form; shape depends on `kind`).
    pub artifact: serde_json::Value,
    /// Optional context bag — merged into `artifact._context` so the
    /// validator can read fields like `target_count`, `city`,
    /// `client_id`, `candidate_id` without callers having to embed
    /// `_context` in the artifact themselves.
    ///
    /// NOTE(review): the handler only performs the merge when
    /// `artifact` is a JSON object; for any other artifact shape a
    /// supplied context is silently dropped — confirm that's intended.
    #[serde(default)]
    pub context: Option<serde_json::Value>,
}
/// POST /v1/validate — dispatch one artifact to the validator picked by
/// `kind`, returning 200 + Report, 422 + ValidationError, or 400 for an
/// unknown kind.
pub async fn validate(
    State(state): State<super::V1State>,
    Json(req): Json<ValidateRequest>,
) -> impl IntoResponse {
    // Fold the optional context bag into the artifact under `_context`
    // so every validator reads contract metadata from one place.
    // (Only possible when the artifact is a JSON object; otherwise the
    // context is dropped, matching prior behavior.)
    let mut artifact = req.artifact;
    if let Some(ctx) = req.context {
        if let Some(fields) = artifact.as_object_mut() {
            fields.insert("_context".to_string(), ctx);
        }
    }

    // Pick the validator for this kind and run it.
    let workers = state.validate_workers.clone();
    let verdict: Result<validator::Report, ValidationError> = match req.kind.as_str() {
        "fill" => FillValidator::new(workers).validate(&Artifact::FillProposal(artifact)),
        "email" => EmailValidator::new(workers).validate(&Artifact::EmailDraft(artifact)),
        "playbook" => PlaybookValidator.validate(&Artifact::Playbook(artifact)),
        other => {
            return (
                StatusCode::BAD_REQUEST,
                format!("unknown kind '{other}' — expected fill | email | playbook"),
            )
                .into_response();
        }
    };

    match verdict {
        Ok(report) => (StatusCode::OK, Json(report)).into_response(),
        Err(err) => (StatusCode::UNPROCESSABLE_ENTITY, Json(err)).into_response(),
    }
}

View File

@ -8,6 +8,7 @@ shared = { path = "../shared" }
storaged = { path = "../storaged" } storaged = { path = "../storaged" }
catalogd = { path = "../catalogd" } catalogd = { path = "../catalogd" }
vectord = { path = "../vectord" } vectord = { path = "../vectord" }
journald = { path = "../journald" }
tokio = { workspace = true } tokio = { workspace = true }
axum = { workspace = true, features = ["multipart"] } axum = { workspace = true, features = ["multipart"] }
lopdf = { workspace = true } lopdf = { workspace = true }

View File

@ -2,10 +2,9 @@
/// When a source changes format (columns renamed, added, removed, type changed), /// When a source changes format (columns renamed, added, removed, type changed),
/// the system detects the diff and can auto-map using AI or heuristic matching. /// the system detects the diff and can auto-map using AI or heuristic matching.
use arrow::datatypes::{DataType, Schema, SchemaRef}; use arrow::datatypes::{DataType, SchemaRef};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::collections::HashMap; use std::collections::HashMap;
use std::sync::Arc;
/// A detected change between two schema versions. /// A detected change between two schema versions.
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
@ -223,7 +222,8 @@ fn find_similar_column<'a>(
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use arrow::datatypes::Field; use arrow::datatypes::{Field, Schema};
use std::sync::Arc;
fn schema(fields: Vec<(&str, DataType)>) -> SchemaRef { fn schema(fields: Vec<(&str, DataType)>) -> SchemaRef {
Arc::new(Schema::new( Arc::new(Schema::new(

View File

@ -3,7 +3,7 @@ use axum::{
extract::{Multipart, Path, Query, State}, extract::{Multipart, Path, Query, State},
http::{HeaderMap, StatusCode}, http::{HeaderMap, StatusCode},
response::IntoResponse, response::IntoResponse,
routing::{delete, get, patch, post}, routing::{get, post},
}; };
use bytes::Bytes; use bytes::Bytes;
use object_store::ObjectStore; use object_store::ObjectStore;
@ -33,6 +33,11 @@ pub struct IngestState {
/// Scheduled-ingest registry. The scheduler task runs against this /// Scheduled-ingest registry. The scheduler task runs against this
/// store; HTTP CRUD endpoints write through it. /// store; HTTP CRUD endpoints write through it.
pub schedules: schedule::ScheduleStore, pub schedules: schedule::ScheduleStore,
/// Event journal for ADR-012 mutation history. Optional for back-compat
/// with callers (like scheduled ingest tests) that don't wire it yet.
/// When present, successful ingests emit a record_ingest event — closes
/// P9-001 on the file-upload path. (2026-04-23)
pub journal: Option<journald::journal::Journal>,
} }
/// Push `DatasetAppended` triggers for every HNSW index bound to this /// Push `DatasetAppended` triggers for every HNSW index bound to this
@ -136,6 +141,22 @@ async fn ingest_file(
Ok(result) => { Ok(result) => {
if !result.deduplicated { if !result.deduplicated {
notify_agent_on_append(&state, &result.dataset_name).await; notify_agent_on_append(&state, &result.dataset_name).await;
// P9-001 fix (2026-04-23): emit a mutation event on every
// non-deduplicated ingest. Dedup no-ops don't need events
// (ADR-020 register() is already idempotent on same fingerprint).
if let Some(ref journal) = state.journal {
if let Err(e) = journal.record_ingest(
&result.dataset_name,
result.rows as usize,
"ingest_api",
&filename,
).await {
tracing::warn!(
"journal record_ingest failed for '{}': {}",
result.dataset_name, e,
);
}
}
} }
if result.deduplicated { if result.deduplicated {
Ok((StatusCode::OK, Json(result))) Ok((StatusCode::OK, Json(result)))
@ -630,3 +651,108 @@ async fn run_schedule_now(
} }
Ok(Json(outcome)) Ok(Json(outcome))
} }
// ─── Tests ───
#[cfg(test)]
mod journal_integration_tests {
    //! P9-001 integration test: prove that a successful ingest produces a
    //! journal.record_ingest event. Block 2 on PR #10 was "journal event
    //! verified live" being unbacked by the diff. This test makes the
    //! verification committed and reproducible.
    use journald::journal::{Event, Journal};
    use object_store::memory::InMemory;
    use std::sync::Arc;

    /// Build a bare Journal against an in-memory object store.
    /// Flush threshold 1 means every recorded event persists
    /// immediately, so assertions below never race a buffered flush.
    fn test_journal() -> Journal {
        let store: Arc<dyn object_store::ObjectStore> = Arc::new(InMemory::new());
        Journal::new(store, 1)
    }

    #[tokio::test]
    async fn journal_record_ingest_increments_counter() {
        // Arrange — fresh journal, counter starts at zero.
        let journal = test_journal();
        let stats0 = journal.stats().await;
        assert_eq!(stats0.total_events_created, 0);
        assert_eq!(stats0.buffer_events, 0);
        // Act — simulate what the /ingest/file success path does
        // (dataset name, row count, actor tag, source filename).
        journal
            .record_ingest("test_dataset", 42, "ingest_api", "probe.csv")
            .await
            .expect("record_ingest should succeed");
        // Assert — counter advanced, event exists. With threshold=1 the
        // event flushed to store; with threshold>N it would be in-buffer.
        let stats1 = journal.stats().await;
        assert_eq!(stats1.total_events_created, 1, "counter should reflect one recorded event");
        // Assert — the event is retrievable by entity.
        // NOTE(review): "batch:42" matches the `42` row count passed
        // above, implying journald keys ingest history as
        // "batch:{rows}" — confirm that coupling is intended and not a
        // placeholder batch id.
        let history = journal
            .get_entity_history("batch:42")
            .await
            .expect("history lookup");
        assert_eq!(history.len(), 1, "one event should be visible in history");
        let ev = &history[0];
        // Field mapping pinned to the call above: dataset → entity_type,
        // actor string → actor, filename surfaced via new_value.
        assert_eq!(ev.action, "ingest");
        assert_eq!(ev.entity_type, "test_dataset");
        assert_eq!(ev.actor, "ingest_api");
        assert!(
            ev.new_value.contains("probe.csv"),
            "new_value should carry source filename, got: {}",
            ev.new_value
        );
    }

    #[tokio::test]
    async fn optional_journal_field_none_is_valid_back_compat() {
        // IngestState.journal is Option<Journal>. Back-compat path: when
        // the field is None, the ingest handler MUST still succeed — the
        // journal call is fire-and-forget, never load-bearing.
        //
        // This test asserts the type shape: Option<Journal> is what we
        // expect. If a refactor makes it mandatory, this test forces an
        // explicit re-consideration.
        let none_journal: Option<Journal> = None;
        assert!(none_journal.is_none());
        let some_journal: Option<Journal> = Some(test_journal());
        assert!(some_journal.is_some());
    }

    #[tokio::test]
    async fn journal_record_event_fields_match_adr_012_schema() {
        // ADR-012 locks the event schema: entity_type, entity_id, field,
        // action, old_value, new_value, actor, source, workspace_id plus
        // the auto-assigned event_id + timestamp. This test pins the
        // field names so a future refactor can't silently drop one.
        let journal = test_journal();
        let base = Event {
            // Empty event_id: record() is expected to assign one.
            event_id: String::new(),
            timestamp: chrono::Utc::now(),
            entity_type: "candidate".into(),
            entity_id: "CAND-0001".into(),
            field: "phone".into(),
            action: "update".into(),
            old_value: "555-0000".into(),
            new_value: "555-9999".into(),
            actor: "recruiter".into(),
            source: "api".into(),
            workspace_id: "ws-x".into(),
        };
        journal.record(base).await.expect("record should accept full-schema event");
        let h = journal
            .get_entity_history("CAND-0001")
            .await
            .expect("lookup");
        assert_eq!(h.len(), 1);
        assert_eq!(h[0].field, "phone");
        assert_eq!(h[0].old_value, "555-0000");
        assert_eq!(h[0].new_value, "555-9999");
        assert_eq!(h[0].workspace_id, "ws-x");
    }
}

View File

@ -72,12 +72,14 @@ async fn process_inbox(
let path = entry.path(); let path = entry.path();
// Skip directories and hidden files // Skip directories and hidden files. Bind filename once via
if path.is_dir() || path.file_name().map_or(true, |n| n.to_string_lossy().starts_with('.')) { // let-else so the subsequent use is unwrap-free — previous
continue; // version relied on a map_or guard above + an .unwrap() here
} // being consistent, which is a fragile invariant.
if path.is_dir() { continue; }
let filename = path.file_name().unwrap().to_string_lossy().to_string(); let Some(fn_os) = path.file_name() else { continue; };
let filename = fn_os.to_string_lossy().to_string();
if filename.starts_with('.') { continue; }
tracing::info!("watcher: found new file '{}'", filename); tracing::info!("watcher: found new file '{}'", filename);
// Read file // Read file

View File

@ -5,7 +5,7 @@
/// Storage: events buffer in memory, flush to Parquet periodically. /// Storage: events buffer in memory, flush to Parquet periodically.
/// Query: load Parquet files, filter by entity/field/actor/time. /// Query: load Parquet files, filter by entity/field/actor/time.
use arrow::array::{ArrayRef, RecordBatch, StringArray, UInt64Array}; use arrow::array::{ArrayRef, RecordBatch, StringArray};
use arrow::datatypes::{DataType, Field, Schema}; use arrow::datatypes::{DataType, Field, Schema};
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use object_store::ObjectStore; use object_store::ObjectStore;

View File

@ -7,6 +7,7 @@ edition = "2024"
shared = { path = "../shared" } shared = { path = "../shared" }
catalogd = { path = "../catalogd" } catalogd = { path = "../catalogd" }
storaged = { path = "../storaged" } storaged = { path = "../storaged" }
truth = { path = "../truth" }
tokio = { workspace = true } tokio = { workspace = true }
axum = { workspace = true } axum = { workspace = true }
serde = { workspace = true } serde = { workspace = true }

View File

@ -84,14 +84,17 @@ pub async fn compact(
// Load deltas // Load deltas
let delta_batches = load_deltas(store, dataset_name).await?; let delta_batches = load_deltas(store, dataset_name).await?;
let delta_count = delta_batches.len(); let delta_count = delta_batches.len();
// Row counts captured before extend; previously base_rows subtracted delta_count (files) from rows — unit mismatch.
let base_row_count: usize = base_batches.iter().map(|b| b.num_rows()).sum();
let delta_row_count: usize = delta_batches.iter().map(|b| b.num_rows()).sum();
let has_tombstones = !tombstones.is_empty(); let has_tombstones = !tombstones.is_empty();
let nothing_to_do = delta_batches.is_empty() && !has_tombstones; let nothing_to_do = delta_batches.is_empty() && !has_tombstones;
if nothing_to_do { if nothing_to_do {
return Ok(CompactResult { return Ok(CompactResult {
base_rows: base_batches.iter().map(|b| b.num_rows()).sum(), base_rows: base_row_count,
delta_rows: 0, delta_rows: 0,
final_rows: base_batches.iter().map(|b| b.num_rows()).sum(), final_rows: base_row_count,
deltas_merged: 0, deltas_merged: 0,
tombstones_applied: 0, tombstones_applied: 0,
rows_dropped_by_tombstones: 0, rows_dropped_by_tombstones: 0,
@ -99,7 +102,7 @@ pub async fn compact(
} }
base_batches.extend(delta_batches); base_batches.extend(delta_batches);
let pre_filter_rows: usize = base_batches.iter().map(|b| b.num_rows()).sum(); let pre_filter_rows: usize = base_row_count + delta_row_count;
// If primary key specified, deduplicate (keep last occurrence) // If primary key specified, deduplicate (keep last occurrence)
let merged_batches = if let Some(_pk) = primary_key_col { let merged_batches = if let Some(_pk) = primary_key_col {
@ -183,8 +186,8 @@ pub async fn compact(
); );
Ok(CompactResult { Ok(CompactResult {
base_rows: pre_filter_rows - delta_count, // rough base-before-deltas base_rows: base_row_count,
delta_rows: delta_count, delta_rows: delta_row_count,
final_rows, final_rows,
deltas_merged: delta_count, deltas_merged: delta_count,
tombstones_applied: tombstones.len(), tombstones_applied: tombstones.len(),

View File

@ -9,7 +9,9 @@ use axum::{
}; };
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::cache::CacheStats; use std::sync::Arc;
use truth::{RuleAction, TruthStore};
use crate::context::QueryEngine; use crate::context::QueryEngine;
use crate::delta; use crate::delta;
use crate::paged::ResultStore; use crate::paged::ResultStore;
@ -18,12 +20,26 @@ use crate::paged::ResultStore;
pub struct QueryState { pub struct QueryState {
pub engine: QueryEngine, pub engine: QueryEngine,
pub result_store: ResultStore, pub result_store: ResultStore,
// Policy gate for incoming SQL. Every /sql and /paged request is
// evaluated against this store before hitting DataFusion. Added for
// P42-002 ("raw SQL forwarded without schema or policy gate") after
// the scrum master's queryd/service.rs finding looped across iters
// 3-5 without ever being reachable by the 6-line auto-applier.
pub truth: Arc<TruthStore>,
} }
pub fn router(engine: QueryEngine) -> Router { pub fn router(engine: QueryEngine) -> Router {
router_with_truth(engine, Arc::new(truth::sql_query_guard_store()))
}
/// Test/integration hook: construct the router with a caller-supplied
/// TruthStore so tests can assert reject/pass behavior deterministically
/// without depending on the default needle list.
pub fn router_with_truth(engine: QueryEngine, truth: Arc<TruthStore>) -> Router {
let state = QueryState { let state = QueryState {
engine: engine.clone(), engine: engine.clone(),
result_store: ResultStore::new(100, 50), // 100 rows/page, keep 50 results result_store: ResultStore::new(100, 50), // 100 rows/page, keep 50 results
truth,
}; };
Router::new() Router::new()
.route("/health", get(health)) .route("/health", get(health))
@ -53,6 +69,11 @@ struct QueryResponse {
columns: Vec<ColumnInfo>, columns: Vec<ColumnInfo>,
rows: serde_json::Value, rows: serde_json::Value,
row_count: usize, row_count: usize,
// Elapsed wall time from handler entry to response. Required for
// audit-log parity — gateway's audit row previously stored null here.
// Scrum iter 9 finding, populated from std::time::Instant captured
// at the top of execute_query / paged_query.
latency_ms: u64,
} }
#[derive(Serialize)] #[derive(Serialize)]
@ -72,12 +93,41 @@ fn batches_to_json(batches: &[RecordBatch]) -> Result<serde_json::Value, String>
serde_json::from_slice(&buf).map_err(|e| format!("JSON parse error: {e}")) serde_json::from_slice(&buf).map_err(|e| format!("JSON parse error: {e}"))
} }
/// Evaluate the request SQL against the configured TruthStore. Returns
/// the Reject/Block message of the first failing mandatory rule so the
/// handler can short-circuit; `None` when every rule passes or only
/// non-mandatory actions (Redact/Pass, etc.) would apply.
fn sql_policy_check(truth: &TruthStore, sql: &str) -> Option<String> {
    // The rule engine reports `passed = true` when a rule's condition
    // HOLDS (e.g. FieldContainsAny matched the SQL) — that is when its
    // action must be enforced. Outcomes whose condition did not hold
    // are skipped entirely.
    let ctx = serde_json::json!({ "sql": sql });
    truth
        .evaluate("sql_query", &ctx)
        .into_iter()
        .filter(|outcome| outcome.passed)
        .find_map(|outcome| match &outcome.action {
            RuleAction::Reject { message } | RuleAction::Block { message } => {
                Some(message.clone())
            }
            _ => None,
        })
}
async fn execute_query( async fn execute_query(
State(state): State<QueryState>, State(state): State<QueryState>,
Json(req): Json<QueryRequest>, Json(req): Json<QueryRequest>,
) -> impl IntoResponse { ) -> impl IntoResponse {
let started = std::time::Instant::now();
tracing::info!("executing query: {}", req.sql); tracing::info!("executing query: {}", req.sql);
if let Some(reason) = sql_policy_check(&state.truth, &req.sql) {
tracing::warn!("sql rejected by truth gate: {reason}");
return Err((StatusCode::FORBIDDEN, reason));
}
match state.engine.query(&req.sql).await { match state.engine.query(&req.sql).await {
Ok(batches) => { Ok(batches) => {
if batches.is_empty() { if batches.is_empty() {
@ -85,6 +135,7 @@ async fn execute_query(
columns: vec![], columns: vec![],
rows: serde_json::Value::Array(vec![]), rows: serde_json::Value::Array(vec![]),
row_count: 0, row_count: 0,
latency_ms: started.elapsed().as_millis() as u64,
})); }));
} }
@ -103,6 +154,7 @@ async fn execute_query(
columns, columns,
rows, rows,
row_count, row_count,
latency_ms: started.elapsed().as_millis() as u64,
})) }))
} }
Err(e) => Err((StatusCode::BAD_REQUEST, e)), Err(e) => Err((StatusCode::BAD_REQUEST, e)),
@ -116,6 +168,10 @@ async fn paged_query(
Json(req): Json<QueryRequest>, Json(req): Json<QueryRequest>,
) -> impl IntoResponse { ) -> impl IntoResponse {
tracing::info!("paged query: {}", req.sql); tracing::info!("paged query: {}", req.sql);
if let Some(reason) = sql_policy_check(&state.truth, &req.sql) {
tracing::warn!("paged sql rejected by truth gate: {reason}");
return Err((StatusCode::FORBIDDEN, reason));
}
match state.result_store.execute_and_store(&state.engine, &req.sql).await { match state.result_store.execute_and_store(&state.engine, &req.sql).await {
Ok(handle) => Ok(Json(handle)), Ok(handle) => Ok(Json(handle)),
Err(e) => Err((StatusCode::BAD_REQUEST, e)), Err(e) => Err((StatusCode::BAD_REQUEST, e)),
@ -212,3 +268,65 @@ async fn compact_dataset(
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)), Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
} }
} }
#[cfg(test)]
mod sql_policy_tests {
    use super::*;
    use truth::sql_query_guard_store;

    // Exercises the P42-002 policy gate without spinning up a DataFusion
    // engine — only a `TruthStore` is required. This is the regression
    // guard for the queryd/service.rs finding that looped across scrum
    // iters 3-5 without ever being enforced.

    /// Run one statement through a fresh default guard store and hand
    /// back the rejection reason, if any.
    fn gate(sql: &str) -> Option<String> {
        sql_policy_check(&sql_query_guard_store(), sql)
    }

    #[test]
    fn blocks_drop_table() {
        let reason = gate("DROP TABLE users").expect("must reject");
        assert!(reason.contains("destructive"), "reason: {reason}");
    }

    #[test]
    fn blocks_delete_from() {
        assert!(gate("delete from t where 1=1").is_some());
    }

    #[test]
    fn blocks_truncate() {
        assert!(gate("TRUNCATE workers").is_some());
    }

    #[test]
    fn blocks_empty_sql() {
        assert!(gate("").is_some());
    }

    #[test]
    fn allows_benign_select() {
        assert!(gate("SELECT count(*) FROM workers").is_none());
    }

    #[test]
    fn allows_select_with_deleted_word_in_column() {
        // The needle is "delete from", not bare "delete", so a column
        // named `deleted_at` must pass — false positives on benign
        // queries would make the gate unusable in practice.
        assert!(
            gate("SELECT deleted_at FROM t").is_none(),
            "column names containing 'delete' must not be rejected"
        );
    }

    #[test]
    fn case_insensitive_match_catches_mixed_case() {
        assert!(gate("Drop Table X").is_some());
    }
}

View File

@ -2,15 +2,12 @@
/// Each workspace tracks an agent's activity on a specific contract or search, /// Each workspace tracks an agent's activity on a specific contract or search,
/// with daily/weekly/monthly tiers and instant handoff capability. /// with daily/weekly/monthly tiers and instant handoff capability.
use arrow::array::{ArrayRef, RecordBatch, StringArray, Int64Array};
use arrow::datatypes::{DataType, Field, Schema};
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::collections::HashMap; use std::collections::HashMap;
use std::sync::Arc; use std::sync::Arc;
use tokio::sync::RwLock; use tokio::sync::RwLock;
use crate::delta;
use object_store::ObjectStore; use object_store::ObjectStore;
/// Retention tier for workspace data. /// Retention tier for workspace data.

View File

@ -4,3 +4,5 @@ pub mod arrow_helpers;
pub mod config; pub mod config;
pub mod pii; pub mod pii;
pub mod secrets; pub mod secrets;
pub mod model_matrix;
pub mod profiles;

View File

@ -0,0 +1,69 @@
//! Per-model token accounting. Entry point for the ModelMatrix work
//! the aibridge `context::estimate_tokens` deprecation has been pointing
//! at. Starts minimal — just `estimate_tokens` — so call sites can
//! migrate off the deprecated helper. Extend with per-model context
//! windows, max_tokens defaults, provider hints, etc. as we move the
//! rest of `aibridge::context::known_windows` over.
/// Namespace for per-model token + context accounting. Methods are
/// associated functions — no instance required — because the underlying
/// estimates are deterministic and stateless.
pub struct ModelMatrix;

impl ModelMatrix {
    /// Rough token count: ceiling of the Unicode-scalar count divided
    /// by 4. Same heuristic OpenAI's cookbook uses for English text —
    /// within ±15% of BPE tokenizers for code + prose, with no
    /// tokenizer lookup. Good enough for "don't blow the context
    /// window" budget math; not for billing.
    ///
    /// Migrated from `aibridge::context::estimate_tokens` (left behind
    /// with a `#[deprecated]` pointer). Empty string → 0; a single
    /// char → 1.
    pub fn estimate_tokens(text: &str) -> usize {
        // chars(), not len(): multi-byte UTF-8 scalars count once each.
        text.chars().count().div_ceil(4)
    }
}
#[cfg(test)]
mod tests {
    use super::ModelMatrix;

    #[test]
    fn empty_string_is_zero_tokens() {
        // No text, no tokens.
        assert_eq!(ModelMatrix::estimate_tokens(""), 0);
    }

    #[test]
    fn three_chars_is_one_token() {
        // ceil(3/4) = 1 — pins drop-in parity with the deprecated
        // aibridge helper so migration is mechanical.
        assert_eq!(ModelMatrix::estimate_tokens("abc"), 1);
    }

    #[test]
    fn four_chars_is_one_token() {
        // Exactly one full 4-char bucket.
        assert_eq!(ModelMatrix::estimate_tokens("abcd"), 1);
    }

    #[test]
    fn five_chars_is_two_tokens() {
        // One char past the bucket boundary rounds up.
        assert_eq!(ModelMatrix::estimate_tokens("abcde"), 2);
    }

    #[test]
    fn counts_chars_not_bytes() {
        // Unicode scalars count once each regardless of UTF-8 width,
        // so emoji/accents don't inflate the estimate.
        assert_eq!(ModelMatrix::estimate_tokens("héllo"), 2);
        assert_eq!(ModelMatrix::estimate_tokens("📚📚📚📚"), 1); // 4 chars
    }

    #[test]
    fn large_text_scales_linearly() {
        let exact = "x".repeat(400);
        assert_eq!(ModelMatrix::estimate_tokens(&exact), 100);
        let one_over = "x".repeat(401);
        assert_eq!(ModelMatrix::estimate_tokens(&one_over), 101);
    }
}

View File

@ -0,0 +1,14 @@
//! ExecutionProfile — the Phase 41 rename of Phase 17's ModelProfile.
//!
//! Carries what's needed to RUN inference: model tag, dataset bindings,
//! HNSW config, embed model, bucket binding. Today this is a type
//! alias over `crate::types::ModelProfile` — the PRD's
//! "Backward compat: ModelProfile still loads, aliased to
//! ExecutionProfile" line, honored literally.
//!
//! When the migration off the old name finishes, this file can either
//! absorb the full struct definition or continue as an alias. Callers
//! should reference `ExecutionProfile` going forward; `ModelProfile`
//! stays exported from `types` for on-disk schema compat.
pub use crate::types::ModelProfile as ExecutionProfile;

View File

@ -0,0 +1,38 @@
//! MemoryProfile — how the agent's execution memory is kept.
//!
//! Phase 41 decomposition: the Phase 19 playbook_memory + Phase 26
//! successful_playbooks + Phase 45 doc_refs all need per-profile
//! tuning. Rather than bolt those onto ExecutionProfile, they live
//! here so a "thin" execution profile can reuse a "fat" memory
//! profile and vice versa.
use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct MemoryProfile {
    /// Unique id for this profile.
    ///
    /// NOTE(review): neither `id` nor `created_at` carries
    /// `#[serde(default)]`, so a payload missing either field fails to
    /// deserialize — confirm that's intended given the Phase 41
    /// "old payloads load with empty defaults" compat rule.
    pub id: String,
    /// Free-text operator description; empty when omitted.
    #[serde(default)]
    pub description: String,
    /// Phase 19: ceiling for playbook_memory boost on retrieval. 0
    /// disables the boost entirely. Defaults to 0.35.
    #[serde(default = "default_boost_ceiling")]
    pub playbook_boost_ceiling: f32,
    /// Phase 26: max history entries retained before rotation.
    /// Defaults to 1000.
    #[serde(default = "default_history_cap")]
    pub history_cap: usize,
    /// Phase 45: stale threshold for doc_refs before drift check
    /// fires (hours). Defaults to 168 (one week).
    #[serde(default = "default_stale_hours")]
    pub doc_stale_hours: u32,
    /// Phase 28: auto-retire playbooks that fail 3+ consecutive runs.
    /// Defaults to true (retire).
    #[serde(default = "default_true")]
    pub auto_retire_on_failure: bool,
    /// Creation timestamp — required on the wire (no serde default).
    pub created_at: chrono::DateTime<chrono::Utc>,
    /// Operator/agent that created the profile; empty when omitted.
    #[serde(default)]
    pub created_by: String,
}

// Serde default providers — keep values in sync with the field docs above.
fn default_boost_ceiling() -> f32 { 0.35 }
fn default_history_cap() -> usize { 1000 }
fn default_stale_hours() -> u32 { 168 } // one week
fn default_true() -> bool { true }

View File

@ -0,0 +1,28 @@
//! Phase 41 profile types.
//!
//! The existing `ModelProfile` (Phase 17) is aliased as
//! `ExecutionProfile` here — it continues to carry the model +
//! bindings + HNSW config needed to run inference. Three new profile
//! types land alongside: `RetrievalProfile`, `MemoryProfile`,
//! `ObserverProfile` — each owns a distinct slice of what used to be
//! bundled.
//!
//! Backward-compat rule (PRD Phase 41): existing `ModelProfile` on
//! disk continues to deserialize unchanged. New fields on the new
//! profile types are `#[serde(default)]` so old payloads load with
//! empty defaults.
//!
//! These are the canonical shapes — downstream code converts via
//! `From<ModelProfile> for ExecutionProfile` (they're the same struct
//! today, just named differently) and constructs the other three
//! as needed.
pub mod execution;
pub mod retrieval;
pub mod memory;
pub mod observer;
pub use execution::ExecutionProfile;
pub use memory::MemoryProfile;
pub use observer::ObserverProfile;
pub use retrieval::RetrievalProfile;

View File

@ -0,0 +1,38 @@
//! ObserverProfile — how loudly the observer logs this workload.
//!
//! Phase 41 decomposition: the observer's alert thresholds, escalation
//! cadence, and log retention need per-workload tuning. Hot-path
//! staffing workflows want aggressive alerting; batch backfills want
//! quieter. This profile is read by mcp-server/observer.ts at
//! activation-time.
use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ObserverProfile {
    /// Unique id for this profile — required on the wire (no serde
    /// default, same as `created_at` below).
    pub id: String,
    /// Free-text operator description; empty when omitted.
    #[serde(default)]
    pub description: String,
    /// How many consecutive failures trigger a cluster escalation to
    /// LLM Team `/v1/chat` (qwen3-coder:480b). Defaults to 3.
    #[serde(default = "default_failure_cluster_size")]
    pub failure_cluster_size: u32,
    /// Minimum seconds between alert emails for the same sig_hash.
    /// Prevents alert storms during a regression. Defaults to 300.
    #[serde(default = "default_alert_cooldown")]
    pub alert_cooldown_secs: u32,
    /// Observer ring buffer size. Older events fall off when full.
    /// Defaults to 2000.
    #[serde(default = "default_ring_size")]
    pub ring_size: usize,
    /// Whether to forward events to external Langfuse
    /// (/v1/langfuse_trace). Off by default (`bool::default()`).
    #[serde(default)]
    pub forward_to_langfuse: bool,
    /// Creation timestamp — required on the wire.
    pub created_at: chrono::DateTime<chrono::Utc>,
    /// Operator/agent that created the profile; empty when omitted.
    #[serde(default)]
    pub created_by: String,
}

// Serde default providers — keep values in sync with the field docs above.
fn default_failure_cluster_size() -> u32 { 3 }
fn default_alert_cooldown() -> u32 { 300 } // 5 minutes
fn default_ring_size() -> usize { 2000 }

View File

@ -0,0 +1,52 @@
//! RetrievalProfile — what + how the agent reaches into memory.
//!
//! Phase 41 decomposition: the old ModelProfile bundled "what dataset
//! can I read" (bound_datasets) AND "how do I rank results"
//! (hnsw_config) with the model tag. Retrieval concerns split out here
//! so a profile can swap its retrieval strategy without re-activating
//! the model.
//!
//! Fields chosen for what's actually varied per-workload today:
//! - `top_k` / `rerank_top_k` — how many hits to fetch + rerank
//! - `freshness_cutoff_days` — Phase 45 doc-drift uses this
//! - `boost_playbook_memory` — Phase 19 meta-index feedback
//! - `enforce_sensitivity_gates` — Phase 13 access-control integration
//!
//! New fields are `#[serde(default)]` so loading a profile file that
//! predates Phase 41 works without migration (note: `id` and
//! `created_at` carry no default and must be present).
use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RetrievalProfile {
    /// Unique id — slug form, separate namespace from ExecutionProfile.
    pub id: String,
    /// Free-text operator description.
    #[serde(default)]
    pub description: String,
    /// Default top-K for /vectors/search + /vectors/hybrid.
    #[serde(default = "default_top_k")]
    pub top_k: u32,
    /// How many of the top-K to pass through the reranker. 0 disables
    /// reranking for this profile.
    #[serde(default = "default_rerank_top_k")]
    pub rerank_top_k: u32,
    /// Don't consider playbooks / docs older than this (days). 0 or
    /// absent = no freshness filter.
    #[serde(default)]
    pub freshness_cutoff_days: u32,
    /// Phase 19: boost workers/results by playbook_memory similarity.
    #[serde(default)]
    pub boost_playbook_memory: bool,
    /// Phase 13: apply access-control masking on sensitive columns.
    /// Default on — safety-first.
    #[serde(default = "default_true")]
    pub enforce_sensitivity_gates: bool,
    /// Creation timestamp (UTC). No serde default — must be present.
    pub created_at: chrono::DateTime<chrono::Utc>,
    /// Who created the profile; empty for payloads predating this field.
    #[serde(default)]
    pub created_by: String,
}

// serde default helpers — keep values in sync with the field docs above.
fn default_top_k() -> u32 { 10 }
fn default_rerank_top_k() -> u32 { 5 }
fn default_true() -> bool { true }

11
crates/truth/Cargo.toml Normal file
View File

@ -0,0 +1,11 @@
# truth — Phase 42 rule store: TruthRule/TruthStore types, staffing +
# DevOps rule sets, and the file-backed TOML rule loader (loader.rs).
[package]
name = "truth"
version = "0.1.0"
edition = "2024"

[dependencies]
serde = { workspace = true }
serde_json = { workspace = true }
# NOTE(review): tokio/tracing are pulled from the workspace but not used
# by the code shown in this crate's sources — confirm they're needed.
tokio = { workspace = true }
tracing = { workspace = true }
# toml backs the file-based rule loader.
toml = { workspace = true }

View File

@ -0,0 +1,49 @@
//! DevOps task-class rules — scaffold for the long-horizon phase.
//!
//! Phase 42 PRD: "Terraform/Ansible rule shapes are scaffolded but
//! unpopulated until the long-horizon phase. Keeps the dispatcher
//! signature stable so no refactor needed later."
//!
//! This module is intentionally minimal. It registers no rules yet.
//! The `devops_rules` function exists so callers can compose it onto
//! a store (e.g. `devops_rules(staffing_rules(TruthStore::new()))`)
//! without branching on whether the DevOps phase has landed.
//!
//! When the long-horizon phase fleshes out the DevOps rule set, the
//! implementations drop in here — same `RuleCondition` primitives, same
//! `TruthStore::evaluate` contract, zero upstream refactor.
use crate::TruthStore;
/// Register DevOps rules on the store. Currently a no-op scaffold —
/// no rules are added. Safe to compose with other rule-set functions.
/// Takes and returns the store by value so calls chain functionally.
///
/// Planned task classes (not yet populated):
/// - `devops.terraform_plan` — `terraform validate` + pre-plan
///   sanity checks (no destroys without confirm flag, etc.)
/// - `devops.ansible_playbook` — `ansible-lint` + privileged-task
///   gates (no `become: true` on untagged hosts)
/// - `devops.shell_command` — whitelist / blocklist for
///   AI-generated shell invocations (covers what Phase 42
///   queryd SQL gate does for SQL — same idea, shell surface)
pub fn devops_rules(store: TruthStore) -> TruthStore {
    // Intentionally empty. See module-level doc for the phased rollout.
    store
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn devops_rules_is_a_noop_for_now() {
        // Scaffold guarantee: composing devops_rules onto an empty
        // store must not add any rules. Future long-horizon work will
        // populate this and the assertion shifts to counting the
        // expected additions.
        let store = devops_rules(TruthStore::new());
        assert_eq!(store.get_rules("devops.terraform_plan").len(), 0);
        assert_eq!(store.get_rules("devops.ansible_playbook").len(), 0);
        assert_eq!(store.get_rules("devops.shell_command").len(), 0);
    }
}

610
crates/truth/src/lib.rs Normal file
View File

@ -0,0 +1,610 @@
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
pub mod staffing;
pub mod devops;
pub mod loader;
/// One declarative rule: a condition evaluated against a JSON context
/// plus the action enforcement should apply. Rules are grouped by
/// `task_class` inside a `TruthStore`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct TruthRule {
    /// Unique id — the file loader rejects duplicate ids across the
    /// combined in-code + file-loaded rule set.
    pub id: String,
    /// Grouping key, e.g. "staffing.fill"; rules are fetched per class.
    pub task_class: String,
    /// Human-readable summary for operators.
    pub description: String,
    /// Predicate walked by `TruthStore::evaluate` against a JSON context.
    pub condition: RuleCondition,
    /// What enforcement should do, depending on whether `condition` held.
    pub action: RuleAction,
}

/// Predicate shapes evaluated by `evaluate_condition`. Serialized with
/// an internal `"type"` tag so TOML/JSON rule files can spell
/// `condition = { type = "FieldEquals", ... }`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RuleCondition {
    Always,
    FieldEquals { field: String, value: String },
    FieldMismatch { field: String, value: String },
    FieldEmpty { field: String },
    FieldGreater { field: String, threshold: i64 },
    /// Case-insensitive substring scan — true if the field value contains
    /// ANY of `needles`. Added for SQL/command guards where rules of the
    /// form "sql must not contain DROP/DELETE/TRUNCATE" need to express
    /// enforcement as a passing precondition being absent.
    FieldContainsAny { field: String, needles: Vec<String> },
}

/// Enforcement responses; tagged the same way as `RuleCondition` so the
/// file loader can deserialize `action = { type = "Reject", ... }`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RuleAction {
    Pass,
    Reject { message: String },
    Redact { fields: Vec<String> },
    Block { message: String },
}

/// Rule registry keyed by task class.
#[derive(Default)]
pub struct TruthStore {
    rules: HashMap<String, Vec<TruthRule>>,
}
impl TruthStore {
pub fn new() -> Self {
Self::default()
}
pub fn add_rule(&mut self, rule: TruthRule) {
self.rules
.entry(rule.task_class.clone())
.or_default()
.push(rule);
}
/// All rule IDs across every task class. Used by the file loader
/// to detect duplicate-ID collisions before registering new rules.
pub fn all_rule_ids(&self) -> std::collections::HashSet<String> {
self.rules
.values()
.flat_map(|v| v.iter().map(|r| r.id.clone()))
.collect()
}
pub fn get_rules(&self, task_class: &str) -> Vec<&TruthRule> {
self.rules
.get(task_class)
.map(|v| v.iter().collect())
.unwrap_or_default()
}
/// Legacy API: returns the list of actions registered for a task class
/// without evaluating conditions. Retained for backward compatibility
/// with callers that only want the action catalog. New callers should
/// prefer `evaluate()`, which actually walks `RuleCondition` against
/// a context and reports per-rule pass/fail.
pub fn check(&self, task_class: &str) -> Vec<RuleAction> {
let rules = self.get_rules(task_class);
rules
.into_iter()
.map(|r| r.action.clone())
.collect()
}
/// Evaluate every rule registered for `task_class` against `ctx`,
/// returning one `RuleOutcome` per rule. `passed = true` means the
/// rule's `condition` held; the rule's action is still attached so
/// callers can distinguish "passed and therefore no-op" (RuleAction::Pass)
/// from "passed and apply Redact". `passed = false` means the condition
/// failed — callers should treat the attached action as the enforcement
/// response (Reject/Block).
///
/// Fixed P42-001 (2026-04-23): previously `check()` returned all actions
/// unconditionally — the `RuleCondition` field was ignored. Now every
/// rule is actually walked against the provided context.
pub fn evaluate(&self, task_class: &str, ctx: &serde_json::Value) -> Vec<RuleOutcome> {
self.get_rules(task_class)
.into_iter()
.map(|r| RuleOutcome {
rule_id: r.id.clone(),
passed: evaluate_condition(&r.condition, ctx),
action: r.action.clone(),
})
.collect()
}
}
/// Result of evaluating one rule against a context. `passed` reports
/// whether the condition held; `action` is the rule's declared action
/// regardless (callers decide how to apply it based on `passed`).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct RuleOutcome {
    /// Id of the rule that produced this outcome.
    pub rule_id: String,
    /// Whether the rule's condition held against the supplied context.
    pub passed: bool,
    /// The rule's declared action, attached whether or not it passed.
    pub action: RuleAction,
}
/// Walk one `RuleCondition` against `ctx`, returning whether it holds.
/// A missing field never satisfies a condition, with one deliberate
/// exception: `FieldEmpty` treats "absent" the same as null / "".
///
/// Fix (review): `FieldGreater` previously truncated float fields to
/// i64 (`f as i64`) before comparing, so e.g. a value of 0.5 against
/// threshold 0 evaluated as 0 > 0 = false. Fractional values now
/// compare in f64; exact i64 comparison is kept for integer-valued
/// fields so large magnitudes lose no precision.
fn evaluate_condition(cond: &RuleCondition, ctx: &serde_json::Value) -> bool {
    match cond {
        RuleCondition::Always => true,
        RuleCondition::FieldEquals { field, value } => {
            field_as_string(ctx, field)
                .map(|s| s == *value)
                .unwrap_or(false)
        }
        RuleCondition::FieldMismatch { field, value } => {
            // Note: a missing field is NOT a mismatch — it fails the
            // condition, same as every other field-based predicate.
            field_as_string(ctx, field)
                .map(|s| s != *value)
                .unwrap_or(false)
        }
        RuleCondition::FieldEmpty { field } => {
            match lookup(ctx, field) {
                // Absent counts as empty — rules like "deadline-required"
                // rely on this so the Reject fires on a missing field.
                None => true,
                Some(v) => v.is_null() || v.as_str().map(|s| s.is_empty()).unwrap_or(false),
            }
        }
        RuleCondition::FieldGreater { field, threshold } => {
            match lookup(ctx, field) {
                None => false,
                Some(v) => {
                    if let Some(n) = v.as_i64() {
                        // Exact integer comparison — no f64 rounding for
                        // values near i64::MAX.
                        n > *threshold
                    } else if let Some(f) = v.as_f64() {
                        // Fractional (or u64-range) values: compare in f64
                        // instead of truncating toward zero.
                        f > *threshold as f64
                    } else {
                        false
                    }
                }
            }
        }
        RuleCondition::FieldContainsAny { field, needles } => {
            match field_as_string(ctx, field) {
                None => false,
                Some(s) => {
                    let haystack = s.to_ascii_lowercase();
                    needles.iter().any(|n| haystack.contains(&n.to_ascii_lowercase()))
                }
            }
        }
    }
}
/// Resolve a dot-separated path against a serde_json::Value:
/// `"worker.status"` → `ctx["worker"]["status"]`. Yields None as soon
/// as a segment is missing or a non-object appears mid-path.
fn lookup<'a>(ctx: &'a serde_json::Value, path: &str) -> Option<&'a serde_json::Value> {
    path.split('.').try_fold(ctx, |node, segment| node.get(segment))
}

/// Stringify the value at `path` for string-based comparisons:
/// strings clone through, bools and numbers use their canonical
/// display form, everything else (arrays, objects, null) is None.
fn field_as_string(ctx: &serde_json::Value, path: &str) -> Option<String> {
    let value = lookup(ctx, path)?;
    match value {
        serde_json::Value::String(s) => Some(s.clone()),
        serde_json::Value::Bool(b) => Some(b.to_string()),
        serde_json::Value::Number(n) => Some(n.to_string()),
        _ => None,
    }
}
/// Minimal SQL guard — rejects destructive verbs (DROP/TRUNCATE/DELETE)
/// and empty statements. queryd/src/service.rs loads this into its
/// `QueryState` and evaluates every `/sql` request against it before it
/// reaches the DataFusion engine. This is the P42-002 enforcement point
/// flagged across scrum iters 3-5 ("raw SQL forwarded without schema or
/// policy gate").
///
/// Intentionally narrow: a safety net, not a SQL parser. Richer
/// AST-aware enforcement should land as structured rules, not as more
/// needles here.
pub fn sql_query_guard_store() -> TruthStore {
    let mut store = TruthStore::new();

    // Case-insensitive substring scan over the raw statement.
    let destructive = [
        "drop table",
        "drop schema",
        "drop database",
        "truncate",
        "delete from",
    ];
    store.add_rule(TruthRule {
        id: "no-destructive-sql".to_string(),
        task_class: "sql_query".to_string(),
        description: "SQL must not contain destructive verbs".to_string(),
        condition: RuleCondition::FieldContainsAny {
            field: "sql".to_string(),
            needles: destructive.iter().map(|s| s.to_string()).collect(),
        },
        action: RuleAction::Reject {
            message: "destructive SQL rejected by truth.sql_query_guard".to_string(),
        },
    });

    // An absent / null / "" statement is rejected outright.
    store.add_rule(TruthRule {
        id: "sql-not-empty".to_string(),
        task_class: "sql_query".to_string(),
        description: "SQL must not be empty".to_string(),
        condition: RuleCondition::FieldEmpty {
            field: "sql".to_string(),
        },
        action: RuleAction::Reject {
            message: "empty SQL rejected".to_string(),
        },
    });

    store
}
/// Phase 42 default store: staffing rules + DevOps scaffold composed
/// onto an empty TruthStore. Per the PRD: "Staffing rules ship first;
/// Terraform/Ansible rule shapes are scaffolded but unpopulated until
/// the long-horizon phase." The DevOps set is currently empty, so the
/// composition order does not matter — it is kept staffing-then-devops
/// so the shape matches the PRD's "compose on top" pattern.
///
/// (2026-04-24) Rule registration moved out of this function into the
/// `staffing.rs` / `devops.rs` modules per the Phase 42 module split;
/// behavior unchanged for existing callers.
pub fn default_truth_store() -> TruthStore {
    let base = TruthStore::new();
    let with_staffing = staffing::staffing_rules(base);
    devops::devops_rules(with_staffing)
}
#[cfg(test)]
mod tests {
    use super::*;

    // ── store construction / registration ──

    #[test]
    fn truth_store_new_is_empty() {
        let store = TruthStore::new();
        assert!(store.rules.is_empty());
    }

    #[test]
    fn add_rule_inserts_into_correct_task_class() {
        let mut store = TruthStore::new();
        store.add_rule(TruthRule {
            id: "test-rule".to_string(),
            task_class: "test.task".to_string(),
            description: "Test rule".to_string(),
            condition: RuleCondition::Always,
            action: RuleAction::Pass,
        });
        let rules = store.get_rules("test.task");
        assert_eq!(rules.len(), 1);
        assert_eq!(rules[0].id, "test-rule");
    }

    #[test]
    fn get_rules_returns_empty_for_unknown_class() {
        let store = TruthStore::new();
        let rules = store.get_rules("unknown.class");
        assert!(rules.is_empty());
    }

    #[test]
    fn check_returns_actions_for_task_class() {
        let mut store = TruthStore::new();
        store.add_rule(TruthRule {
            id: "a1".to_string(),
            task_class: "test".to_string(),
            description: "A1".to_string(),
            condition: RuleCondition::Always,
            action: RuleAction::Pass,
        });
        store.add_rule(TruthRule {
            id: "a2".to_string(),
            task_class: "test".to_string(),
            description: "A2".to_string(),
            condition: RuleCondition::Always,
            action: RuleAction::Reject {
                message: "test reject".to_string(),
            },
        });
        let actions = store.check("test");
        assert_eq!(actions.len(), 2);
    }

    // ── serde wire-shape tests: pin the `"type"`-tagged encoding the
    // TOML/JSON rule files rely on ──

    #[test]
    fn rule_condition_serialize_always() {
        let cond = RuleCondition::Always;
        let json = serde_json::to_string(&cond).unwrap();
        assert!(json.contains(r#""type":"Always"#));
    }

    #[test]
    fn rule_condition_serialize_field_equals() {
        let cond = RuleCondition::FieldEquals {
            field: "foo".to_string(),
            value: "bar".to_string(),
        };
        let json = serde_json::to_string(&cond).unwrap();
        assert!(json.contains(r#""type":"FieldEquals""#));
        assert!(json.contains(r#""field":"foo""#));
        assert!(json.contains(r#""value":"bar""#));
    }

    #[test]
    fn rule_action_serialize_redact() {
        let action = RuleAction::Redact {
            fields: vec!["ssn".to_string()],
        };
        let json = serde_json::to_string(&action).unwrap();
        assert!(json.contains(r#""type":"Redact""#));
        assert!(json.contains("ssn"));
    }

    #[test]
    fn rule_action_serialize_reject() {
        let action = RuleAction::Reject {
            message: "test".to_string(),
        };
        let json = serde_json::to_string(&action).unwrap();
        assert!(json.contains(r#""type":"Reject""#));
    }

    #[test]
    fn default_truth_store_has_staffing_rules() {
        let store = default_truth_store();
        let fill_rules = store.get_rules("staffing.fill");
        assert!(!fill_rules.is_empty());
        let any_rules = store.get_rules("staffing.any");
        assert!(!any_rules.is_empty());
    }

    #[test]
    fn multiple_rules_same_task_class() {
        let mut store = TruthStore::new();
        for i in 0..5 {
            store.add_rule(TruthRule {
                id: format!("rule-{}", i),
                task_class: "test".to_string(),
                description: format!("Rule {}", i),
                condition: RuleCondition::Always,
                action: RuleAction::Pass,
            });
        }
        let rules = store.get_rules("test");
        assert_eq!(rules.len(), 5);
    }

    #[test]
    fn truth_rule_clone_preserves_data() {
        let rule = TruthRule {
            id: "clone-test".to_string(),
            task_class: "clone.task".to_string(),
            description: "Clone test".to_string(),
            condition: RuleCondition::FieldEquals {
                field: "x".to_string(),
                value: "y".to_string(),
            },
            action: RuleAction::Block {
                message: "blocked".to_string(),
            },
        };
        let cloned = rule.clone();
        assert_eq!(cloned.id, rule.id);
        assert_eq!(cloned.condition, rule.condition);
        assert_eq!(cloned.action, rule.action);
    }

    #[test]
    fn field_greater_condition_parse() {
        let json = r#"{"type":"FieldGreater","field":"count","threshold":10}"#;
        let cond: RuleCondition = serde_json::from_str(json).unwrap();
        match cond {
            RuleCondition::FieldGreater { field, threshold } => {
                assert_eq!(field, "count");
                assert_eq!(threshold, 10);
            }
            _ => panic!("Expected FieldGreater"),
        }
    }

    #[test]
    fn block_action_blocks_with_message() {
        let action = RuleAction::Block {
            message: "Rate limited".to_string(),
        };
        let json = serde_json::to_string(&action).unwrap();
        assert!(json.contains("Rate limited"));
    }

    #[test]
    fn empty_store_check_returns_empty() {
        let store = TruthStore::new();
        let actions = store.check("empty.class");
        assert!(actions.is_empty());
    }

    // ── P42-001 evaluate() tests — actually walk RuleCondition ──

    /// Fixture store with one rule per condition shape under class "t".
    fn fill_store() -> TruthStore {
        let mut s = TruthStore::new();
        s.add_rule(TruthRule {
            id: "active".into(),
            task_class: "t".into(),
            description: "must be active".into(),
            condition: RuleCondition::FieldEquals {
                field: "worker.status".into(),
                value: "active".into(),
            },
            action: RuleAction::Reject {
                message: "worker not active".into(),
            },
        });
        s.add_rule(TruthRule {
            id: "deadline".into(),
            task_class: "t".into(),
            description: "deadline required".into(),
            condition: RuleCondition::FieldEmpty {
                field: "contract.deadline".into(),
            },
            action: RuleAction::Reject {
                message: "missing deadline".into(),
            },
        });
        s.add_rule(TruthRule {
            id: "budget".into(),
            task_class: "t".into(),
            description: "budget positive".into(),
            condition: RuleCondition::FieldGreater {
                field: "contract.budget".into(),
                threshold: 0,
            },
            action: RuleAction::Block {
                message: "budget must be positive".into(),
            },
        });
        s
    }

    #[test]
    fn evaluate_field_equals_pass_on_match() {
        let s = fill_store();
        let ctx = serde_json::json!({"worker": {"status": "active"}});
        let o = s.evaluate("t", &ctx);
        let active = o.iter().find(|r| r.rule_id == "active").unwrap();
        assert!(active.passed, "active condition should hold");
    }

    #[test]
    fn evaluate_field_equals_fail_on_mismatch() {
        let s = fill_store();
        let ctx = serde_json::json!({"worker": {"status": "terminated"}});
        let o = s.evaluate("t", &ctx);
        let active = o.iter().find(|r| r.rule_id == "active").unwrap();
        assert!(!active.passed, "terminated should fail active condition");
    }

    #[test]
    fn evaluate_field_equals_fail_on_missing() {
        let s = fill_store();
        let ctx = serde_json::json!({});
        let o = s.evaluate("t", &ctx);
        let active = o.iter().find(|r| r.rule_id == "active").unwrap();
        assert!(!active.passed, "missing worker.status should fail");
    }

    #[test]
    fn evaluate_field_empty_pass_when_absent() {
        let s = fill_store();
        // FieldEmpty passes when the field is missing/null/empty string.
        // Deadline rule says "field empty means action fires" — so passed=true
        // here means the rule's condition held (deadline IS empty).
        let ctx = serde_json::json!({});
        let o = s.evaluate("t", &ctx);
        let deadline = o.iter().find(|r| r.rule_id == "deadline").unwrap();
        assert!(deadline.passed);
    }

    #[test]
    fn evaluate_field_empty_fail_when_present() {
        let s = fill_store();
        let ctx = serde_json::json!({"contract": {"deadline": "2026-05-01"}});
        let o = s.evaluate("t", &ctx);
        let deadline = o.iter().find(|r| r.rule_id == "deadline").unwrap();
        assert!(!deadline.passed, "non-empty deadline should fail FieldEmpty check");
    }

    #[test]
    fn evaluate_field_greater_pass_and_fail() {
        let s = fill_store();
        let ctx_ok = serde_json::json!({"contract": {"budget": 100}});
        let ctx_bad = serde_json::json!({"contract": {"budget": 0}});
        let ok = s.evaluate("t", &ctx_ok);
        let bad = s.evaluate("t", &ctx_bad);
        assert!(ok.iter().find(|r| r.rule_id == "budget").unwrap().passed);
        assert!(!bad.iter().find(|r| r.rule_id == "budget").unwrap().passed);
    }

    #[test]
    fn evaluate_always_condition_passes_unconditionally() {
        let mut s = TruthStore::new();
        s.add_rule(TruthRule {
            id: "always".into(),
            task_class: "x".into(),
            description: "".into(),
            condition: RuleCondition::Always,
            action: RuleAction::Pass,
        });
        let o = s.evaluate("x", &serde_json::json!(null));
        assert!(o[0].passed);
    }

    #[test]
    fn evaluate_preserves_action_regardless_of_outcome() {
        let s = fill_store();
        let ctx = serde_json::json!({"worker": {"status": "active"}});
        let o = s.evaluate("t", &ctx);
        let active = o.iter().find(|r| r.rule_id == "active").unwrap();
        // Action is attached whether the rule passed or not — the consumer
        // decides how to use it.
        assert_eq!(
            active.action,
            RuleAction::Reject {
                message: "worker not active".into()
            }
        );
    }

    #[test]
    fn evaluate_on_unknown_task_class_returns_empty() {
        let s = fill_store();
        let o = s.evaluate("nonexistent", &serde_json::json!({}));
        assert!(o.is_empty());
    }

    #[test]
    fn check_still_returns_actions_unconditionally_for_back_compat() {
        // Legacy API should still behave the same — no condition walking.
        let s = fill_store();
        let actions = s.check("t");
        assert_eq!(actions.len(), 3, "check returns one action per rule regardless of condition");
    }

    // ── FieldContainsAny tests (SQL guard shape) ──

    /// Fixture store mirroring the destructive-SQL guard rule.
    fn sql_guard_store() -> TruthStore {
        let mut s = TruthStore::new();
        s.add_rule(TruthRule {
            id: "no-destructive".into(),
            task_class: "sql_query".into(),
            description: "SQL must not contain destructive verbs".into(),
            condition: RuleCondition::FieldContainsAny {
                field: "sql".into(),
                needles: vec![
                    "drop table".into(),
                    "drop schema".into(),
                    "truncate".into(),
                    "delete from".into(),
                ],
            },
            action: RuleAction::Reject {
                message: "destructive SQL rejected".into(),
            },
        });
        s
    }

    #[test]
    fn field_contains_any_matches_case_insensitively() {
        let s = sql_guard_store();
        let ctx = serde_json::json!({"sql": "SELECT * FROM t; DROP TABLE users;"});
        let o = s.evaluate("sql_query", &ctx);
        assert!(o[0].passed, "condition holds when needle present (case-insensitive)");
    }

    #[test]
    fn field_contains_any_is_false_when_no_needle_matches() {
        let s = sql_guard_store();
        let ctx = serde_json::json!({"sql": "SELECT count(*) FROM workers"});
        let o = s.evaluate("sql_query", &ctx);
        assert!(!o[0].passed, "benign SELECT should not match destructive needles");
    }

    #[test]
    fn field_contains_any_false_when_field_missing() {
        let s = sql_guard_store();
        let ctx = serde_json::json!({});
        let o = s.evaluate("sql_query", &ctx);
        assert!(!o[0].passed, "missing field → condition cannot hold");
    }

    #[test]
    fn field_contains_any_empty_needles_list_never_matches() {
        let mut s = TruthStore::new();
        s.add_rule(TruthRule {
            id: "empty".into(),
            task_class: "x".into(),
            description: "".into(),
            condition: RuleCondition::FieldContainsAny {
                field: "sql".into(),
                needles: vec![],
            },
            action: RuleAction::Pass,
        });
        let o = s.evaluate("x", &serde_json::json!({"sql": "anything"}));
        assert!(!o[0].passed, "no needles → any::<bool> is false");
    }
}

187
crates/truth/src/loader.rs Normal file
View File

@ -0,0 +1,187 @@
//! File-backed TruthRule loader (Phase 42 PRD).
//!
//! PRD: "truth/ dir at repo root — rule files, versioned in git."
//! This module walks a directory, parses every `*.toml` file it finds,
//! and registers the rules into a caller-supplied store. Rule IDs must
//! be unique across the combined set — duplicate-ID collisions are
//! load-time errors.
//!
//! The TOML format matches the shape at `truth/README.md`. The same
//! `RuleCondition` + `RuleAction` enums used by the in-code registrars
//! deserialize directly from `condition = { type = "FieldEquals", ... }`
//! thanks to `#[serde(tag = "type")]`.
use std::fs;
use std::path::Path;
use serde::Deserialize;
use crate::{TruthRule, TruthStore};
/// Deserialization wrapper — a TOML file is a list of [[rule]] blocks.
#[derive(Deserialize)]
struct RuleFile {
    // `default` lets a file with zero `[[rule]]` tables parse as an
    // empty rule list instead of a missing-field error.
    #[serde(default)]
    rule: Vec<TruthRule>,
}
/// Load every `*.toml` file in `dir` and add its rules to `store`.
/// Returns the number of rules loaded across all files.
///
/// Errors:
/// - directory doesn't exist or can't be read
/// - any `.toml` file fails to parse
/// - any rule ID collides with an existing rule (same ID already
///   registered in the store)
///
/// Non-goals: recursive walk (flat dir only), hot reload (one-shot load).
pub fn load_from_dir(store: &mut TruthStore, dir: impl AsRef<Path>) -> Result<usize, String> {
    let dir = dir.as_ref();
    let entries = fs::read_dir(dir)
        .map_err(|e| format!("read_dir {}: {e}", dir.display()))?;

    // Collect candidate files first so they can be processed in a
    // deterministic (alphabetical) order — a cross-file duplicate-ID
    // error then names the same file on every run.
    let mut toml_paths = Vec::new();
    for entry in entries.flatten() {
        let path = entry.path();
        if path.extension().and_then(|ext| ext.to_str()) == Some("toml") {
            toml_paths.push(path);
        }
    }
    toml_paths.sort();

    // Seed the seen-ID set with rules already registered in code so a
    // file cannot shadow an in-code rule.
    let mut seen_ids = store.all_rule_ids();
    let mut loaded = 0usize;
    for path in toml_paths {
        let raw = fs::read_to_string(&path)
            .map_err(|e| format!("read {}: {e}", path.display()))?;
        let parsed: RuleFile = toml::from_str(&raw)
            .map_err(|e| format!("parse {}: {e}", path.display()))?;
        for rule in parsed.rule {
            if !seen_ids.insert(rule.id.clone()) {
                return Err(format!(
                    "duplicate rule id '{}' from {}",
                    rule.id,
                    path.display()
                ));
            }
            store.add_rule(rule);
            loaded += 1;
        }
    }
    Ok(loaded)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// Write `content` to `dir/name`, panicking on any I/O failure.
    fn write_file(dir: &Path, name: &str, content: &str) {
        let path = dir.join(name);
        let mut f = fs::File::create(&path).unwrap();
        f.write_all(content.as_bytes()).unwrap();
    }

    #[test]
    fn loads_rules_from_toml_files() {
        let tmp = tempdir_for("loader_test");
        write_file(&tmp, "a.toml", r#"
[[rule]]
id = "a-rule"
task_class = "test"
description = "test rule"
action = { type = "Pass" }
[rule.condition]
type = "Always"
"#);
        let mut store = TruthStore::new();
        let n = load_from_dir(&mut store, &tmp).unwrap();
        assert_eq!(n, 1);
        assert_eq!(store.get_rules("test").len(), 1);
        let _ = fs::remove_dir_all(&tmp);
    }

    #[test]
    fn rejects_duplicate_rule_ids() {
        let tmp = tempdir_for("dup_ids");
        write_file(&tmp, "a.toml", r#"
[[rule]]
id = "same"
task_class = "t"
description = ""
action = { type = "Pass" }
[rule.condition]
type = "Always"
"#);
        write_file(&tmp, "b.toml", r#"
[[rule]]
id = "same"
task_class = "t"
description = ""
action = { type = "Pass" }
[rule.condition]
type = "Always"
"#);
        let mut store = TruthStore::new();
        let err = load_from_dir(&mut store, &tmp).unwrap_err();
        assert!(err.contains("duplicate"), "got: {err}");
        let _ = fs::remove_dir_all(&tmp);
    }

    #[test]
    fn duplicate_with_in_code_rule_is_rejected() {
        // Existing in-store IDs count as "already registered." Operator
        // can't shadow an in-code rule by file without changing the ID.
        let tmp = tempdir_for("dup_in_code");
        write_file(&tmp, "conflict.toml", r#"
[[rule]]
id = "worker-active"
task_class = "staffing.fill"
description = "file attempt"
action = { type = "Pass" }
[rule.condition]
type = "Always"
"#);
        // staffing_rules registers "worker-active"
        let mut store = crate::staffing::staffing_rules(TruthStore::new());
        let err = load_from_dir(&mut store, &tmp).unwrap_err();
        assert!(err.contains("duplicate") && err.contains("worker-active"));
        let _ = fs::remove_dir_all(&tmp);
    }

    #[test]
    fn skips_non_toml_files() {
        let tmp = tempdir_for("skip_non_toml");
        write_file(&tmp, "a.toml", r#"
[[rule]]
id = "x"
task_class = "t"
description = ""
action = { type = "Pass" }
[rule.condition]
type = "Always"
"#);
        write_file(&tmp, "README.md", "not a toml file");
        let mut store = TruthStore::new();
        let n = load_from_dir(&mut store, &tmp).unwrap();
        assert_eq!(n, 1); // README.md ignored
        let _ = fs::remove_dir_all(&tmp);
    }

    #[test]
    fn missing_dir_returns_error() {
        let mut store = TruthStore::new();
        let err = load_from_dir(&mut store, "/nonexistent/path/here").unwrap_err();
        assert!(err.contains("read_dir"));
    }

    /// Per-test temp dir keyed by tag + pid so parallel test threads in
    /// the same process don't collide.
    // NOTE(review): a test that panics before `remove_dir_all` leaves the
    // dir behind; a rerun under the same pid would see stale files —
    // acceptable for now since pids rarely repeat between runs.
    fn tempdir_for(tag: &str) -> std::path::PathBuf {
        let dir = std::env::temp_dir().join(format!("truth_loader_{}_{}", tag,
            std::process::id()));
        fs::create_dir_all(&dir).unwrap();
        dir
    }
}

View File

@ -0,0 +1,125 @@
//! Staffing task-class rules for the TruthStore.
//!
//! Phase 42 PRD: "Staffing rules ship first. Terraform/Ansible rule
//! shapes are scaffolded but unpopulated until the long-horizon phase."
//! This module owns the staffing rule set; `devops.rs` holds the
//! matching scaffold for the DevOps long-horizon.
//!
//! Rules registered here live under the task classes `staffing.fill`
//! (fill proposals), `staffing.rescue` (rescue escalations), and
//! `staffing.any` (rules that apply across all staffing task classes —
//! PII redaction being the canonical example).
//!
//! All rules are evaluated via the `TruthStore::evaluate` walk, which
//! pairs each rule's `RuleCondition` against a caller-supplied JSON
//! context and emits a `RuleOutcome { passed, action }` per rule.
//! Downstream enforcement (router gate, SQL gate, execution-loop gate)
//! decides how to apply the action — `Reject` / `Block` shortcircuit,
//! `Redact` mutates, `Pass` is informational.
use crate::{RuleAction, RuleCondition, TruthRule, TruthStore};
/// Register the staffing rule set on an existing store. Returns the
/// store for chaining if the caller wants to fold other rule sets on
/// top (e.g. `staffing_rules(devops_rules(TruthStore::new()))`).
///
/// Fix (review): the "budget-required" description previously claimed
/// the budget must be *non-negative*, but its condition is
/// `FieldGreater { threshold: 0 }` — i.e. strictly positive. The
/// description now matches the condition that is actually evaluated.
pub fn staffing_rules(mut store: TruthStore) -> TruthStore {
    // ── staffing.fill — gates on fill proposals ──
    store.add_rule(TruthRule {
        id: "worker-active".to_string(),
        task_class: "staffing.fill".to_string(),
        description: "Worker must be active".to_string(),
        condition: RuleCondition::FieldEquals {
            field: "worker.status".to_string(),
            value: "active".to_string(),
        },
        action: RuleAction::Pass,
    });
    store.add_rule(TruthRule {
        id: "client-not-blacklisted".to_string(),
        task_class: "staffing.fill".to_string(),
        description: "Worker cannot be blacklisted for client".to_string(),
        // field_as_string stringifies bools, so both JSON `false` and
        // the string "false" satisfy this condition.
        condition: RuleCondition::FieldEquals {
            field: "worker.client_blacklisted".to_string(),
            value: "false".to_string(),
        },
        action: RuleAction::Pass,
    });
    store.add_rule(TruthRule {
        id: "deadline-required".to_string(),
        task_class: "staffing.fill".to_string(),
        description: "Contract must have deadline".to_string(),
        // FieldEmpty holds when the field is missing/null/"" — so the
        // Reject fires exactly when the deadline is absent.
        condition: RuleCondition::FieldEmpty {
            field: "contract.deadline".to_string(),
        },
        action: RuleAction::Reject {
            message: "Contract deadline is required".to_string(),
        },
    });
    store.add_rule(TruthRule {
        id: "budget-required".to_string(),
        task_class: "staffing.fill".to_string(),
        description: "Budget must be positive".to_string(),
        condition: RuleCondition::FieldGreater {
            field: "contract.budget_per_hour_max".to_string(),
            threshold: 0,
        },
        action: RuleAction::Pass,
    });
    // ── staffing.any — applies across all staffing task classes ──
    store.add_rule(TruthRule {
        id: "pii-redact".to_string(),
        task_class: "staffing.any".to_string(),
        description: "Redact PII before cloud calls".to_string(),
        condition: RuleCondition::Always,
        action: RuleAction::Redact {
            fields: vec!["ssn".to_string(), "salary".to_string()],
        },
    });
    store
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn staffing_rules_registers_five_rules() {
        // 4 staffing.fill rules + 1 staffing.any rule = 5 total.
        // Regression guard: if someone adds a rule to this module
        // without updating the count, this test surfaces it.
        let store = staffing_rules(TruthStore::new());
        let fill = store.get_rules("staffing.fill").len();
        let any = store.get_rules("staffing.any").len();
        assert_eq!(fill, 4);
        assert_eq!(any, 1);
    }

    #[test]
    fn blacklisted_worker_fails_the_rule() {
        let store = staffing_rules(TruthStore::new());
        // Blacklist flag supplied as the string "true" — field_as_string
        // stringifies bools too, so JSON `true` behaves the same.
        let ctx = serde_json::json!({
            "worker": { "client_blacklisted": "true" }
        });
        let outcomes = store.evaluate("staffing.fill", &ctx);
        let blk = outcomes.iter().find(|o| o.rule_id == "client-not-blacklisted").unwrap();
        assert!(!blk.passed, "blacklisted worker must fail the rule");
    }

    #[test]
    fn missing_deadline_fires_reject_via_empty_condition() {
        let store = staffing_rules(TruthStore::new());
        // FieldEmpty passes when the field is missing — and the rule's
        // action is Reject, so enforcement should fire.
        let ctx = serde_json::json!({});
        let outcomes = store.evaluate("staffing.fill", &ctx);
        let deadline = outcomes.iter().find(|o| o.rule_id == "deadline-required").unwrap();
        assert!(deadline.passed);
        match &deadline.action {
            RuleAction::Reject { message } => assert!(message.contains("deadline")),
            _ => panic!("expected Reject action"),
        }
    }
}

View File

@ -1238,13 +1238,13 @@ fn IngestPanel() -> Element {
pg_tables.set(Some(tables)); pg_tables.set(Some(tables));
} }
} }
Err(e) => pg_tables.set(None), Err(_) => pg_tables.set(None),
} }
pg_loading.set(false); pg_loading.set(false);
}); });
}; };
let mut import_table = move |table: String| { let import_table = move |table: String| {
let host = pg_host.read().clone(); let host = pg_host.read().clone();
let db = pg_db.read().clone(); let db = pg_db.read().clone();
spawn(async move { spawn(async move {

View File

@ -0,0 +1,15 @@
# validator — Phase 43 validation pipeline (Validator trait, Artifact
# enum, staffing validators + DevOps stubs).
[package]
name = "validator"
version = "0.1.0"
edition = "2024"

[dependencies]
serde = { workspace = true }
serde_json = { workspace = true }
thiserror = { workspace = true }
# NOTE(review): tokio/tracing come from the workspace; usage not visible
# in the sources shown here — confirm they're needed.
tokio = { workspace = true }
tracing = { workspace = true }
# Parquet loader for ParquetWorkerLookup (Phase 43 v3 — production
# WorkerLookup backed by workers_500k.parquet snapshot).
arrow = { workspace = true }
parquet = { workspace = true }

View File

@ -0,0 +1,44 @@
//! DevOps validator scaffold — long-horizon.
//!
//! PRD: "scaffold only: stubbed Terraform/Ansible validators
//! (`terraform validate`, `ansible-lint`) for the long-horizon phase."
//! Shipped as Unimplemented stubs so the execution-loop dispatcher
//! has a consistent failure shape to surface ("phase 43 not wired")
//! instead of a missing-impl panic.
use crate::{Artifact, Report, Validator, ValidationError};
/// Stub validator for `terraform validate` output (long-horizon scaffold).
pub struct TerraformValidator;

impl Validator for TerraformValidator {
    /// Always fails with `Unimplemented` so the dispatcher surfaces a
    /// consistent "phase 43 not wired" error instead of panicking.
    fn validate(&self, _artifact: &Artifact) -> Result<Report, ValidationError> {
        Err(ValidationError::Unimplemented {
            artifact: "terraform_plan",
        })
    }

    fn name(&self) -> &'static str {
        "devops.terraform"
    }
}
/// Stub validator for `ansible-lint` output (long-horizon scaffold).
pub struct AnsibleValidator;

impl Validator for AnsibleValidator {
    /// Always fails with `Unimplemented` so the dispatcher surfaces a
    /// consistent "phase 43 not wired" error instead of panicking.
    fn validate(&self, _artifact: &Artifact) -> Result<Report, ValidationError> {
        Err(ValidationError::Unimplemented {
            artifact: "ansible_playbook",
        })
    }

    fn name(&self) -> &'static str {
        "devops.ansible"
    }
}
// Both scaffolds must report Unimplemented (never panic, never pass).
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn terraform_scaffold_returns_unimplemented() {
        let outcome =
            TerraformValidator.validate(&Artifact::TerraformPlan(serde_json::json!({})));
        assert!(matches!(outcome, Err(ValidationError::Unimplemented { .. })));
    }

    #[test]
    fn ansible_scaffold_returns_unimplemented() {
        let outcome =
            AnsibleValidator.validate(&Artifact::AnsiblePlaybook(serde_json::json!({})));
        assert!(matches!(outcome, Err(ValidationError::Unimplemented { .. })));
    }
}

181
crates/validator/src/lib.rs Normal file
View File

@ -0,0 +1,181 @@
//! Phase 43 Validation Pipeline.
//!
//! PRD: "Staffing outputs run through schema / completeness /
//! consistency / policy gates. Plug into Layer 5 execution loop —
//! failure triggers observer-correction iteration."
//!
//! This crate provides the `Validator` trait + `Artifact` enum +
//! Report/ValidationError types. Staffing validators (fill, email,
//! playbook) and the DevOps scaffold live in submodules.
//!
//! Landed 2026-04-24 as a scaffold — the trait + types + module
//! layout match the PRD; individual validator implementations are
//! `Unimplemented` stubs that return a clear "phase 43 not wired"
//! error rather than silently passing. The execution-loop integration
//! (generate → validate → correct → retry) comes in a follow-up
//! commit once the stubs are filled.
use serde::{Deserialize, Serialize};
use thiserror::Error;
pub mod staffing;
pub mod devops;
/// What a validator saw. One variant per artifact class we validate.
// `#[serde(tag = "kind")]`: the JSON form carries a "kind"
// discriminator field naming the variant.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "kind")]
pub enum Artifact {
    /// A fill proposal from the staffing executor — shape is
    /// `{fills: [{candidate_id, name}]}` per PRD.
    FillProposal(serde_json::Value),
    /// An email/SMS draft for outreach.
    EmailDraft(serde_json::Value),
    /// A playbook being sealed for memory.
    Playbook(serde_json::Value),
    /// Terraform plan output (scaffold, long-horizon).
    TerraformPlan(serde_json::Value),
    /// Ansible playbook (scaffold, long-horizon).
    AnsiblePlaybook(serde_json::Value),
}
/// Success report. Empty `findings` means a clean pass. Populated
/// findings with `Severity::Warning` means "acceptable but notable" —
/// the artifact passes. `Severity::Error` means validation failed;
/// the validator should return `Err(...)` in that case, not `Ok`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Report {
    pub findings: Vec<Finding>,
    /// Wall-clock validation time in milliseconds (validators measure
    /// this with `Instant::elapsed`).
    pub elapsed_ms: u64,
}
/// One observation about a specific artifact field: which field, how
/// severe, and a human-readable message.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Finding {
    /// Path-ish field name, e.g. "body" or "fills[0].name".
    pub field: String,
    pub severity: Severity,
    pub message: String,
}
/// Finding severity. Serialized snake_case ("warning" / "error").
/// See `Report` for how severities map onto pass/fail.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum Severity {
    Warning,
    Error,
}
/// Validation failure — what went wrong + where + why. Returned as
/// `Err` from `validate`. Execution loop catches these and feeds them
/// to the observer-correction retry loop.
// Serialize/Deserialize are derived so a failure can travel in JSON
// payloads (API responses, traces) rather than only in logs.
#[derive(Debug, Clone, Error, Serialize, Deserialize)]
pub enum ValidationError {
    /// Artifact schema doesn't match what we expected.
    #[error("schema mismatch at {field}: {reason}")]
    Schema { field: String, reason: String },
    /// Required data missing (e.g. endorsed count != target count).
    #[error("completeness: {reason}")]
    Completeness { reason: String },
    /// Data that's inconsistent with another source of truth
    /// (e.g. worker_id doesn't exist in the workers table).
    #[error("consistency: {reason}")]
    Consistency { reason: String },
    /// Policy violation — truth rule or access control said no.
    #[error("policy: {reason}")]
    Policy { reason: String },
    /// Validator hasn't been implemented yet — scaffold stub.
    #[error("validator not yet implemented for {artifact} — phase 43 scaffold")]
    Unimplemented { artifact: &'static str },
}
/// Core validation contract. Implementations live in `staffing::*` and
/// `devops::*`. The execution loop dispatches to the right impl based
/// on the Artifact variant.
pub trait Validator: Send + Sync {
    /// Check one artifact. `Ok(Report)` passes (possibly with
    /// warning-level findings); `Err(ValidationError)` fails and is
    /// fed to the observer-correction retry loop.
    fn validate(&self, artifact: &Artifact) -> Result<Report, ValidationError>;
    /// Human-readable name for logs + Langfuse traces.
    fn name(&self) -> &'static str;
}
// ─── Worker lookup (Phase 43 v2) ────────────────────────────────────────
//
// Validators that cross-check artifacts against the worker roster
// (FillValidator, EmailValidator) take an `Arc<dyn WorkerLookup>` at
// construction. Keeping the trait sync + in-memory mirrors the
// lakehouse pattern of "load truth into memory, validate against
// snapshot, refresh periodically" rather than per-call DB hits.
//
// Production impl: wrap a parquet snapshot loaded from
// `data/datasets/workers_500k.parquet` (or its safe view counterpart
// once Track A.B lands). Tests use `InMemoryWorkerLookup`.
/// One worker row from the staffing roster — the fields validators
/// actually read. Anything not on this struct (resume_text, scores,
/// communications) is intentionally hidden from the validator path.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkerRecord {
    /// Roster key (e.g. "W-1"; the parquet loader derives
    /// "W-{worker_id}" from the numeric id column).
    pub candidate_id: String,
    pub name: String,
    /// Free-form. Validators check for `"active"` (any other value
    /// fails the status check). Common values from existing data:
    /// "active", "inactive", "placed", "blacklisted".
    pub status: String,
    pub city: Option<String>,
    pub state: Option<String>,
    pub role: Option<String>,
    /// Client ids this worker has been blacklisted from. Populated
    /// from joining a blacklist table; empty when not provided.
    #[serde(default)]
    pub blacklisted_clients: Vec<String>,
}
/// Worker lookup contract. Sync by design — implementations should
/// hold an in-memory snapshot, not perform per-call I/O.
pub trait WorkerLookup: Send + Sync {
    /// Point lookup by roster key; `None` when the id isn't in the
    /// snapshot. Returns an owned clone of the record.
    fn find(&self, candidate_id: &str) -> Option<WorkerRecord>;
    /// Number of workers in the snapshot. Default 0 for impls that
    /// genuinely don't know (e.g. a future SQL-backed lookup that
    /// counts on demand). InMemoryWorkerLookup overrides with the
    /// HashMap size; ParquetWorkerLookup constructs an
    /// InMemoryWorkerLookup so it inherits the override. Used by
    /// /v1/health to report data-load status during production
    /// switchover (the Chicago dataset replaces synthetic test data;
    /// the health endpoint is how operators verify the new file
    /// loaded correctly without restart-and-pray).
    fn len(&self) -> usize { 0 }
}
/// HashMap-backed lookup, keyed by `candidate_id`. Backs the validator
/// unit tests and serves as the bootstrap production impl once the
/// parquet loader fills it at startup.
pub struct InMemoryWorkerLookup {
    rows: std::collections::HashMap<String, WorkerRecord>,
}

impl Default for InMemoryWorkerLookup {
    fn default() -> Self {
        Self::new()
    }
}

impl InMemoryWorkerLookup {
    /// Empty lookup; populate via `insert` (or use `from_records`).
    pub fn new() -> Self {
        InMemoryWorkerLookup { rows: Default::default() }
    }

    /// Build from a batch of records. Later records with the same
    /// `candidate_id` overwrite earlier ones.
    pub fn from_records(records: Vec<WorkerRecord>) -> Self {
        let rows = records
            .into_iter()
            .map(|record| (record.candidate_id.clone(), record))
            .collect();
        InMemoryWorkerLookup { rows }
    }

    /// Insert or replace a single record, keyed by its `candidate_id`.
    pub fn insert(&mut self, record: WorkerRecord) {
        let key = record.candidate_id.clone();
        self.rows.insert(key, record);
    }

    pub fn len(&self) -> usize {
        self.rows.len()
    }

    pub fn is_empty(&self) -> bool {
        self.rows.is_empty()
    }
}

impl WorkerLookup for InMemoryWorkerLookup {
    fn find(&self, candidate_id: &str) -> Option<WorkerRecord> {
        self.rows.get(candidate_id).map(Clone::clone)
    }

    fn len(&self) -> usize {
        self.rows.len()
    }
}

View File

@ -0,0 +1,370 @@
//! Email/SMS draft validator (Phase 43 v2 — real PII + name checks).
//!
//! PRD checks:
//! - Schema (TO/BODY fields present)
//! - Length (SMS ≤ 160 chars; email subject ≤ 78 chars)
//! - PII absence (no SSN / salary leaked into outgoing text)
//! - Worker-name consistency (name in message matches worker record)
//!
//! Like FillValidator, EmailValidator takes `Arc<dyn WorkerLookup>` at
//! construction. The contract metadata (which worker the message is
//! about) travels under `_context.candidate_id` in the JSON payload.
//! When `_context.candidate_id` is present and resolves, the validator
//! cross-checks that the worker's name appears verbatim in the body.
//!
//! PII detection is std-only (no regex dep) — a hand-rolled scan
//! covers the patterns we actually care about: SSN (NNN-NN-NNNN),
//! salary statements ("salary" / "compensation" near a $ amount).
use crate::{
Artifact, Report, Validator, ValidationError, WorkerLookup,
};
use std::sync::Arc;
use std::time::Instant;
/// Validates outgoing email/SMS drafts: schema, length limits, PII
/// absence, and worker-name consistency.
pub struct EmailValidator {
    /// Roster snapshot used for `_context.candidate_id` cross-checks.
    workers: Arc<dyn WorkerLookup>,
}

impl EmailValidator {
    /// Build a validator over the given roster snapshot.
    pub fn new(workers: Arc<dyn WorkerLookup>) -> Self {
        EmailValidator { workers }
    }
}
// PRD length limits (see module doc): SMS body ≤ 160 characters,
// email subject ≤ 78 characters.
const SMS_MAX_CHARS: usize = 160;
const EMAIL_SUBJECT_MAX_CHARS: usize = 78;
impl Validator for EmailValidator {
    fn name(&self) -> &'static str { "staffing.email" }

    /// Validate an outgoing email/SMS draft.
    ///
    /// Checks, in order:
    /// 1. Schema — artifact is an `EmailDraft` with string `to` + `body`.
    /// 2. Length — SMS body ≤ 160 chars; email subject ≤ 78 chars.
    /// 3. PII — no SSN-shaped sequence, no salary/compensation amount
    ///    (subject + body scanned together).
    /// 4. Name consistency — a present `_context.candidate_id` must
    ///    resolve in the roster (hard `Consistency` error otherwise);
    ///    a body that omits the worker's first name is a warning.
    fn validate(&self, artifact: &Artifact) -> Result<Report, ValidationError> {
        let started = Instant::now();
        let value = match artifact {
            Artifact::EmailDraft(v) => v,
            other => return Err(ValidationError::Schema {
                field: "artifact".into(),
                reason: format!("EmailValidator expects EmailDraft, got {other:?}"),
            }),
        };
        let _to = value.get("to").and_then(|v| v.as_str()).ok_or(
            ValidationError::Schema {
                field: "to".into(),
                reason: "missing or not a string".into(),
            },
        )?;
        let body = value.get("body").and_then(|v| v.as_str()).ok_or(
            ValidationError::Schema {
                field: "body".into(),
                reason: "missing or not a string".into(),
            },
        )?;
        let is_sms = value.get("kind").and_then(|k| k.as_str()) == Some("sms");
        // The limits (and our error messages) are specified in chars —
        // count Unicode scalar values, not `str::len()` bytes, so a
        // draft with accented names or emoji isn't over-counted.
        let body_chars = body.chars().count();
        if is_sms && body_chars > SMS_MAX_CHARS {
            return Err(ValidationError::Completeness {
                reason: format!("SMS body is {body_chars} chars, max {SMS_MAX_CHARS}"),
            });
        }
        if let Some(subject) = value.get("subject").and_then(|v| v.as_str()) {
            let subject_chars = subject.chars().count();
            if subject_chars > EMAIL_SUBJECT_MAX_CHARS {
                return Err(ValidationError::Completeness {
                    reason: format!(
                        "email subject is {subject_chars} chars, max {EMAIL_SUBJECT_MAX_CHARS}"
                    ),
                });
            }
        }
        // ── PII scan on body + subject combined ──
        let scanned = format!(
            "{} {}",
            value.get("subject").and_then(|v| v.as_str()).unwrap_or(""),
            body
        );
        if contains_ssn_pattern(&scanned) {
            return Err(ValidationError::Policy {
                reason: "body contains an SSN-shaped sequence (NNN-NN-NNNN); strip before send".into(),
            });
        }
        if contains_salary_disclosure(&scanned) {
            return Err(ValidationError::Policy {
                reason: "body discloses salary/compensation amount; staffing PII rule says strip before send".into(),
            });
        }
        // ── Worker-name consistency ──
        let candidate_id = value.get("_context")
            .and_then(|c| c.get("candidate_id"))
            .and_then(|v| v.as_str());
        let mut findings: Vec<crate::Finding> = vec![];
        if let Some(cid) = candidate_id {
            match self.workers.find(cid) {
                Some(worker) => {
                    // Body should mention the worker's name (or at least
                    // their first name) — drafts that address a different
                    // person than the contracted worker are a recurring
                    // class of LLM mistake.
                    let first = worker.name.split_whitespace().next().unwrap_or(&worker.name);
                    let body_lower = body.to_lowercase();
                    let first_lower = first.to_lowercase();
                    if !first_lower.is_empty() && !body_lower.contains(&first_lower) {
                        findings.push(crate::Finding {
                            field: "body".into(),
                            severity: crate::Severity::Warning,
                            message: format!(
                                "body doesn't mention worker first name {first:?} (candidate_id {cid:?})"
                            ),
                        });
                    }
                    // Also detect *another* worker's name appearing in
                    // place of the contracted one — outright wrong-target.
                    // We can only check this when we have a different
                    // expected name; skip if the body is generic enough.
                }
                None => {
                    return Err(ValidationError::Consistency {
                        reason: format!(
                            "_context.candidate_id {cid:?} not found in worker roster"
                        ),
                    });
                }
            }
        }
        Ok(Report {
            findings,
            elapsed_ms: started.elapsed().as_millis() as u64,
        })
    }
}
// ─── PII scanners (std-only) ────────────────────────────────────────────
/// Detects an SSN-shaped sequence: 3 digits, dash, 2 digits, dash, 4 digits.
/// Scans every 11-byte window; windows embedded in a longer digit/dash run
/// are rejected so phone-shaped NNN-NNN-NNNN text isn't flagged. Tight
/// false-positive surface: only the exact NNN-NN-NNNN shape matches.
fn contains_ssn_pattern(s: &str) -> bool {
    // "NNN-NN-NNNN" is exactly 11 bytes.
    const WIN: usize = 11;
    let bytes = s.as_bytes();
    if bytes.len() < WIN {
        return false;
    }
    // A byte that could extend a numeric run (digit or separator).
    let run_byte = |b: u8| b.is_ascii_digit() || b == b'-';
    // Digits everywhere except dashes at offsets 3 and 6.
    let ssn_shape = |w: &[u8]| {
        w[3] == b'-'
            && w[6] == b'-'
            && w.iter()
                .enumerate()
                .all(|(j, &b)| j == 3 || j == 6 || b.is_ascii_digit())
    };
    (0..=bytes.len() - WIN).any(|i| {
        let window = &bytes[i..i + WIN];
        if !ssn_shape(window) {
            return false;
        }
        // Adjacent digit/dash on either side means we're inside a
        // longer numeric run — probably not an SSN.
        let run_before = i > 0 && run_byte(bytes[i - 1]);
        let run_after = i + WIN < bytes.len() && run_byte(bytes[i + WIN]);
        !run_before && !run_after
    })
}
/// Detects salary/compensation disclosure: the keywords "salary",
/// "compensation", "pay rate", "bill rate", "hourly rate" appearing
/// within ~40 chars of a `$` followed by digits. Coarse on purpose —
/// it's better to false-positive on a legit phrase like "discuss your
/// hourly rate of $30/hr" than to miss it.
fn contains_salary_disclosure(s: &str) -> bool {
    const KEYWORDS: &[&str] = &[
        "salary", "compensation", "pay rate", "bill rate", "hourly rate",
    ];
    let lower = s.to_lowercase();

    // Byte offsets of every (non-overlapping) keyword occurrence.
    let keyword_at: Vec<usize> = KEYWORDS
        .iter()
        .flat_map(|kw| {
            lower.match_indices(kw).map(|(pos, _)| pos).collect::<Vec<_>>()
        })
        .collect();
    if keyword_at.is_empty() {
        return false;
    }

    // Byte offsets of every `$` immediately followed by a digit.
    let bytes = lower.as_bytes();
    let dollar_at: Vec<usize> = bytes
        .windows(2)
        .enumerate()
        .filter(|(_, pair)| pair[0] == b'$' && pair[1].is_ascii_digit())
        .map(|(pos, _)| pos)
        .collect();
    if dollar_at.is_empty() {
        return false;
    }

    // Any (keyword, $) pair within 40 bytes triggers the policy rule.
    keyword_at
        .iter()
        .any(|&kp| dollar_at.iter().any(|&dp| kp.abs_diff(dp) <= 40))
}
// Unit tests for EmailValidator: length gates, PII scanners (SSN +
// salary), and worker-name consistency against an in-memory roster.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{InMemoryWorkerLookup, WorkerRecord};
    use serde_json::json;

    // Wrap test records in the trait object EmailValidator expects.
    fn lookup(records: Vec<WorkerRecord>) -> Arc<dyn WorkerLookup> {
        Arc::new(InMemoryWorkerLookup::from_records(records))
    }

    // Minimal active roster row — geo/role intentionally unset.
    fn worker(id: &str, name: &str) -> WorkerRecord {
        WorkerRecord {
            candidate_id: id.into(),
            name: name.into(),
            status: "active".into(),
            city: None, state: None, role: None,
            blacklisted_clients: vec![],
        }
    }

    // ── Length + schema gates ──

    #[test]
    fn long_sms_fails_completeness() {
        let v = EmailValidator::new(lookup(vec![]));
        let body = "x".repeat(200);
        let r = v.validate(&Artifact::EmailDraft(json!({
            "to": "+15555550123", "body": body, "kind": "sms"
        })));
        assert!(matches!(r, Err(ValidationError::Completeness { .. })));
    }

    #[test]
    fn long_email_subject_fails_completeness() {
        let v = EmailValidator::new(lookup(vec![]));
        let r = v.validate(&Artifact::EmailDraft(json!({
            "to": "a@b.com", "body": "hi", "subject": "x".repeat(100)
        })));
        assert!(matches!(r, Err(ValidationError::Completeness { .. })));
    }

    #[test]
    fn missing_to_fails_schema() {
        let v = EmailValidator::new(lookup(vec![]));
        let r = v.validate(&Artifact::EmailDraft(json!({"body": "hi"})));
        assert!(matches!(r, Err(ValidationError::Schema { field, .. }) if field == "to"));
    }

    #[test]
    fn well_formed_email_passes() {
        let v = EmailValidator::new(lookup(vec![]));
        let r = v.validate(&Artifact::EmailDraft(json!({
            "to": "hiring@example.com",
            "subject": "Interview: Friday 10am",
            "body": "Hi Jane — confirming interview Friday 10am."
        })));
        assert!(r.is_ok(), "well-formed email should pass: {:?}", r);
    }

    // ── PII: SSN detection ──

    #[test]
    fn ssn_in_body_fails_policy() {
        let v = EmailValidator::new(lookup(vec![]));
        let r = v.validate(&Artifact::EmailDraft(json!({
            "to": "x@y.com",
            "body": "Hi Jane — your file shows 123-45-6789 on record."
        })));
        match r {
            Err(ValidationError::Policy { reason }) => assert!(reason.contains("SSN")),
            other => panic!("expected Policy SSN error, got {other:?}"),
        }
    }

    #[test]
    fn ssn_in_subject_fails_policy() {
        // Subject is scanned too (subject + body are combined).
        let v = EmailValidator::new(lookup(vec![]));
        let r = v.validate(&Artifact::EmailDraft(json!({
            "to": "x@y.com",
            "subject": "Re: ID 123-45-6789",
            "body": "details inside"
        })));
        assert!(matches!(r, Err(ValidationError::Policy { .. })));
    }

    #[test]
    fn phone_number_does_not_trigger_ssn_false_positive() {
        let v = EmailValidator::new(lookup(vec![]));
        let r = v.validate(&Artifact::EmailDraft(json!({
            "to": "x@y.com",
            "body": "Call me at 555-123-4567 to confirm."
        })));
        assert!(r.is_ok(), "phone NNN-NNN-NNNN should NOT match SSN NNN-NN-NNNN: {:?}", r);
    }

    // ── PII: salary disclosure ──

    #[test]
    fn salary_disclosure_fails_policy() {
        let v = EmailValidator::new(lookup(vec![]));
        let r = v.validate(&Artifact::EmailDraft(json!({
            "to": "x@y.com",
            "body": "Confirming your hourly rate of $32.50 per hour."
        })));
        assert!(matches!(r, Err(ValidationError::Policy { .. })));
    }

    #[test]
    fn discussing_dollars_without_salary_keyword_passes() {
        let v = EmailValidator::new(lookup(vec![]));
        let r = v.validate(&Artifact::EmailDraft(json!({
            "to": "x@y.com",
            "body": "The $20 parking pass is at the front desk."
        })));
        assert!(r.is_ok(), "non-salary $ should pass: {:?}", r);
    }

    // ── Worker-name consistency ──

    #[test]
    fn unknown_candidate_id_fails_consistency() {
        let v = EmailValidator::new(lookup(vec![]));
        let r = v.validate(&Artifact::EmailDraft(json!({
            "to": "x@y.com",
            "body": "Hi Jane",
            "_context": {"candidate_id": "W-FAKE"}
        })));
        match r {
            Err(ValidationError::Consistency { reason }) => assert!(reason.contains("not found")),
            other => panic!("expected Consistency, got {other:?}"),
        }
    }

    #[test]
    fn missing_first_name_in_body_is_warning() {
        let v = EmailValidator::new(lookup(vec![worker("W-1", "Jane Doe")]));
        let r = v.validate(&Artifact::EmailDraft(json!({
            "to": "x@y.com",
            "body": "Hi there — confirming your interview Friday.",
            "_context": {"candidate_id": "W-1"}
        })));
        let report = r.expect("missing name should be warning, not error");
        assert_eq!(report.findings.len(), 1);
        assert_eq!(report.findings[0].severity, crate::Severity::Warning);
        assert!(report.findings[0].message.to_lowercase().contains("first name"));
    }

    #[test]
    fn matching_first_name_passes_clean() {
        let v = EmailValidator::new(lookup(vec![worker("W-1", "Jane Doe")]));
        let r = v.validate(&Artifact::EmailDraft(json!({
            "to": "x@y.com",
            "body": "Hi Jane — confirming your interview Friday.",
            "_context": {"candidate_id": "W-1"}
        })));
        let report = r.expect("matching name should pass");
        assert!(report.findings.is_empty(), "expected no findings, got {:?}", report.findings);
    }
}

View File

@ -0,0 +1,383 @@
//! Fill-proposal validator (Phase 43 v2 — real consistency checks).
//!
//! PRD checks:
//! - Schema compliance (propose_done shape: `{fills: [{candidate_id, name}]}`)
//! - Completeness (endorsed count == target_count)
//! - Worker existence (every candidate_id present in workers roster)
//! - Status check (worker.status == "active")
//! - Client blacklist (worker NOT in client.blacklisted_clients)
//! - Geo/role match (worker city/state/role matches contract)
//!
//! The contract metadata (target_count, city, state, role, client_id)
//! travels alongside the JSON payload under a `_context` key:
//! `{"_context": {"target_count": 2, "city": "Toledo", "state": "OH",
//! "role": "Welder", "client_id": "CLI-00099"}, "fills": [...]}`.
//! This keeps the Validator trait signature stable while letting the
//! validator cross-check fills against contract truth.
//!
//! Worker-existence + status + geo + blacklist all share a single
//! lookup trait (`WorkerLookup`) so the validator stays decoupled
//! from queryd / parquet / catalogd transport details.
use crate::{
Artifact, Report, Validator, ValidationError, WorkerLookup, WorkerRecord,
};
use std::sync::Arc;
use std::time::Instant;
/// Validates fill proposals against contract context (`_context`) and
/// the worker roster.
pub struct FillValidator {
    /// Roster snapshot for existence / status / geo / blacklist checks.
    workers: Arc<dyn WorkerLookup>,
}

impl FillValidator {
    /// Build a validator over the given roster snapshot.
    pub fn new(workers: Arc<dyn WorkerLookup>) -> Self {
        FillValidator { workers }
    }
}
/// Contract metadata pulled from the proposal's `_context` object.
/// Every field is optional — checks that need a missing field are
/// simply skipped (see `FillValidator::validate`).
#[derive(Debug, Default)]
struct FillContext {
    target_count: Option<usize>,
    city: Option<String>,
    state: Option<String>,
    role: Option<String>,
    client_id: Option<String>,
}
/// Pull `FillContext` out of a proposal's optional `_context` object.
/// A missing or non-object `_context` yields the all-None default.
fn extract_context(value: &serde_json::Value) -> FillContext {
    let Some(ctx) = value.get("_context").and_then(|c| c.as_object()) else {
        return FillContext::default();
    };
    let get_str = |key: &str| ctx.get(key).and_then(|v| v.as_str()).map(String::from);
    FillContext {
        target_count: ctx
            .get("target_count")
            .and_then(|v| v.as_u64())
            .map(|n| n as usize),
        city: get_str("city"),
        state: get_str("state"),
        role: get_str("role"),
        client_id: get_str("client_id"),
    }
}
/// ASCII case-insensitive equality after trimming surrounding
/// whitespace — the comparison used by status/geo/role/client checks.
fn eq_ci(a: &str, b: &str) -> bool {
    let lhs = a.trim();
    let rhs = b.trim();
    lhs.eq_ignore_ascii_case(rhs)
}
impl Validator for FillValidator {
    fn name(&self) -> &'static str { "staffing.fill" }

    /// Validate a fill proposal against the contract context and roster.
    ///
    /// Checks, in order:
    /// 1. Schema — `fills` array present; each entry has string
    ///    `candidate_id` + `name`.
    /// 2. Completeness — fills count == `_context.target_count`.
    /// 3. Consistency — no duplicate ids; every id exists in the roster;
    ///    worker status is "active"; city/state/role match the contract
    ///    (checked only when both sides carry a value).
    /// 4. Policy — worker is not blacklisted for `_context.client_id`.
    ///
    /// A roster-name mismatch is a warning-level finding, not an error.
    fn validate(&self, artifact: &Artifact) -> Result<Report, ValidationError> {
        let started = Instant::now();
        let value = match artifact {
            Artifact::FillProposal(v) => v,
            other => return Err(ValidationError::Schema {
                field: "artifact".into(),
                reason: format!("FillValidator expects FillProposal, got {other:?}"),
            }),
        };
        // ── Schema check ──
        let fills = value.get("fills").and_then(|f| f.as_array()).ok_or(
            ValidationError::Schema {
                field: "fills".into(),
                reason: "expected top-level `fills` array".into(),
            },
        )?;
        for (i, fill) in fills.iter().enumerate() {
            // Require *string* values, not just key presence — a numeric
            // candidate_id would otherwise slip past schema, degrade to
            // "" downstream, and surface as a confusing roster
            // consistency error. Mirrors EmailValidator's
            // "missing or not a string" contract.
            if fill.get("candidate_id").and_then(|v| v.as_str()).is_none() {
                return Err(ValidationError::Schema {
                    field: format!("fills[{i}].candidate_id"),
                    reason: "missing or not a string".into(),
                });
            }
            if fill.get("name").and_then(|v| v.as_str()).is_none() {
                return Err(ValidationError::Schema {
                    field: format!("fills[{i}].name"),
                    reason: "missing or not a string".into(),
                });
            }
        }
        let ctx = extract_context(value);
        // ── Completeness: count match ──
        if let Some(target) = ctx.target_count {
            if fills.len() != target {
                return Err(ValidationError::Completeness {
                    reason: format!(
                        "endorsed count {} != target_count {target}",
                        fills.len()
                    ),
                });
            }
        }
        // ── Cross-roster checks ──
        let mut findings: Vec<crate::Finding> = vec![];
        let mut seen_ids = std::collections::HashSet::new();
        for (i, fill) in fills.iter().enumerate() {
            let candidate_id = fill.get("candidate_id").and_then(|v| v.as_str()).unwrap_or("");
            let proposed_name = fill.get("name").and_then(|v| v.as_str()).unwrap_or("");
            // Duplicate-ID guard inside one fill.
            if !seen_ids.insert(candidate_id.to_string()) {
                return Err(ValidationError::Consistency {
                    reason: format!(
                        "duplicate candidate_id {candidate_id:?} appears multiple times in fills"
                    ),
                });
            }
            // Worker existence — the gate that catches phantom IDs the
            // model fabricates. This is the load-bearing check for
            // the 0→85% pattern.
            let worker: WorkerRecord = match self.workers.find(candidate_id) {
                Some(w) => w,
                None => return Err(ValidationError::Consistency {
                    reason: format!(
                        "fills[{i}].candidate_id {candidate_id:?} does not exist in worker roster"
                    ),
                }),
            };
            // Status — only "active" workers can be endorsed.
            if !eq_ci(&worker.status, "active") {
                return Err(ValidationError::Consistency {
                    reason: format!(
                        "fills[{i}] worker {candidate_id:?} has status {:?}, expected \"active\"",
                        worker.status
                    ),
                });
            }
            // Client blacklist.
            if let Some(client) = ctx.client_id.as_deref() {
                if worker.blacklisted_clients.iter().any(|b| eq_ci(b, client)) {
                    return Err(ValidationError::Policy {
                        reason: format!(
                            "fills[{i}] worker {candidate_id:?} blacklisted for client {client:?}"
                        ),
                    });
                }
            }
            // Geo / role match — enforced only when both the contract
            // and the roster carry a value; hard fail on mismatch.
            if let (Some(want_city), Some(have_city)) = (ctx.city.as_deref(), worker.city.as_deref()) {
                if !eq_ci(want_city, have_city) {
                    return Err(ValidationError::Consistency {
                        reason: format!(
                            "fills[{i}] worker {candidate_id:?} city {have_city:?} doesn't match contract city {want_city:?}"
                        ),
                    });
                }
            }
            if let (Some(want_state), Some(have_state)) = (ctx.state.as_deref(), worker.state.as_deref()) {
                if !eq_ci(want_state, have_state) {
                    return Err(ValidationError::Consistency {
                        reason: format!(
                            "fills[{i}] worker {candidate_id:?} state {have_state:?} doesn't match contract state {want_state:?}"
                        ),
                    });
                }
            }
            if let (Some(want_role), Some(have_role)) = (ctx.role.as_deref(), worker.role.as_deref()) {
                if !eq_ci(want_role, have_role) {
                    return Err(ValidationError::Consistency {
                        reason: format!(
                            "fills[{i}] worker {candidate_id:?} role {have_role:?} doesn't match contract role {want_role:?}"
                        ),
                    });
                }
            }
            // Name-mismatch is a warning, not an error — recruiters
            // sometimes send updated names through the proposal layer
            // before the roster is updated.
            if !proposed_name.is_empty() && !eq_ci(proposed_name, &worker.name) {
                findings.push(crate::Finding {
                    field: format!("fills[{i}].name"),
                    severity: crate::Severity::Warning,
                    message: format!(
                        "proposed name {proposed_name:?} differs from roster name {:?} for {candidate_id:?}",
                        worker.name
                    ),
                });
            }
        }
        Ok(Report {
            findings,
            elapsed_ms: started.elapsed().as_millis() as u64,
        })
    }
}
// Unit tests over an in-memory roster: schema gates, count
// completeness, phantom-id / status / geo / role consistency,
// blacklist policy, and the warning-level name mismatch.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::InMemoryWorkerLookup;
    use serde_json::json;

    // Wrap test records in the trait object FillValidator expects.
    fn lookup(records: Vec<WorkerRecord>) -> Arc<dyn WorkerLookup> {
        Arc::new(InMemoryWorkerLookup::from_records(records))
    }

    // Fully-populated roster row (no blacklist entries).
    fn worker(id: &str, name: &str, status: &str, city: &str, state: &str, role: &str) -> WorkerRecord {
        WorkerRecord {
            candidate_id: id.into(),
            name: name.into(),
            status: status.into(),
            city: Some(city.into()),
            state: Some(state.into()),
            role: Some(role.into()),
            blacklisted_clients: vec![],
        }
    }

    // ── Schema ──

    #[test]
    fn wrong_artifact_type_fails_schema() {
        let v = FillValidator::new(lookup(vec![]));
        let r = v.validate(&Artifact::EmailDraft(json!({})));
        assert!(matches!(r, Err(ValidationError::Schema { .. })));
    }

    #[test]
    fn missing_fills_array_fails_schema() {
        let v = FillValidator::new(lookup(vec![]));
        let r = v.validate(&Artifact::FillProposal(json!({})));
        assert!(matches!(r, Err(ValidationError::Schema { field, .. }) if field == "fills"));
    }

    #[test]
    fn fill_without_candidate_id_fails() {
        let v = FillValidator::new(lookup(vec![]));
        let r = v.validate(&Artifact::FillProposal(json!({"fills": [{"name": "Jane"}]})));
        assert!(matches!(r, Err(ValidationError::Schema { field, .. }) if field.contains("candidate_id")));
    }

    #[test]
    fn well_formed_proposal_with_real_workers_passes() {
        let v = FillValidator::new(lookup(vec![
            worker("W-1", "Jane Doe", "active", "Toledo", "OH", "Welder"),
            worker("W-2", "John Smith", "active", "Toledo", "OH", "Welder"),
        ]));
        let r = v.validate(&Artifact::FillProposal(json!({
            "_context": {"target_count": 2, "city": "Toledo", "state": "OH", "role": "Welder"},
            "fills": [
                {"candidate_id": "W-1", "name": "Jane Doe"},
                {"candidate_id": "W-2", "name": "John Smith"}
            ]
        })));
        assert!(r.is_ok(), "expected pass, got {:?}", r);
    }

    // ── Consistency ──

    #[test]
    fn phantom_candidate_id_fails_consistency() {
        let v = FillValidator::new(lookup(vec![worker("W-1", "Jane", "active", "Toledo", "OH", "Welder")]));
        let r = v.validate(&Artifact::FillProposal(json!({
            "_context": {"target_count": 1, "city": "Toledo", "state": "OH", "role": "Welder"},
            "fills": [{"candidate_id": "W-FAKE-99999", "name": "Imaginary"}]
        })));
        match r {
            Err(ValidationError::Consistency { reason }) => assert!(reason.contains("does not exist")),
            other => panic!("expected Consistency error, got {other:?}"),
        }
    }

    #[test]
    fn inactive_worker_fails_consistency() {
        let v = FillValidator::new(lookup(vec![worker("W-1", "Jane", "inactive", "Toledo", "OH", "Welder")]));
        let r = v.validate(&Artifact::FillProposal(json!({
            "_context": {"target_count": 1},
            "fills": [{"candidate_id": "W-1", "name": "Jane"}]
        })));
        match r {
            Err(ValidationError::Consistency { reason }) => assert!(reason.contains("inactive")),
            other => panic!("expected Consistency error, got {other:?}"),
        }
    }

    #[test]
    fn wrong_city_fails_consistency() {
        let v = FillValidator::new(lookup(vec![worker("W-1", "Jane", "active", "Cincinnati", "OH", "Welder")]));
        let r = v.validate(&Artifact::FillProposal(json!({
            "_context": {"target_count": 1, "city": "Toledo", "state": "OH", "role": "Welder"},
            "fills": [{"candidate_id": "W-1", "name": "Jane"}]
        })));
        match r {
            Err(ValidationError::Consistency { reason }) => assert!(reason.to_lowercase().contains("city")),
            other => panic!("expected Consistency error, got {other:?}"),
        }
    }

    #[test]
    fn wrong_role_fails_consistency() {
        let v = FillValidator::new(lookup(vec![worker("W-1", "Jane", "active", "Toledo", "OH", "Driver")]));
        let r = v.validate(&Artifact::FillProposal(json!({
            "_context": {"target_count": 1, "city": "Toledo", "state": "OH", "role": "Welder"},
            "fills": [{"candidate_id": "W-1", "name": "Jane"}]
        })));
        match r {
            Err(ValidationError::Consistency { reason }) => assert!(reason.to_lowercase().contains("role")),
            other => panic!("expected Consistency error, got {other:?}"),
        }
    }

    #[test]
    fn count_mismatch_fails_completeness() {
        let v = FillValidator::new(lookup(vec![
            worker("W-1", "Jane", "active", "Toledo", "OH", "Welder"),
        ]));
        let r = v.validate(&Artifact::FillProposal(json!({
            "_context": {"target_count": 2, "city": "Toledo", "state": "OH", "role": "Welder"},
            "fills": [{"candidate_id": "W-1", "name": "Jane"}]
        })));
        assert!(matches!(r, Err(ValidationError::Completeness { .. })));
    }

    #[test]
    fn duplicate_candidate_id_fails_consistency() {
        let v = FillValidator::new(lookup(vec![
            worker("W-1", "Jane", "active", "Toledo", "OH", "Welder"),
        ]));
        let r = v.validate(&Artifact::FillProposal(json!({
            "_context": {"target_count": 2, "city": "Toledo", "state": "OH", "role": "Welder"},
            "fills": [
                {"candidate_id": "W-1", "name": "Jane"},
                {"candidate_id": "W-1", "name": "Jane"}
            ]
        })));
        match r {
            Err(ValidationError::Consistency { reason }) => assert!(reason.contains("duplicate")),
            other => panic!("expected Consistency error, got {other:?}"),
        }
    }

    // ── Policy + warnings ──

    #[test]
    fn blacklisted_worker_fails_policy() {
        let mut w = worker("W-1", "Jane", "active", "Toledo", "OH", "Welder");
        w.blacklisted_clients = vec!["CLI-00099".into()];
        let v = FillValidator::new(lookup(vec![w]));
        let r = v.validate(&Artifact::FillProposal(json!({
            "_context": {"target_count": 1, "city": "Toledo", "state": "OH", "role": "Welder", "client_id": "CLI-00099"},
            "fills": [{"candidate_id": "W-1", "name": "Jane"}]
        })));
        assert!(matches!(r, Err(ValidationError::Policy { .. })));
    }

    #[test]
    fn name_mismatch_is_warning_not_error() {
        let v = FillValidator::new(lookup(vec![
            worker("W-1", "Jane Doe", "active", "Toledo", "OH", "Welder"),
        ]));
        let r = v.validate(&Artifact::FillProposal(json!({
            "_context": {"target_count": 1, "city": "Toledo", "state": "OH", "role": "Welder"},
            "fills": [{"candidate_id": "W-1", "name": "Janet Doe"}]
        })));
        let report = r.expect("name mismatch should be warning, not error");
        assert_eq!(report.findings.len(), 1);
        assert_eq!(report.findings[0].severity, crate::Severity::Warning);
        assert!(report.findings[0].message.contains("differs from roster"));
    }
}

View File

@ -0,0 +1,9 @@
//! Staffing validators — fill proposals, email/SMS drafts, sealed
//! playbooks. Phase 43 PRD: "the 0→85% pattern reproduces on real
//! staffing tasks — the iteration loop with validation in place is
//! what made small models successful."
pub mod fill;
pub mod email;
pub mod playbook;
pub mod parquet_lookup;

View File

@ -0,0 +1,165 @@
//! Production WorkerLookup backed by a workers_500k.parquet snapshot.
//!
//! Loads the full roster into memory at startup (one-shot). 500K rows
//! at ~150 bytes per WorkerRecord ≈ 75 MB resident — fine for any
//! production lakehouse process. Refresh is intentionally
//! caller-driven (call `from_parquet` again to rebuild) rather than
//! automatic — operators decide when staffing data has changed enough
//! to justify the few-second reload.
//!
//! Schema mapping (workers_500k.parquet → WorkerRecord):
//! worker_id (int64) → candidate_id = "W-{id}"
//! name (string) → name
//! role (string) → role
//! city (string) → city
//! state (string) → state
//! availability (double) → status: "active" if >0 else "inactive"
//!
//! No status column on workers_500k, so we derive from availability —
//! the floor convention used elsewhere in the lakehouse staffing
//! pipeline. Workers with availability=0.0 are treated as inactive
//! (vacation, suspended, etc.). Once the Track-A.B `_safe` view ships
//! with proper `status`, switch this loader to read it directly.
//!
//! Blacklist join is not done here — caller is expected to populate
//! `blacklisted_clients` from a separate source (Phase 43 PRD says
//! `client_blacklist` table; not yet defined). Default empty.
use crate::{InMemoryWorkerLookup, WorkerLookup, WorkerRecord};
use parquet::file::reader::{FileReader, SerializedFileReader};
use parquet::record::Field;
use std::fs::File;
use std::path::Path;
use std::sync::Arc;
/// Failure modes when building a worker lookup from a parquet snapshot.
/// Any variant aborts the whole load — a partially loaded roster is
/// never returned.
#[derive(Debug, thiserror::Error)]
pub enum LookupLoadError {
    /// I/O failure opening the file (missing path, permissions, …).
    #[error("opening parquet at {path}: {source}")]
    Open { path: String, #[source] source: std::io::Error },
    /// Parquet-level failure: bad footer/metadata or an unreadable row.
    #[error("parsing parquet at {path}: {source}")]
    Parse { path: String, #[source] source: parquet::errors::ParquetError },
    /// Schema is missing one of the six required columns.
    #[error("missing required column {column}")]
    MissingColumn { column: String },
    /// A specific row lacked `worker_id` or `name` (0-based index).
    #[error("row {row}: {reason}")]
    BadRow { row: usize, reason: String },
}
/// Build an `InMemoryWorkerLookup` from a workers_500k-shaped parquet
/// file. Returned as `Arc<dyn WorkerLookup>` to drop into validator
/// constructors.
///
/// Fail-fast: a missing required column, an unreadable row, or a row
/// without `worker_id`/`name` aborts the whole load with a
/// `LookupLoadError`. `blacklisted_clients` is always empty here (see
/// module docs — the blacklist join is the caller's job).
pub fn load_workers_parquet(path: &Path) -> Result<Arc<dyn WorkerLookup>, LookupLoadError> {
    let file = File::open(path).map_err(|e| LookupLoadError::Open {
        path: path.display().to_string(),
        source: e,
    })?;
    let reader = SerializedFileReader::new(file).map_err(|e| LookupLoadError::Parse {
        path: path.display().to_string(),
        source: e,
    })?;
    // Validate schema covers what we need before iterating rows.
    let schema = reader.metadata().file_metadata().schema();
    let column_names: Vec<&str> = schema.get_fields().iter().map(|f| f.name()).collect();
    for required in &["worker_id", "name", "role", "city", "state", "availability"] {
        if !column_names.contains(required) {
            return Err(LookupLoadError::MissingColumn { column: (*required).to_string() });
        }
    }
    let row_iter = reader.get_row_iter(None).map_err(|e| LookupLoadError::Parse {
        path: path.display().to_string(),
        source: e,
    })?;
    // Pre-size from footer metadata so the 500K-row push loop never
    // reallocates.
    let mut records: Vec<WorkerRecord> = Vec::with_capacity(reader.metadata().file_metadata().num_rows() as usize);
    let mut row_idx = 0usize;
    for row_result in row_iter {
        let row = row_result.map_err(|e| LookupLoadError::Parse {
            path: path.display().to_string(),
            source: e,
        })?;
        // Per-row scratch. worker_id accepts both Int32 and Int64
        // physical types; availability accepts Float and Double.
        // availability defaults to 0.0 (→ "inactive") when the cell is
        // null or an unexpected type.
        let mut worker_id: Option<i64> = None;
        let mut name: Option<String> = None;
        let mut role: Option<String> = None;
        let mut city: Option<String> = None;
        let mut state: Option<String> = None;
        let mut availability: f64 = 0.0;
        for (col_name, field) in row.get_column_iter() {
            match (col_name.as_str(), field) {
                ("worker_id", Field::Long(v)) => worker_id = Some(*v),
                ("worker_id", Field::Int(v)) => worker_id = Some(*v as i64),
                ("name", Field::Str(v)) => name = Some(v.clone()),
                ("role", Field::Str(v)) => role = Some(v.clone()),
                ("city", Field::Str(v)) => city = Some(v.clone()),
                ("state", Field::Str(v)) => state = Some(v.clone()),
                ("availability", Field::Double(v)) => availability = *v,
                ("availability", Field::Float(v)) => availability = *v as f64,
                _ => { /* extra columns ignored */ }
            }
        }
        // Only worker_id and name are hard requirements; role/city/state
        // stay Option on the record when absent or null.
        let id = worker_id.ok_or_else(|| LookupLoadError::BadRow {
            row: row_idx,
            reason: "worker_id missing or non-integer".into(),
        })?;
        let nm = name.ok_or_else(|| LookupLoadError::BadRow {
            row: row_idx,
            reason: "name missing".into(),
        })?;
        records.push(WorkerRecord {
            candidate_id: format!("W-{id}"),
            name: nm,
            // status derived from availability (workers_500k has no
            // status column). 0.0 → inactive, >0.0 → active.
            status: if availability > 0.0 { "active".into() } else { "inactive".into() },
            city,
            state,
            role,
            blacklisted_clients: vec![],
        });
        row_idx += 1;
    }
    tracing::info!(
        target: "validator.parquet_lookup",
        rows = records.len(),
        path = %path.display(),
        "loaded workers parquet snapshot"
    );
    Ok(Arc::new(InMemoryWorkerLookup::from_records(records)))
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Historical default location of the live roster snapshot.
    /// Previously hardcoded inline, which tied the smoke test to one
    /// developer's home directory; now overridable (see below).
    const DEFAULT_WORKERS_PARQUET: &str =
        "/home/profit/lakehouse/data/datasets/workers_500k.parquet";

    /// Smoke test against the live workers_500k.parquet on disk.
    /// Skipped automatically if the file isn't present (CI / sparse
    /// checkouts) so the test suite stays portable. Set
    /// WORKERS_PARQUET_PATH to point the test at a different snapshot
    /// without editing source.
    #[test]
    fn load_real_workers_500k() {
        let path = std::env::var("WORKERS_PARQUET_PATH")
            .map(PathBuf::from)
            .unwrap_or_else(|_| PathBuf::from(DEFAULT_WORKERS_PARQUET));
        if !path.exists() {
            eprintln!("skip: {} not present", path.display());
            return;
        }
        let lookup = load_workers_parquet(&path).expect("load");
        // Basic shape: at least one worker resolves and has the
        // expected fields populated.
        let probe = lookup.find("W-1");
        assert!(probe.is_some(), "W-1 should exist in 500K-row parquet");
        let w = probe.unwrap();
        assert!(!w.name.is_empty(), "name should be populated");
        assert!(w.status == "active" || w.status == "inactive");
        assert!(w.role.is_some());
        assert!(w.city.is_some());
        assert!(w.state.is_some());
    }

    #[test]
    fn missing_file_returns_error() {
        // A nonexistent path must surface as Open (I/O), not Parse or
        // a panic.
        let r = load_workers_parquet(Path::new("/nonexistent.parquet"));
        assert!(matches!(r, Err(LookupLoadError::Open { .. })));
    }
}

View File

@ -0,0 +1,134 @@
//! Sealed playbook validator.
//!
//! PRD checks:
//! - Operation format (`fill: Role xN in City, ST`)
//! - endorsed_names non-empty, ≤ target_count × 2
//! - fingerprint populated (Phase 25 validity window requirement)
use crate::{Artifact, Report, Validator, ValidationError};
use std::time::Instant;
/// Validates sealed playbooks: `fill:`-prefixed operation string,
/// non-empty endorsed_names capped at target_count × 2, and a
/// populated fingerprint (Phase 25 validity window requirement).
pub struct PlaybookValidator;

impl Validator for PlaybookValidator {
    fn name(&self) -> &'static str { "staffing.playbook" }

    fn validate(&self, artifact: &Artifact) -> Result<Report, ValidationError> {
        let t0 = Instant::now();

        // Only Playbook artifacts are accepted; anything else is a
        // wiring error surfaced as Schema.
        let payload = match artifact {
            Artifact::Playbook(v) => v,
            other => {
                return Err(ValidationError::Schema {
                    field: "artifact".into(),
                    reason: format!("PlaybookValidator expects Playbook, got {other:?}"),
                })
            }
        };

        // Operation format: "fill: Role xN in City, ST" — at minimum
        // the string-shape is checked here; the fuller grammar parse
        // lives in phase 25 code where operations are structured.
        let operation = payload
            .get("operation")
            .and_then(|v| v.as_str())
            .ok_or(ValidationError::Schema {
                field: "operation".into(),
                reason: "missing or not a string".into(),
            })?;
        if !operation.starts_with("fill:") {
            return Err(ValidationError::Schema {
                field: "operation".into(),
                reason: format!("expected `fill: ...` prefix, got {operation:?}"),
            });
        }

        // Endorsements: required, non-empty, and (when target_count is
        // present) at most twice the target.
        let endorsed = payload
            .get("endorsed_names")
            .and_then(|v| v.as_array())
            .ok_or(ValidationError::Schema {
                field: "endorsed_names".into(),
                reason: "missing or not an array".into(),
            })?;
        if endorsed.is_empty() {
            return Err(ValidationError::Completeness {
                reason: "endorsed_names must be non-empty".into(),
            });
        }
        if let Some(target) = payload.get("target_count").and_then(|v| v.as_u64()) {
            let cap = (target * 2) as usize;
            if endorsed.len() > cap {
                return Err(ValidationError::Completeness {
                    reason: format!(
                        "endorsed_names ({}) exceeds target_count × 2 ({cap})",
                        endorsed.len()
                    ),
                });
            }
        }

        // Fingerprint must be present AND non-empty.
        let has_fingerprint = payload
            .get("fingerprint")
            .and_then(|v| v.as_str())
            .map_or(false, |s| !s.is_empty());
        if !has_fingerprint {
            return Err(ValidationError::Schema {
                field: "fingerprint".into(),
                reason: "missing — required for Phase 25 validity window".into(),
            });
        }

        Ok(Report {
            findings: vec![],
            elapsed_ms: t0.elapsed().as_millis() as u64,
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand: validate a raw JSON body as a sealed playbook.
    fn check(body: serde_json::Value) -> Result<Report, ValidationError> {
        PlaybookValidator.validate(&Artifact::Playbook(body))
    }

    #[test]
    fn well_formed_playbook_passes() {
        let outcome = check(serde_json::json!({
            "operation": "fill: Welder x2 in Toledo, OH",
            "endorsed_names": ["W-123", "W-456"],
            "target_count": 2,
            "fingerprint": "abc123"
        }));
        assert!(outcome.is_ok(), "got {:?}", outcome);
    }

    #[test]
    fn empty_endorsed_names_fails_completeness() {
        let outcome = check(serde_json::json!({
            "operation": "fill: Welder x2 in Toledo, OH",
            "endorsed_names": [],
            "fingerprint": "abc"
        }));
        assert!(matches!(outcome, Err(ValidationError::Completeness { .. })));
    }

    #[test]
    fn overfull_endorsed_names_fails_completeness() {
        // Three endorsements against target_count=1 breaches the ×2 cap.
        let outcome = check(serde_json::json!({
            "operation": "fill: Welder x1 in Toledo, OH",
            "endorsed_names": ["a", "b", "c"],
            "target_count": 1,
            "fingerprint": "abc"
        }));
        assert!(matches!(outcome, Err(ValidationError::Completeness { .. })));
    }

    #[test]
    fn missing_fingerprint_fails_schema() {
        let outcome = check(serde_json::json!({
            "operation": "fill: X x1 in A, B",
            "endorsed_names": ["a"]
        }));
        assert!(matches!(outcome, Err(ValidationError::Schema { field, .. }) if field == "fingerprint"));
    }

    #[test]
    fn wrong_operation_prefix_fails_schema() {
        let outcome = check(serde_json::json!({
            "operation": "sms_draft: hello",
            "endorsed_names": ["a"],
            "fingerprint": "x"
        }));
        assert!(matches!(outcome, Err(ValidationError::Schema { .. })));
    }
}

View File

@ -0,0 +1,118 @@
//! Profile activation tracking (Phase 41 PRD).
//!
//! Phase 41 PRD called out `crates/vectord/src/activation.rs` with
//! `ActivationTracker` + background-job pattern. The activation
//! handler itself lives in `service.rs::activate_profile` (200+ lines
//! of warm-up + bucket binding that's wired to VectorState); this
//! module provides the type the PRD named and a single-flight guard
//! that satisfies the PRD gate "refuse new activation if one is
//! pending/running."
//!
//! Handler extraction (moving the body of `activate_profile` here)
//! is deliberately NOT in this commit — it's a module-structure
//! refactor, not a semantic change. When that lands, the inline
//! `tokio::spawn` in `service.rs` moves into `ActivationTracker::start`
//! and the HTTP handler shrinks to ~20 lines of validate + start +
//! respond-202.
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
/// Tracks in-flight profile activations — the PRD's "single-flight
/// guard." Callers check `is_pending`, register via `mark_pending`,
/// and release with `mark_complete` when the activation finishes.
///
/// Granularity is per-profile: activating profile A never blocks B.
#[derive(Clone, Default)]
pub struct ActivationTracker {
    /// profile_id → job_id for every activation currently running.
    pending: Arc<RwLock<HashMap<String, String>>>,
}

impl ActivationTracker {
    pub fn new() -> Self {
        Self::default()
    }

    /// Return the in-flight job_id for `profile_id`, if any. Takes the
    /// read lock only for the duration of the lookup.
    pub async fn is_pending(&self, profile_id: &str) -> Option<String> {
        let table = self.pending.read().await;
        table.get(profile_id).cloned()
    }

    /// Try to register a new activation. Returns false when one is
    /// already running for this profile (caller should return 409
    /// Conflict or surface the existing job_id); true when the slot is
    /// now held by `job_id`. Check-and-insert happens under a single
    /// write lock, so concurrent callers cannot both succeed.
    pub async fn mark_pending(&self, profile_id: &str, job_id: &str) -> bool {
        let mut table = self.pending.write().await;
        match table.entry(profile_id.to_string()) {
            std::collections::hash_map::Entry::Occupied(_) => false,
            std::collections::hash_map::Entry::Vacant(slot) => {
                slot.insert(job_id.to_string());
                true
            }
        }
    }

    /// Release the slot when activation finishes — success OR failure,
    /// both free it for the next caller.
    pub async fn mark_complete(&self, profile_id: &str) {
        self.pending.write().await.remove(profile_id);
    }

    /// Total activations currently in-flight across all profiles.
    pub async fn in_flight_count(&self) -> usize {
        self.pending.read().await.len()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn empty_tracker_has_no_pending() {
        // A fresh tracker reports nothing in flight for any profile.
        let tracker = ActivationTracker::new();
        assert_eq!(tracker.in_flight_count().await, 0);
        assert!(tracker.is_pending("any-profile").await.is_none());
    }

    #[tokio::test]
    async fn mark_pending_registers_the_job() {
        let tracker = ActivationTracker::new();
        assert!(tracker.mark_pending("profile-A", "job-1").await);
        assert_eq!(tracker.in_flight_count().await, 1);
        assert_eq!(tracker.is_pending("profile-A").await, Some("job-1".into()));
    }

    #[tokio::test]
    async fn single_flight_guard_refuses_second_activation_same_profile() {
        // PRD Phase 41 gate: "refuse new activation if one is
        // pending/running." A second registration on the same profile
        // must be rejected and must not clobber the first job_id.
        let tracker = ActivationTracker::new();
        assert!(tracker.mark_pending("profile-A", "job-1").await);
        assert!(!tracker.mark_pending("profile-A", "job-2").await);
        assert_eq!(tracker.is_pending("profile-A").await, Some("job-1".into()));
    }

    #[tokio::test]
    async fn different_profiles_dont_block_each_other() {
        // The guard is per-profile: A's activation never blocks B's.
        let tracker = ActivationTracker::new();
        assert!(tracker.mark_pending("profile-A", "job-1").await);
        assert!(tracker.mark_pending("profile-B", "job-2").await);
        assert_eq!(tracker.in_flight_count().await, 2);
    }

    #[tokio::test]
    async fn mark_complete_frees_the_slot() {
        let tracker = ActivationTracker::new();
        tracker.mark_pending("profile-A", "job-1").await;
        tracker.mark_complete("profile-A").await;
        assert_eq!(tracker.in_flight_count().await, 0);
        // The freed slot accepts a fresh activation immediately.
        assert!(tracker.mark_pending("profile-A", "job-2").await);
    }
}

View File

@ -46,6 +46,21 @@ pub struct IndexMeta {
/// Existing indexes: "W-", "CAND-", "W500K-", etc. /// Existing indexes: "W-", "CAND-", "W500K-", etc.
#[serde(default)] #[serde(default)]
pub id_prefix: Option<String>, pub id_prefix: Option<String>,
/// PRD 11.3 — when this index was last searched against. `None` =
/// never used since registration (or pre-field-existed metadata).
/// Incremental re-embed walks this to skip cold indexes.
/// Scrum iter 11 flagged the missing field as a UnitMismatch
/// because callers were reading `created_at` as a proxy for
/// liveness, which conflated "built" with "used."
#[serde(default)]
pub last_used: Option<DateTime<Utc>>,
/// PRD 11.3 — SHA-256 of (sorted source file list + chunk_size +
/// overlap + model_version). Lets incremental re-embed detect
/// "no change since last build" without scanning the source
/// Parquet. None = signature not computed yet (pre-existing
/// indexes before this field landed).
#[serde(default)]
pub build_signature: Option<String>,
} }
fn default_bucket() -> String { "primary".to_string() } fn default_bucket() -> String { "primary".to_string() }
@ -128,4 +143,139 @@ impl IndexRegistry {
self.indexes.write().await.remove(index_name); self.indexes.write().await.remove(index_name);
Ok(()) Ok(())
} }
/// Stamp `last_used = now()` on an index. Search handlers call this
/// on every hit so incremental re-embed (PRD 11.3) can tell live
/// indexes from cold ones. Unknown index names are silently ignored —
/// callers get best-effort liveness tracking, never a 500 on a
/// missing row.
pub async fn touch_used(&self, index_name: &str) {
    let mut indexes = self.indexes.write().await;
    if let Some(meta) = indexes.get_mut(index_name) {
        meta.last_used = Some(Utc::now());
    }
}
}
/// Compute a stable build_signature for PRD 11.3 incremental re-embed.
/// Hashes (sorted source file list, chunk_size, overlap, model_version)
/// so a caller can ask "has anything we built from changed?" without
/// re-scanning the source parquet. Same inputs always produce the
/// same hash; the caller's file ordering is irrelevant (sorted here).
///
/// NOTE(review): file names are joined with `\n` separators, so a path
/// containing a literal newline could alias a different list — assumed
/// impossible for lakehouse dataset paths.
pub fn compute_build_signature(
    source_files: &[impl AsRef<str>],
    chunk_size: usize,
    overlap: usize,
    model_version: &str,
) -> String {
    use sha2::{Digest, Sha256};
    let mut sorted: Vec<&str> = source_files.iter().map(|s| s.as_ref()).collect();
    sorted.sort();
    let mut hasher = Sha256::new();
    for f in &sorted {
        hasher.update(f.as_bytes());
        hasher.update(b"\n");
    }
    // Hash fixed-width u64 little-endian rather than native-width
    // usize: usize::to_le_bytes() yields 4 bytes on 32-bit targets and
    // 8 on 64-bit, which would make identical logical inputs produce
    // different "stable" signatures across platforms. On 64-bit (prod)
    // the bytes are unchanged, so previously stored signatures still
    // match.
    hasher.update((chunk_size as u64).to_le_bytes());
    hasher.update((overlap as u64).to_le_bytes());
    hasher.update(model_version.as_bytes());
    format!("{:x}", hasher.finalize())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn build_signature_is_deterministic() {
    // Two computations over identical inputs must agree bit-for-bit.
    let inputs = ["a.parquet", "b.parquet"];
    let first = compute_build_signature(&inputs, 800, 80, "v1");
    let second = compute_build_signature(&inputs, 800, 80, "v1");
    assert_eq!(first, second, "same inputs → same hash");
}
#[test]
fn build_signature_order_invariant() {
    // The function sorts internally, so caller-supplied order is noise.
    let forward = compute_build_signature(&["a.parquet", "b.parquet"], 800, 80, "v1");
    let reversed = compute_build_signature(&["b.parquet", "a.parquet"], 800, 80, "v1");
    assert_eq!(forward, reversed, "file list order must not affect hash");
}
#[test]
fn build_signature_changes_on_chunk_param() {
    // Bumping chunk_size must invalidate the signature.
    let base = compute_build_signature(&["a.parquet"], 800, 80, "v1");
    let bumped = compute_build_signature(&["a.parquet"], 900, 80, "v1");
    assert_ne!(base, bumped, "chunk_size change → different hash");
}
#[test]
fn build_signature_changes_on_model_version() {
    // A new embedding model version must invalidate the signature.
    let base = compute_build_signature(&["a.parquet"], 800, 80, "v1");
    let bumped = compute_build_signature(&["a.parquet"], 800, 80, "v2");
    assert_ne!(base, bumped, "model version change → different hash");
}
#[tokio::test]
async fn touch_used_updates_last_used() {
    use object_store::memory::InMemory;
    let backing: Arc<dyn ObjectStore> = Arc::new(InMemory::new());
    let registry = IndexRegistry::new(backing);
    // Minimal well-formed metadata row with both new fields unset.
    let index_meta = IndexMeta {
        index_name: "test".into(),
        source: "s".into(),
        model_name: "m".into(),
        model_version: "v1".into(),
        dimensions: 768,
        chunk_count: 0,
        doc_count: 0,
        chunk_size: 800,
        overlap: 80,
        storage_key: "k".into(),
        created_at: Utc::now(),
        build_time_secs: 0.0,
        chunks_per_sec: 0.0,
        bucket: "primary".into(),
        vector_backend: Default::default(),
        id_prefix: None,
        last_used: None,
        build_signature: None,
    };
    registry.register(index_meta).await.unwrap();
    // Freshly registered index has never been searched…
    assert!(registry.get("test").await.unwrap().last_used.is_none());
    // …and a single touch stamps it.
    registry.touch_used("test").await;
    assert!(registry.get("test").await.unwrap().last_used.is_some());
}
#[tokio::test]
async fn touch_used_is_noop_on_missing_index() {
    use object_store::memory::InMemory;
    let backing: Arc<dyn ObjectStore> = Arc::new(InMemory::new());
    let registry = IndexRegistry::new(backing);
    // Unknown index: best-effort no-op, must neither panic nor error.
    registry.touch_used("nonexistent").await;
}
#[test]
fn index_meta_deserializes_without_new_fields_backcompat() {
    // Pre-field-existence metadata files on disk must still load.
    // Critical — we have ~40 .json meta files under vectors/meta/
    // that predate these fields.
    let legacy = r#"{
"index_name": "resumes_v1",
"source": "resumes",
"model_name": "nomic-embed-text",
"model_version": "latest",
"dimensions": 768,
"chunk_count": 100,
"doc_count": 10,
"chunk_size": 800,
"overlap": 80,
"storage_key": "vectors/resumes_v1.parquet",
"created_at": "2026-04-20T00:00:00Z",
"build_time_secs": 1.0,
"chunks_per_sec": 100.0
}"#;
    let decoded: IndexMeta = serde_json::from_str(legacy).expect("must deserialize pre-field meta");
    // New optional fields default to None; bucket falls back to its
    // serde default.
    assert!(decoded.last_used.is_none());
    assert!(decoded.build_signature.is_none());
    assert_eq!(decoded.bucket, "primary");
}
} }

View File

@ -7,7 +7,9 @@ pub mod harness;
pub mod hnsw; pub mod hnsw;
pub mod index_registry; pub mod index_registry;
pub mod jobs; pub mod jobs;
pub mod activation;
pub mod playbook_memory; pub mod playbook_memory;
pub mod pathway_memory;
pub mod doc_drift; pub mod doc_drift;
pub mod promotion; pub mod promotion;
pub mod refresh; pub mod refresh;

File diff suppressed because it is too large Load Diff

View File

@ -1647,7 +1647,7 @@ mod validity_window_tests {
let past = (chrono::Utc::now() - chrono::Duration::days(1)).to_rfc3339(); let past = (chrono::Utc::now() - chrono::Duration::days(1)).to_rfc3339();
let future = (chrono::Utc::now() + chrono::Duration::days(1)).to_rfc3339(); let future = (chrono::Utc::now() + chrono::Duration::days(1)).to_rfc3339();
let e_expired = mkentry("pb-expired", "Nashville", "TN", None, Some(past)); let e_expired = mkentry("pb-expired", "Nashville", "TN", None, Some(past));
let e_alive = { let mut e = mkentry("pb-alive", "Nashville", "TN", None, Some(future)); e }; let e_alive = mkentry("pb-alive", "Nashville", "TN", None, Some(future));
pm.set_entries(vec![e_expired, e_alive]).await.unwrap(); pm.set_entries(vec![e_expired, e_alive]).await.unwrap();
let boosts = pm.compute_boost_for_filtered_with_role( let boosts = pm.compute_boost_for_filtered_with_role(
&[1.0, 0.0, 0.0], 100, 0.5, &[1.0, 0.0, 0.0], 100, 0.5,

View File

@ -131,6 +131,11 @@ impl PromotionRegistry {
file.history.drain(0..drop); file.history.drain(0..drop);
} }
} }
// Bind `entry` ref-captured for the log line below so the log
// doesn't double-unwrap file.current — entry is Some-by-construction
// at the function boundary; past versions reached in via
// `.as_ref().unwrap()` twice, which compiled but would panic if
// the construction above ever changed.
file.current = Some(entry); file.current = Some(entry);
file.index_name = index_name.to_string(); file.index_name = index_name.to_string();
@ -140,10 +145,12 @@ impl PromotionRegistry {
ops::put(&store, &key, json.into()).await?; ops::put(&store, &key, json.into()).await?;
self.cache.write().await.insert(index_name.to_string(), file.clone()); self.cache.write().await.insert(index_name.to_string(), file.clone());
tracing::info!( if let Some(cur) = &file.current {
"promoted '{}' to config {:?} (trial={})", tracing::info!(
index_name, file.current.as_ref().unwrap().config, file.current.as_ref().unwrap().trial_id, "promoted '{}' to config {:?} (trial={})",
); index_name, cur.config, cur.trial_id,
);
}
Ok(file) Ok(file)
} }

View File

@ -308,6 +308,8 @@ async fn try_update_index_meta(
bucket: "primary".to_string(), bucket: "primary".to_string(),
vector_backend: shared::types::VectorBackend::Parquet, vector_backend: shared::types::VectorBackend::Parquet,
id_prefix: None, id_prefix: None,
last_used: None,
build_signature: None,
}; };
index_registry.register(meta).await index_registry.register(meta).await
} }

Some files were not shown because too many files have changed in this diff Show More