diff --git a/auditor/checks/kimi_architect.ts b/auditor/checks/kimi_architect.ts index b490889..7905066 100644 --- a/auditor/checks/kimi_architect.ts +++ b/auditor/checks/kimi_architect.ts @@ -26,8 +26,8 @@ // // Off by default: caller checks LH_AUDITOR_KIMI=1 before invoking. -import { readFile, writeFile, mkdir, appendFile, stat } from "node:fs/promises"; -import { existsSync } from "node:fs"; +import { readFile, writeFile, mkdir, appendFile, stat, realpath } from "node:fs/promises"; +import { existsSync, realpathSync } from "node:fs"; import { dirname, join, resolve } from "node:path"; import type { Finding, CheckKind } from "../types.ts"; @@ -35,6 +35,14 @@ const GATEWAY = process.env.LH_GATEWAY_URL ?? "http://localhost:3100"; const KIMI_VERDICTS_DIR = "/home/profit/lakehouse/data/_auditor/kimi_verdicts"; const KIMI_AUDITS_JSONL = "/home/profit/lakehouse/data/_kb/kimi_audits.jsonl"; const REPO_ROOT = "/home/profit/lakehouse"; +// Canonicalize at module load — REPO_ROOT itself may be a symlink in +// some environments (e.g. /home/profit is a bind-mount). Computing +// once at startup means the per-finding grounding loop can compare +// realpath(target) against this stable anchor. +const REPO_ROOT_REAL = (() => { + try { return realpathSync(REPO_ROOT); } + catch { return REPO_ROOT; } +})(); // 15 min budget. Bun's fetch has an intrinsic ~300s limit that our // AbortController + setTimeout combo could not override; we use curl // via Bun.spawn instead (callKimi below). Curl honors -m for max @@ -365,14 +373,19 @@ async function computeGrounding(findings: Finding[]): Promise<{ total: number; v const line = Number(lineStr); if (!line || !relpath) return false; - // Path-traversal guard (caught 2026-04-27 by Kimi self-audit on - // dd77632). A model output emitting `../../../etc/passwd:1` would - // otherwise resolve outside REPO_ROOT and we'd readFile() a system - // file. Resolve, then verify the absolute path is anchored under - // REPO_ROOT — refuse anything that escapes (relative ".." traversal - // OR an absolute path that doesn't start with REPO_ROOT). Existing - // grounding-citation conventions all use repo-relative paths, so - // this rejection is safe in practice. + // Path-traversal guard, two-layer (caught 2026-04-27 by Kimi + // self-audits on dd77632 then 2d9cb12). + // + // Layer 1 (lexical): resolve() normalizes `..` segments. Refuse + // any path that doesn't anchor under REPO_ROOT. + // + // Layer 2 (symlink): even if the lexical path is anchored, it + // could be a symlink whose target escapes. realpath() resolves + // symlinks; compare the real path against REPO_ROOT_REAL. + // + // Both layers exist because attackers might bypass either alone: + // raw `../etc/passwd` triggers layer 1; a planted symlink at + // ./safe-looking-name → /etc/passwd triggers layer 2. const abs = resolve(REPO_ROOT, relpath); if (!abs.startsWith(REPO_ROOT + "/") && abs !== REPO_ROOT) { f.evidence.push(`[grounding: path escapes repo root, refusing]`); @@ -384,7 +397,16 @@ async function computeGrounding(findings: Finding[]): Promise<{ total: number; v return false; } try { - const lines = (await readFile(abs, "utf8")).split("\n"); + // Symlink-resolution check before any read. realpath() throws + // if the file doesn't exist; existsSync above shields the + // common case but a TOCTOU race could still error here — the + // outer catch handles it. + const realPath = await realpath(abs); + if (!realPath.startsWith(REPO_ROOT_REAL + "/") && realPath !== REPO_ROOT_REAL) { + f.evidence.push(`[grounding: symlink target escapes repo root, refusing]`); + return false; + } + const lines = (await readFile(realPath, "utf8")).split("\n"); if (line < 1 || line > lines.length) { f.evidence.push(`[grounding: line ${line} > EOF (${lines.length})]`); return false;