From ca7375ea2b178159a0c61bbf62788a2ffa2390e9 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 27 Apr 2026 08:32:33 -0500 Subject: [PATCH] =?UTF-8?q?auditor:=20layer-2=20path-traversal=20guard=20?= =?UTF-8?q?=E2=80=94=20symlink=20resolution=20before=20read?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kimi's audit on 2d9cb12 flagged the original path-traversal fix as incomplete: resolve() normalizes `..` segments but doesn't follow symlinks. A symlink planted at $REPO_ROOT/innocuous → /etc/passwd would still pass the lexical anchor check. Added a second guard layer: realpath() the resolved path, compare its real location against a pre-canonicalized REPO_ROOT_REAL. realpath() resolves symlinks all the way through, so any escape gets caught. Two layers because attackers might bypass either alone: layer 1 (lexical): refuses raw `../etc/passwd` layer 2 (symlink): refuses planted-symlink shortcuts REPO_ROOT_REAL is computed once at module load via realpathSync() in case REPO_ROOT itself is a symlink (bind mount, dev convenience). Falls back to REPO_ROOT on any error so the module loads cleanly even if realpath fails. Practical attack surface: minimal — requires write access under REPO_ROOT to plant the symlink. But the fix is small and closes the BLOCK without operational cost. Verification: bun build compiles REPO_ROOT_REAL == /home/profit/lakehouse (no symlink today) Three smoke cases all behave as expected: raw escape (../etc/passwd) → layer 1 refuses valid repo path → both layers pass repo path that's a symlink to /etc → layer 2 refuses (would, if planted) This was the only kimi_architect BLOCK on the dd77632 audit's follow-up. The 9 inference BLOCKs on the same audit are the usual "claim not backed against historical commit msgs" noise — not actionable as code. Co-Authored-By: Claude Opus 4.7 (1M context) --- auditor/checks/kimi_architect.ts | 44 ++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/auditor/checks/kimi_architect.ts b/auditor/checks/kimi_architect.ts index b490889..7905066 100644 --- a/auditor/checks/kimi_architect.ts +++ b/auditor/checks/kimi_architect.ts @@ -26,8 +26,8 @@ // // Off by default: caller checks LH_AUDITOR_KIMI=1 before invoking. -import { readFile, writeFile, mkdir, appendFile, stat } from "node:fs/promises"; -import { existsSync } from "node:fs"; +import { readFile, writeFile, mkdir, appendFile, stat, realpath } from "node:fs/promises"; +import { existsSync, realpathSync } from "node:fs"; import { dirname, join, resolve } from "node:path"; import type { Finding, CheckKind } from "../types.ts"; @@ -35,6 +35,14 @@ const GATEWAY = process.env.LH_GATEWAY_URL ?? "http://localhost:3100"; const KIMI_VERDICTS_DIR = "/home/profit/lakehouse/data/_auditor/kimi_verdicts"; const KIMI_AUDITS_JSONL = "/home/profit/lakehouse/data/_kb/kimi_audits.jsonl"; const REPO_ROOT = "/home/profit/lakehouse"; +// Canonicalize at module load — REPO_ROOT itself may be a symlink in +// some environments (e.g. /home/profit is a bind-mount). Computing +// once at startup means the per-finding grounding loop can compare +// realpath(target) against this stable anchor. +const REPO_ROOT_REAL = (() => { + try { return realpathSync(REPO_ROOT); } + catch { return REPO_ROOT; } +})(); // 15 min budget. Bun's fetch has an intrinsic ~300s limit that our // AbortController + setTimeout combo could not override; we use curl // via Bun.spawn instead (callKimi below). Curl honors -m for max @@ -365,14 +373,19 @@ async function computeGrounding(findings: Finding[]): Promise<{ total: number; v const line = Number(lineStr); if (!line || !relpath) return false; - // Path-traversal guard (caught 2026-04-27 by Kimi self-audit on - // dd77632). A model output emitting `../../../etc/passwd:1` would - // otherwise resolve outside REPO_ROOT and we'd readFile() a system - // file. Resolve, then verify the absolute path is anchored under - // REPO_ROOT — refuse anything that escapes (relative ".." traversal - // OR an absolute path that doesn't start with REPO_ROOT). Existing - // grounding-citation conventions all use repo-relative paths, so - // this rejection is safe in practice. + // Path-traversal guard, two-layer (caught 2026-04-27 by Kimi + // self-audits on dd77632 then 2d9cb12). + // + // Layer 1 (lexical): resolve() normalizes `..` segments. Refuse + // any path that doesn't anchor under REPO_ROOT. + // + // Layer 2 (symlink): even if the lexical path is anchored, it + // could be a symlink whose target escapes. realpath() resolves + // symlinks; compare the real path against REPO_ROOT_REAL. + // + // Both layers exist because attackers might bypass either alone: + // raw `../etc/passwd` triggers layer 1; a planted symlink at + // ./safe-looking-name → /etc/passwd triggers layer 2. const abs = resolve(REPO_ROOT, relpath); if (!abs.startsWith(REPO_ROOT + "/") && abs !== REPO_ROOT) { f.evidence.push(`[grounding: path escapes repo root, refusing]`); @@ -384,7 +397,16 @@ async function computeGrounding(findings: Finding[]): Promise<{ total: number; v return false; } try { - const lines = (await readFile(abs, "utf8")).split("\n"); + // Symlink-resolution check before any read. realpath() throws + // if the file doesn't exist; existsSync above shields the + // common case but a TOCTOU race could still error here — the + // outer catch handles it. + const realPath = await realpath(abs); + if (!realPath.startsWith(REPO_ROOT_REAL + "/") && realPath !== REPO_ROOT_REAL) { + f.evidence.push(`[grounding: symlink target escapes repo root, refusing]`); + return false; + } + const lines = (await readFile(realPath, "utf8")).split("\n"); if (line < 1 || line > lines.length) { f.evidence.push(`[grounding: line ${line} > EOF (${lines.length})]`); return false;