Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
/v1/chat/completions route alias (same handler as /chat) lets any tool
using the official `openai` SDK adopt the gateway via OPENAI_BASE_URL
alone — no custom provider field needed.
resolve_provider() extended:
- bare `vendor/model` (slash) → openrouter (catches x-ai/grok-4.1-fast,
moonshotai/kimi-k2, deepseek/deepseek-v4-flash, openai/gpt-oss-120b:free)
- bare vendor model names (no slash, no colon) get auto-prefixed:
gpt-* / o1-* / o3-* / o4-* → openai/<name> (OpenRouter form)
claude-* → anthropic/<name>
grok-* → x-ai/<name>
Then routed to openrouter. Ollama models (with colon, no slash) keep
default routing. Tools like pi-ai validate against an OpenAI-style
catalog and send bare names — this lets them flow through cleanly.
Verified end-to-end:
- curl POST /v1/chat/completions {model: "gpt-4o-mini", ...} → 200,
routed to openrouter as openai/gpt-4o-mini
- openai SDK with baseURL=http://localhost:3100/v1 → 3 model variants all
succeed (openai/gpt-4o-mini, gpt-4o-mini, x-ai/grok-4.1-fast)
- Langfuse traces fire automatically on every call
(v1.chat:openrouter, provider tagged in metadata)
scripts/mode_pass5_variance_paid.ts gains LH_CONDITIONS env so subset
runs (e.g. just isolation vs composed) take half the latency.
Archon-on-Lakehouse integration: gateway side is done. Pi-ai's
openai-responses backend uses /v1/responses (not /chat/completions) and
its openrouter backend appears to bail in client-side validation before
sending. Patching Pi locally to override baseUrl works for arch but the
harness still rejects — needs more work in a follow-up. Direct openai
SDK path (langchain-js / agents / patched Pi) works today.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
106 lines · 4.0 KiB · TypeScript
#!/usr/bin/env bun
/**
 * Pass 5: variance test for the 2026-04-26 paid-model bake-off.
 *
 * The pass-4 single-rep sweep showed isolation beating every matrix
 * condition by 1.0-1.4 grounded findings/file on grok-4.1-fast. This
 * harness runs N reps × M conditions on the file where the effect was
 * sharpest (pathway_memory.rs, 1355 lines) so we can decide whether
 * the deltas are real signal or run-to-run noise.
 *
 * Conditions:
 *   1. codereview_isolation — no matrix
 *   2. codereview_lakehouse + corpus=lakehouse_arch_v1 — A only
 *   3. codereview_lakehouse + corpus=lakehouse_symbols_v1 — C only
 *   4. codereview_lakehouse (modes.toml default) — A+C composed
 *
 * Output appends per-call to data/_kb/mode_experiments.jsonl. Aggregate
 * with `bun run scripts/mode_compare.ts --since <ts>` and read the
 * grounded column with multiple rows per (mode|corpus) key.
 *
 * Usage:
 *   bun run scripts/mode_pass5_variance_paid.ts
 *   LH_REPS=3 LH_FILE=crates/queryd/src/delta.rs bun run scripts/mode_pass5_variance_paid.ts
 */

const GATEWAY = process.env.LH_GATEWAY ?? "http://localhost:3100";
|
||
const MODEL = process.env.LH_MODEL ?? "x-ai/grok-4.1-fast";
|
||
const FILE = process.env.LH_FILE ?? "crates/vectord/src/pathway_memory.rs";
|
||
const REPS = Number(process.env.LH_REPS ?? 5);
|
||
|
||
/** One experimental arm of the variance sweep. */
interface Condition {
  /** Human-readable tag; trailing padding keeps console columns aligned. */
  label: string;
  /** Mode name sent to the gateway as `force_mode`. */
  mode: string;
  /** Optional matrix-corpus override; omitted → modes.toml default applies. */
  corpus?: string | string[];
}
// The four arms of the bake-off. Trailing spaces inside `label` are
// deliberate (console column alignment); the LH_CONDITIONS filter trims
// them before matching.
const ALL_CONDITIONS: Condition[] = [
  { label: "isolation ", mode: "codereview_isolation" }, // no matrix at all
  { label: "arch_only ", mode: "codereview_lakehouse", corpus: "lakehouse_arch_v1" }, // A only
  { label: "symbols_only ", mode: "codereview_lakehouse", corpus: "lakehouse_symbols_v1" }, // C only
  { label: "composed (A+C) ", mode: "codereview_lakehouse" /* uses modes.toml default */ },
];
// Optional whitelist via env: LH_CONDITIONS=isolation,composed limits the
|
||
// run to a subset (matches against the trimmed `label`). Useful when only
|
||
// the head-to-head pair matters and saves ~50% latency on slow rungs.
|
||
const wantedLabels = (process.env.LH_CONDITIONS ?? "")
|
||
.split(",").map(s => s.trim().toLowerCase()).filter(Boolean);
|
||
const CONDITIONS: Condition[] = wantedLabels.length === 0
|
||
? ALL_CONDITIONS
|
||
: ALL_CONDITIONS.filter(c => wantedLabels.some(w => c.label.trim().toLowerCase().startsWith(w)));
|
||
|
||
async function runOne(c: Condition, rep: number): Promise<{ ok: boolean; latency_ms?: number; resp_chars?: number; error?: string }> {
|
||
const body: any = {
|
||
task_class: "scrum_review",
|
||
file_path: FILE,
|
||
force_mode: c.mode,
|
||
force_model: MODEL,
|
||
};
|
||
if (c.corpus !== undefined) body.force_matrix_corpus = c.corpus;
|
||
|
||
try {
|
||
const r = await fetch(`${GATEWAY}/v1/mode/execute`, {
|
||
method: "POST",
|
||
headers: { "content-type": "application/json" },
|
||
body: JSON.stringify(body),
|
||
signal: AbortSignal.timeout(240_000),
|
||
});
|
||
if (!r.ok) {
|
||
const txt = await r.text().catch(() => "");
|
||
return { ok: false, error: `HTTP ${r.status}: ${txt.slice(0, 160)}` };
|
||
}
|
||
const j: any = await r.json();
|
||
return { ok: true, latency_ms: j.latency_ms, resp_chars: (j.response ?? "").length };
|
||
} catch (e: any) {
|
||
return { ok: false, error: e.message };
|
||
}
|
||
}
|
||
|
||
async function main() {
|
||
const total = CONDITIONS.length * REPS;
|
||
console.log(`[pass5] file=${FILE}`);
|
||
console.log(`[pass5] model=${MODEL} · ${CONDITIONS.length} conditions × ${REPS} reps = ${total} runs`);
|
||
console.log("");
|
||
|
||
let i = 0;
|
||
const startTs = new Date().toISOString();
|
||
for (let rep = 1; rep <= REPS; rep++) {
|
||
for (const c of CONDITIONS) {
|
||
i++;
|
||
process.stdout.write(` [${i}/${total}] rep=${rep} ${c.label}... `);
|
||
const r = await runOne(c, rep);
|
||
if (r.ok) {
|
||
console.log(`✓ ${r.resp_chars} chars · ${((r.latency_ms ?? 0) / 1000).toFixed(1)}s`);
|
||
} else {
|
||
console.log(`✗ ${r.error}`);
|
||
}
|
||
}
|
||
}
|
||
|
||
console.log(`\n[pass5] complete · started ${startTs}`);
|
||
console.log(`[pass5] aggregate: bun run scripts/mode_compare.ts --since ${startTs}`);
|
||
}
|
||
|
||
main().catch(e => { console.error(e); process.exit(1); });
|