From 5bbcaf4c33af8bf95b97a14102681c5a09d0fded Mon Sep 17 00:00:00 2001 From: profit Date: Wed, 22 Apr 2026 03:44:36 -0500 Subject: [PATCH] Fix: layer-2 Langfuse filter used meaningless ternary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Caught by running a side-test through LLM Team's run_codereview flow (gpt-oss:120b reviewer) against this fixture, 2026-04-22. BEFORE: const ourStart = Date.parse( l1.evidence.match(/tokens=/) ? result.ran_at : result.ran_at ); // Both branches return result.ran_at — the ternary is meaningless. // result.ran_at is the fixture start time, NOT the moment we fired // /v1/chat. Any trace created between fixture-start and chat-fetch // would be missed (a false negative). AFTER: const chat_request_sent_ms = Date.now(); // captured before layer 1 // ... const recent = items.filter(t => Date.parse(t.timestamp) >= chat_request_sent_ms ); Re-ran the fixture against the live stack — layers 1,2,4 still pass (no regression); layer 2 trace matched at age=2494ms, which is within the chat-to-trace propagation window. Layers 3,5 still fail for the original unrelated reasons (UpsertOutcome serde panic + Phase 45 slice 3 endpoint not built). First concrete act-on-finding from a code-checker run. The process works. 
--- auditor/fixtures/hybrid_38_40_45.ts | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/auditor/fixtures/hybrid_38_40_45.ts b/auditor/fixtures/hybrid_38_40_45.ts index 8e3ed16..f8472b9 100644 --- a/auditor/fixtures/hybrid_38_40_45.ts +++ b/auditor/fixtures/hybrid_38_40_45.ts @@ -102,6 +102,15 @@ export async function runHybridFixture(): Promise { // ======================================================================== // Layer 1 — Phase 38: POST /v1/chat returns valid OpenAI shape // ======================================================================== + // Captured HERE, immediately before the chat layer runs, so layer 2's + // Langfuse-trace filter uses the actual moment the chat call was + // attempted — not the fixture start time. Earlier draft had a + // meaningless ternary returning result.ran_at on both branches; the + // LLM-Team codereview (2026-04-22) caught this and flagged it as a + // false-negative window on traces created between fixture-start and + // chat-fetch. + const chat_request_sent_ms = Date.now(); + const l1 = await measureLayer("phase38_chat", "38", async () => { const r = await fetch(`${GATEWAY}/v1/chat`, { method: "POST", @@ -150,11 +159,13 @@ export async function runHybridFixture(): Promise { if (!r.ok) throw new Error(`langfuse ${r.status}: ${await r.text()}`); const j: any = await r.json(); const items = Array.isArray(j.data) ? j.data : []; - // Find a trace newer than our l1 start timestamp. - const ourStart = Date.parse(l1.evidence.match(/tokens=/) ? result.ran_at : result.ran_at); - const recent = items.filter((t: any) => Date.parse(t.timestamp) >= ourStart); + // Filter on the chat-request timestamp captured above. A Langfuse + // trace must be newer than the moment we fired /v1/chat to plausibly + // belong to our request. Using fixture start time (result.ran_at) + // was wrong and could false-negative on slow fixtures. 
+ const recent = items.filter((t: any) => Date.parse(t.timestamp) >= chat_request_sent_ms); if (recent.length === 0) { - throw new Error(`no v1.chat:ollama trace since ${new Date(ourStart).toISOString()} (${items.length} older traces visible, Langfuse reachable — tracing is not firing)`); + throw new Error(`no v1.chat:ollama trace since ${new Date(chat_request_sent_ms).toISOString()} (${items.length} older traces visible, Langfuse reachable — tracing is not firing)`); } const trace = recent[0]; return {