matrix-agent-validated/auditor/checks/dynamic.ts

// Dynamic execution check — runs the hybrid fixture and maps its
// layer results to auditor Findings.
//
// A layer that fails with a "not implemented / 404 / slice N" error
// gets severity=info (honest placeholder signal). A layer that fails
// any other way gets severity=warn (something actually broke).
// An info-level summary finding is always emitted carrying the real
// numbers — shipped/placeholder phase counts, per-layer latency.

import { runHybridFixture } from "../fixtures/hybrid_38_40_45.ts";
import type { Finding } from "../types.ts";

const PLACEHOLDER_MARKERS = [
  "unimplemented",
  " 404 ", "(404)", " 405 ", "(405)",
  "slice 3", "slice 4", "slice 5",
  "endpoint not built", "not yet",
];

function isPlaceholderFailure(err?: string): boolean {
  if (!err) return false;
  const low = err.toLowerCase();
  return PLACEHOLDER_MARKERS.some(m => low.includes(m.toLowerCase()));
}

export async function runDynamicCheck(): Promise<Finding[]> {
  const findings: Finding[] = [];

  let result;
  try {
    result = await runHybridFixture();
  } catch (e) {
    // Fixture itself crashed — can't run dynamic check at all.
    return [
      {
        check: "dynamic",
        severity: "warn",
        summary: `hybrid fixture crashed before completing: ${(e as Error).message.slice(0, 140)}`,
        evidence: [(e as Error).message],
      },
    ];
  }

  // Per-layer findings for every non-ok layer.
  for (const layer of result.layers) {
    if (layer.ok) continue;
    const placeholder = isPlaceholderFailure(layer.error);
    findings.push({
      check: "dynamic",
      severity: placeholder ? "info" : "warn",
      summary: placeholder
        ? `hybrid fixture layer ${layer.layer} (Phase ${layer.phase}) honestly reports unimplemented`
        : `hybrid fixture layer ${layer.layer} (Phase ${layer.phase}) failed — not a placeholder, a real failure`,
      evidence: [
        `evidence: ${layer.evidence.slice(0, 160)}`,
        ...(layer.error ? [`error: ${layer.error.slice(0, 160)}`] : []),
        `latency_ms: ${layer.latency_ms}`,
      ],
    });
  }

  // One overall summary with real numbers so the report shows what
  // DID pass plus per-layer timing.
  const metrics_preview = Object.entries(result.real_numbers)
    .slice(0, 10)
    .map(([k, v]) => `${k}=${v}`);
  findings.push({
    check: "dynamic",
    severity: "info",
    summary: `hybrid fixture overall=${result.overall}, shipped [${result.shipped_phases.join(", ")}], placeholder [${result.placeholder_phases.join(", ")}]`,
    evidence: metrics_preview.length > 0 ? metrics_preview : ["no metrics emitted"],
  });

  // If the fixture ran at all but nothing passed, elevate one of the
  // summary findings to warn — something more than "all honest
  // placeholders" is wrong.
  if (result.overall === "fail") {
    findings.push({
      check: "dynamic",
      severity: "warn",
      summary: `hybrid fixture: 0 layers passed (overall=fail)`,
      evidence: [
        "a total fixture fail usually means a precondition service is down",
        "(gateway /health / sidecar / Langfuse /v1/chat) — NOT necessarily",
        "the PR's code problem. Check service status before blaming the PR.",
      ],
    });
  }

  return findings;
}