Architectural snapshot of the lakehouse codebase at the point where the
full matrix-driven agent loop with Mem0 versioning + deletion was
validated end-to-end.
WHAT THIS REPO IS
A clean single-commit snapshot of the lakehouse code. Heavy test data
(.parquet datasets, vector indexes) is excluded — see REPLICATION.md for
the regeneration path. Full lakehouse history: git.agentview.dev/profit/lakehouse.
WHAT WAS PROVEN
- Vector retrieval across the multi-corpus matrix (chicago_permits + entity
briefs + sec_tickers + distilled procedural + llm_team runs)
- Observer hand-review (cloud + heuristic fallback) gating each candidate
- Local-model agent loop (qwen3.5:latest) with tool use + scratchpad
- Playbook seal on success → next-iter retrieval surfaces it as preamble
- Mem0 versioning + deletion in pathway_memory:
* UPSERT: ADD on new workflow, UPDATE bumps replay_count on identical
* REVISE: chains versions, parent.superseded_at + superseded_by stamped
* RETIRE: marks specific trace retired with reason, excluded from retrieval
* HISTORY: walks chain root→tip, cycle-safe
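
The UPSERT semantics above (ADD on a new workflow, UPDATE bumping replay_count on
an identical one) can be sketched in-memory as follows. This is an illustrative
sketch only: `upsert`, `Trace`, and `workflowKey` are hypothetical names, not the
real pathway_memory API, and the real store also reports a Noop outcome and
persists to state.json, which this sketch omits.

```typescript
// Hypothetical in-memory sketch of the Mem0-style UPSERT decision.
// Names (Trace, workflowKey, upsert) are illustrative, not the real API.

type Trace = { workflowKey: string; replay_count: number; retired: boolean };

type UpsertOutcome =
  | { kind: "Added" }
  | { kind: "Updated"; replay_count: number };

// ADD when no live (non-retired) trace has this workflow;
// UPDATE bumps replay_count when an identical workflow already exists.
// Retired traces are ignored, so a retired workflow can be re-added.
function upsert(store: Map<string, Trace>, incoming: Trace): UpsertOutcome {
  const existing = store.get(incoming.workflowKey);
  if (existing && !existing.retired) {
    existing.replay_count += 1;
    return { kind: "Updated", replay_count: existing.replay_count };
  }
  store.set(incoming.workflowKey, incoming);
  return { kind: "Added" };
}
```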
KEY DIRECTORIES
- crates/vectord/src/pathway_memory.rs — Mem0 ops live here
- crates/vectord/src/playbook_memory.rs — original Mem0 reference
- tests/agent_test/ — local-model agent harness + PRD + session archives
- scripts/dump_raw_corpus.sh — MinIO bucket dump (raw test corpus)
- scripts/vectorize_raw_corpus.ts — corpus → vector indexes
- scripts/analyze_chicago_contracts.ts — real inference pipeline
- scripts/seal_agent_playbook.ts — Mem0 upsert from agent traces
Replication: see REPLICATION.md for Debian 13 clean install + cloud-only
adaptation (no local Ollama).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
178 lines
8.4 KiB
TypeScript
#!/usr/bin/env bun
// Seal the iter-4 successful agent trace as a playbook in the matrix,
// then verify the matrix can retrieve it via a similarity query.
//
// This closes the architectural loop: agent run → success → seal →
// future retrieval surfaces this approach as proven.

import { readFile } from "node:fs/promises";

const GATEWAY = "http://localhost:3100";
// Default to live workspace; override with first arg for archived sessions.
const SESSION_DIR = process.argv[2] ?? "/home/profit/lakehouse/tests/agent_test";

async function main() {
  const trace = (await readFile(`${SESSION_DIR}/_trace.jsonl`, "utf8"))
    .split("\n").filter(l => l.trim()).map(l => JSON.parse(l));
  const finalMd = await readFile(`${SESSION_DIR}/_final.md`, "utf8");

  // Extract tool sequence from trace
  const toolCalls = trace.filter(t => t.kind === "tool_call");
  const toolSeq = toolCalls.map(t => t.tool).join(" → ");
  const totalSteps = toolCalls.length;
  const totalLatency = trace.filter(t => t.latency_ms).reduce((a, t) => a + (t.latency_ms ?? 0), 0);

  console.log(`iter-4 trace: ${trace.length} events, ${totalSteps} tool calls, ${(totalLatency / 1000).toFixed(1)}s total`);
  console.log(`tool sequence: ${toolSeq}`);
  console.log(`final output: ${finalMd.length} chars`);

  // Build playbook entry: this captures the proven approach for the
  // task class "chicago_permit_staffing_analysis" so a future agent
  // querying for similar work surfaces this trace as a reference.
  const operation = `Chicago permit staffing analysis — qwen3.5:latest agent, ${totalSteps}-step success`;
  const approach = `PROVEN AGENT WORKFLOW (validated 2026-04-25 iter 4):

1. PLAN FIRST via note() — explicit step list before any execution
2. list_permits(min_cost=N) — get high-cost candidates
3. SKIP government agencies (CDOT, City of Chicago) — pick private contractor
4. read_permit(id) — get full permit fields including contact_1_name, work_description, reported_cost
5. query_matrix("<contractor_name> contractor Chicago <work_type>", top_k=3-5) — pull cross-corpus evidence
6. note() — single focused analysis of matrix evidence + gaps (do NOT loop on note())
7. done(summary=<5-section markdown>) — Permit Summary, Contractor Profile, Staffing Implications, Risk Signals, Recommendation

KEY LESSONS:
- llm_team_runs_v1 + llm_team_response_cache_v1 are noise corpora — exclude
- Useful corpora: chicago_permits_v1, entity_brief_v1, sec_tickers_v1, distilled_procedural_v20260423102847
- Matrix often returns "no specific evidence" for private contractors — that's OK, acknowledge gap honestly, do NOT invent history
- Recommendation should reflect actual evidence: "Investigate-Further" when matrix is empty, not generic "Pursue"
- Total wall ≈30s for 6 tool calls`;

  const context = `PRD: tests/agent_test/PRD.md
Tools: list_permits, read_permit, query_matrix, note, read_scratchpad, done
Corpora (validated useful): chicago_permits_v1 (3420 chunks), entity_brief_v1 (634), sec_tickers_v1 (10341), distilled_procedural_v20260423102847
Model: qwen3.5:latest (local Ollama, think:false)
Source data: 2,853 Chicago building permits (last 30d), 552 with cost >= $100K and named contractors
Output spec: 5-section markdown (Permit Summary, Contractor Profile, Staffing Implications, Risk Signals, Recommendation), 600-1000 words`;

  // endorsed_names: keywords that should match similar future queries.
  // NOTE: unused below after the switch to pathway_memory — kept for reference.
  const endorsedNames = [
    "qwen3.5:latest",
    "chicago_permit_analysis",
    "private_contractor_review",
    "matrix_retrieval_workflow",
    "list_permits_read_query_done",
  ];

  // playbook_memory/seed expects "fill: Role xN in City, ST" shape — wrong tool for
  // a general agent-task playbook. Use pathway_memory/insert instead — it's the
  // general task_class + file_prefix store we built for ADR-021.
  console.log("\n──── SEALING via pathway_memory/insert ────");
  const taskClass = "chicago_permit_analysis";
  const filePath = "tests/agent_test/permit_100994035";
  const signalClass = "private_contractor_recommendation";
  // pathway_id = SHA256(task_class + "|" + file_prefix + "|" + signal_class)
  // where file_prefix = first 2 path segments. Matches gateway's hot-swap logic.
  const filePrefix = filePath.split("/").slice(0, 2).join("/");
  const hasher = new Bun.CryptoHasher("sha256");
  hasher.update(`${taskClass}|${filePrefix}|${signalClass}`);
  const pathwayId = hasher.digest("hex");
  console.log(`pathway_id: ${pathwayId}`);

  const traceEntry = {
    pathway_id: pathwayId,
    task_class: taskClass,
    file_path: filePath,
    signal_class: signalClass,
    created_at: new Date().toISOString(),
    ladder_attempts: toolCalls.map((t, i) => ({
      rung: i + 1,
      model: t.tool === "done" ? "qwen3.5:latest+done" : `qwen3.5:latest+${t.tool}`,
      latency_ms: t.latency_ms ?? 0,
      accepted: t.tool === "done",
      reject_reason: null,
    })),
    kb_chunks: [
      { source_doc: "chicago_permits_v1", chunk_id: "permit_100994035", cosine_score: 0.6, rank: 0 },
      { source_doc: "entity_brief_v1", chunk_id: "entity_jim_panella_search", cosine_score: 0.58, rank: 1 },
      { source_doc: "sec_tickers_v1", chunk_id: "sec_no_match", cosine_score: 0.5, rank: 2 },
    ],
    observer_signals: [],
    bridge_hits: [],
    sub_pipeline_calls: [],
    audit_consensus: null,
    reducer_summary: `${approach}\n\n──── FINAL OUTPUT ────\n${finalMd}`,
    final_verdict: "accepted",
    pathway_vec: new Array(32).fill(0), // zero-vector placeholder; the gateway replaces it if it computes pathway vectors
    replay_count: 0,
    replays_succeeded: 0,
    semantic_flags: [],
    type_hints_used: [],
    bug_fingerprints: [],
    retired: false,
  };

  // Use Mem0-style upsert (J 2026-04-25). NOOP if a live trace with
  // identical workflow already exists; UPDATE bumps replay_count;
  // ADD if no match.
  const seal = await fetch(`${GATEWAY}/vectors/pathway/upsert`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify(traceEntry),
    signal: AbortSignal.timeout(30000),
  });
  if (!seal.ok) {
    console.error(`✗ seal failed: ${seal.status} — ${(await seal.text()).slice(0, 300)}`);
    process.exit(1);
  }
  const sealResult = await seal.json();
  console.log(`✓ sealed via pathway/upsert: ${JSON.stringify(sealResult).slice(0, 300)}`);
  // sealResult.outcome shape:
  //   {Added: {pathway_id, trace_uid}}
  //   {Updated: {pathway_id, trace_uid, replay_count}}
  //   {Noop: {pathway_id, trace_uid}}
  const outcomeKey = Object.keys(sealResult.outcome ?? {})[0];
  console.log(` Mem0 outcome: ${outcomeKey}`);

  // ─── VERIFY: pathway_memory stats + bug_fingerprints query ───
  console.log("\n──── VERIFYING RETRIEVAL ────");
  const stats = await fetch(`${GATEWAY}/vectors/pathway/stats`, { signal: AbortSignal.timeout(10000) });
  if (stats.ok) {
    const s: any = await stats.json();
    console.log(`pathway_memory stats: total=${s.total_pathways} retired=${s.retired} reuse_rate=${s.reuse_rate}`);
  }

  // Query for the same narrow fingerprint we just sealed — should retrieve
  // our trace as a bug_fingerprint context (or via hot_swap if eligible).
  const fpQuery = await fetch(`${GATEWAY}/vectors/pathway/bug_fingerprints`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({
      task_class: "chicago_permit_analysis",
      file_path: "tests/agent_test/permit_100994036", // different permit, same prefix
      signal_class: "private_contractor_recommendation",
      limit: 5,
    }),
    signal: AbortSignal.timeout(10000),
  });
  if (fpQuery.ok) {
    const result: any = await fpQuery.json();
    const fps = result.fingerprints ?? result;
    console.log(`bug_fingerprints retrieval (sister permit, same prefix): ${JSON.stringify(fps).slice(0, 400)}`);
  }

  // Confirm the trace landed in state.json
  // (Bun.file() is synchronous — it returns a lazy BunFile handle, not a Promise)
  const stateProbe = Bun.file("/home/profit/lakehouse/data/_pathway_memory/state.json");
  if (await stateProbe.exists()) {
    const state: any = JSON.parse(await stateProbe.text());
    let found = false;
    for (const traces of Object.values(state.pathways ?? {}) as any[][]) {
      for (const t of traces) {
        if (t.task_class === "chicago_permit_analysis") { found = true; break; }
      }
      if (found) break;
    }
    console.log(`state.json contains chicago_permit_analysis trace: ${found}`);
  }
}

main().catch(e => { console.error(`FATAL: ${e.message}`); process.exit(1); });