Architectural snapshot of the lakehouse codebase at the point where the
full matrix-driven agent loop with Mem0 versioning + deletion was
validated end-to-end.
WHAT THIS REPO IS
A clean single-commit snapshot of the lakehouse code. Heavy test data
(.parquet datasets, vector indexes) excluded — see REPLICATION.md for
regen path. Full lakehouse history at git.agentview.dev/profit/lakehouse.
WHAT WAS PROVEN
- Vector retrieval across the multi-corpus matrix (chicago_permits + entity
  briefs + sec_tickers + distilled procedural + llm_team runs)
- Observer hand-review (cloud + heuristic fallback) gating each candidate
- Local-model agent loop (qwen3.5:latest) with tool use + scratchpad
- Playbook seal on success → next-iter retrieval surfaces it as preamble
- Mem0 versioning + deletion in pathway_memory (sketched after this list):
  * UPSERT: ADD on a new workflow; UPDATE bumps replay_count on an identical one
  * REVISE: chains versions; parent.superseded_at + superseded_by stamped
  * RETIRE: marks a specific trace retired with a reason, excluded from retrieval
  * HISTORY: walks the chain root→tip, cycle-safe
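A minimal Python sketch of those versioning semantics (the real implementation
is the Rust module crates/vectord/src/pathway_memory.rs; every field and
function name below is an illustrative assumption, not the actual schema):

    from datetime import datetime, timezone

    def _now():
        return datetime.now(timezone.utc).isoformat()

    def upsert(store, trace):
        # ADD on a new workflow; UPDATE bumps replay_count on an identical one.
        for existing in store.values():
            if existing["workflow_hash"] == trace["workflow_hash"] and not existing.get("retired"):
                existing["replay_count"] += 1
                return existing["id"]
        trace["replay_count"] = 1
        store[trace["id"]] = trace
        return trace["id"]

    def revise(store, parent_id, new_trace):
        # Chain versions: store the new trace, stamp the parent as superseded.
        store[new_trace["id"]] = new_trace
        store[parent_id]["superseded_at"] = _now()
        store[parent_id]["superseded_by"] = new_trace["id"]

    def retire(store, trace_id, reason):
        # Mark a specific trace retired with a reason; retrieval skips these.
        store[trace_id]["retired"] = {"at": _now(), "reason": reason}

    def history(store, root_id):
        # Walk the version chain root→tip, guarding against cycles.
        chain, seen, cur = [], set(), store.get(root_id)
        while cur and cur["id"] not in seen:
            seen.add(cur["id"])
            chain.append(cur)
            cur = store.get(cur.get("superseded_by"))
        return chain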
KEY DIRECTORIES
- crates/vectord/src/pathway_memory.rs — Mem0 ops live here
- crates/vectord/src/playbook_memory.rs — original Mem0 reference
- tests/agent_test/ — local-model agent harness + PRD + session archives
- scripts/dump_raw_corpus.sh — MinIO bucket dump (raw test corpus)
- scripts/vectorize_raw_corpus.ts — corpus → vector indexes
- scripts/analyze_chicago_contracts.ts — real inference pipeline
- scripts/seal_agent_playbook.ts — Mem0 upsert from agent traces
Replication: see REPLICATION.md for Debian 13 clean install + cloud-only
adaptation (no local Ollama).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
#!/usr/bin/env python3
"""Aggregate KB state for item 3 decision.

Reads data/_kb/*.jsonl and tests/multi-agent/playbooks/*/results.json
to answer:
- How many distinct signatures exist?
- Total runs, avg ok rate, avg citations per event?
- Which (role, city) combos have NEVER gotten a citation?
- Recommender confidence progression (cold → medium → high)?
- Mean turn count trend across runs (proxy for efficiency).

Run after `scripts/run_kb_batch.sh` completes. Writes a markdown
summary to tests/multi-agent/playbooks/kb_measurement.md and prints
to stdout.
"""
import json
import os
import sys
from collections import Counter, defaultdict
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]
KB = ROOT / "data" / "_kb"
PLAYBOOKS = ROOT / "tests" / "multi-agent" / "playbooks"
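
# Assumed record shapes (illustrative only; field names are inferred from the
# reads below, not an authoritative schema):
#   signatures.jsonl               one object per distinct signature
#   error_corrections.jsonl        one object per correction (only counted here)
#   outcomes.jsonl                 {"run_id", "created_at", "ok_events", "total_events",
#                                   "total_citations", "total_turns", "per_event": [...]}
#   pathway_recommendations.jsonl  {"confidence": "high|medium|low", "generated_at",
#                                   "neighbors_consulted": [...]}
#   playbooks/<run_id>/results.json
#                                  [{"ok", "playbook_citations": [...],
#                                    "event": {"role", "city", "state"}}, ...]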


def load_jsonl(p):
    """Parse a JSONL file into a list of dicts; blank and malformed lines are skipped."""
    if not p.exists():
        return []
    out = []
    for line in p.read_text().splitlines():
        if line.strip():
            try:
                out.append(json.loads(line))
            except json.JSONDecodeError:
                pass
    return out


def main():
    sigs = load_jsonl(KB / "signatures.jsonl")
    outcomes = load_jsonl(KB / "outcomes.jsonl")
    recs = load_jsonl(KB / "pathway_recommendations.jsonl")
    corrections = load_jsonl(KB / "error_corrections.jsonl")

    # --- Basic counts ---
    print(f"Signatures: {len(sigs)}")
    print(f"Outcomes: {len(outcomes)}")
    print(f"Recommendations: {len(recs)}")
    print(f"Error corrections: {len(corrections)}")
    print()

    # --- Recommender confidence progression ---
    conf_counts = Counter(r.get("confidence", "?") for r in recs)
    print("Recommender confidence distribution:")
    for c in ("high", "medium", "low"):
        print(f" {c:8s}: {conf_counts.get(c, 0)}")
    print()

    # Time-ordered confidence
    recs_sorted = sorted(recs, key=lambda r: r.get("generated_at", ""))
    neighbor_counts = [len(r.get("neighbors_consulted", [])) for r in recs_sorted]
    if neighbor_counts:
        print("Neighbors consulted over time (first → last):")
        print(f" first 3: {neighbor_counts[:3]}")
        print(f" last 3: {neighbor_counts[-3:]}")
        print(f" max: {max(neighbor_counts)}")
        print()

    # --- Fill rate + citation density per run ---
    if outcomes:
        total_ok = sum(o["ok_events"] for o in outcomes)
        total_events = sum(o["total_events"] for o in outcomes)
        total_cites = sum(o.get("total_citations", 0) for o in outcomes)
        total_turns = sum(o.get("total_turns", 0) for o in outcomes)
        print(f"Fill rate: {total_ok}/{total_events} = {100*total_ok/max(1,total_events):.1f}%")
        print(f"Avg citations per run: {total_cites/len(outcomes):.2f}")
        print(f"Avg turns per run: {total_turns/len(outcomes):.1f}")
        print()

        # First 5 runs vs last 5 — does it get better?
        sorted_out = sorted(outcomes, key=lambda o: o.get("created_at", ""))
        if len(sorted_out) >= 10:
            first = sorted_out[:5]
            last = sorted_out[-5:]
            fok = sum(o["ok_events"] for o in first) / sum(o["total_events"] for o in first)
            lok = sum(o["ok_events"] for o in last) / sum(o["total_events"] for o in last)
            fcit = sum(o.get("total_citations", 0) for o in first) / 5
            lcit = sum(o.get("total_citations", 0) for o in last) / 5
            print(f"First 5 runs ok rate: {100*fok:.1f}% avg cites: {fcit:.2f}")
            print(f"Last 5 runs ok rate: {100*lok:.1f}% avg cites: {lcit:.2f}")
            print()

    # --- Per-(role, city) citation coverage ---
    cite_by_combo = Counter()
    combo_attempts = Counter()
    for o in outcomes:
        for ev in o.get("per_event", []):
            key = (ev.get("role", "?"), "?")  # city not in per_event summary
            combo_attempts[key] += 1
    # Read the playbook dirs for full event detail (has city)
    cites_by_role_city = defaultdict(lambda: {"attempts": 0, "citations": 0, "ok": 0})
    for o in outcomes:
        run_dir = PLAYBOOKS / o["run_id"]
        results_file = run_dir / "results.json"
        if not results_file.exists():
            continue
        try:
            results = json.loads(results_file.read_text())
        except Exception:
            continue
        for r in results:
            e = r.get("event", {})
            key = (e.get("role"), e.get("city"), e.get("state"))
            cites_by_role_city[key]["attempts"] += 1
            cites_by_role_city[key]["citations"] += len(r.get("playbook_citations") or [])
            if r.get("ok"):
                cites_by_role_city[key]["ok"] += 1

    combos_with_cites = [(k, v) for k, v in cites_by_role_city.items() if v["citations"] > 0]
    combos_zero_cites = [(k, v) for k, v in cites_by_role_city.items() if v["citations"] == 0 and v["ok"] > 0]
    print(f"(role, city, state) combos with any citation: {len(combos_with_cites)}")
    print(f"(role, city, state) combos with ok fills but 0 cites: {len(combos_zero_cites)}")
    print()
    if combos_with_cites:
        print("Top 10 combos by citation count:")
        for (role, city, state), v in sorted(combos_with_cites, key=lambda x: -x[1]["citations"])[:10]:
            print(f" {role:25s} {city:15s} {state}: {v['citations']} cites across {v['attempts']} attempts ({v['ok']} ok)")
        print()

    # --- Write markdown report ---
    lines = ["# KB Measurement Report", ""]
    lines.append(f"Generated from {len(outcomes)} runs across {len(sigs)} distinct signatures.")
    lines.append("")
    lines.append("## Recommender confidence")
    for c in ("high", "medium", "low"):
        lines.append(f"- {c}: {conf_counts.get(c, 0)}")
    lines.append("")
    lines.append("## Overall fill + citation")
    if outcomes:
        lines.append(f"- Fill rate: **{total_ok}/{total_events}** ({100*total_ok/max(1,total_events):.1f}%)")
        lines.append(f"- Avg citations per run: **{total_cites/len(outcomes):.2f}**")
        lines.append(f"- Avg turns per run: {total_turns/len(outcomes):.1f}")
    lines.append("")
    lines.append("## Citation coverage by (role, city, state)")
    lines.append(f"- Combos with ≥1 citation: {len(combos_with_cites)}")
    lines.append(f"- Combos with ok fills but 0 citations: {len(combos_zero_cites)}")
    lines.append("")
    lines.append("## Item 3 decision signal")
    if combos_zero_cites:
        lines.append("Non-zero: there are **combos that succeeded but never triggered playbook_memory boost**. Candidates for item 3 investigation:")
        for (role, city, state), v in combos_zero_cites[:5]:
            lines.append(f"- {role} in {city}, {state}: {v['ok']}/{v['attempts']} ok, 0 cites")
    else:
        lines.append("All ok combos got at least some citation firing. Boost mechanism is healthy; raising the cap may help but isn't forced.")
    lines.append("")
    out = PLAYBOOKS / "kb_measurement.md"
    out.write_text("\n".join(lines))
    print(f"✓ markdown report → {out}")


if __name__ == "__main__":
    main()