Phase 19 wiring + Path 1/2 work + chain integrity fixes
Backend:
- crates/vectord/src/playbook_memory.rs (new): Phase 19 in-memory boost
store with seed/rebuild/snapshot, plus temporal decay (e^(-age/30) per
playbook), persist_to_sql endpoint backing successful_playbooks_live,
and discover_patterns endpoint for meta-index pattern aggregation
(recurring certs/skills/archetype/reliability across similar past fills).
- DEFAULT_TOP_K_PLAYBOOKS bumped 5 → 25; old default silently missed
most boosts when memory had > 25 entries.
- service.rs: new routes /vectors/playbook_memory/{seed,rebuild,stats,
persist_sql,patterns}.
Bun staffing co-pilot (mcp-server/):
- /search, /match, /verify, /proof, /simulation/run, MCP tools all
forward use_playbook_memory:true and playbook_memory_k:25 to the
hybrid endpoint. Boost was previously dark across the entire app.
- /log no longer POSTs to /ingest/file — that endpoint REPLACES the
dataset's object list, so single-row CSV writes were wiping all prior
rows in successful_playbooks (sp_rows went 33→1 in one /log call).
/log now seeds playbook_memory with canonical short text and calls
/persist_sql to keep successful_playbooks_live in sync.
- /simulation/run cumulative end-of-week CSV write removed for the same
reason. Per-day per-contract /seed (added in this session) is the
accumulating feedback path now.
- search.html addWorkerInsight renders a green "Endorsed · N playbooks"
chip with playbook citations when boost > 0.
Internal Dioxus UI (crates/ui/):
- Dashboard phase list rewritten through Phase 19 (was stuck at "Phase
16: File Watcher" / "Phase 17: DB Connector" — both wrong).
- Removed fabricated "27ms" stat label.
- Ask tab examples + SQL default replaced with real staffing prompts
against candidates/clients/job_orders (was referencing nonexistent
employees/products/events).
- New Playbook tab exposes /vectors/playbook_memory/{stats,rebuild} and
side-by-side hybrid search (boost OFF vs ON) with citations.
Tests (tests/multi-agent/):
- run_e2e_rated.ts: parallel two-agent (mistral + qwen2.5) build phase
+ verifier rating (geo, auth, persist, boost, speed → /10).
- network_proving.ts: continuous build → verify → repeat with
staffing-recruiter profile hot-swap; geo-discrimination check.
- chain_of_custody.ts: single recruiter operation traced through every
layer (Bun /search, direct /vectors/hybrid parity, /log, SQL,
playbook_memory growth, profile activation, post-op boost lift).
This commit is contained in:
parent
8e3cac5812
commit
25b7e6c3a7
@ -205,3 +205,13 @@ tr:hover td { background: var(--accent-glow); }
|
|||||||
padding: 8px 12px; border-bottom: 1px solid var(--border); font-size: 13px;
|
padding: 8px 12px; border-bottom: 1px solid var(--border); font-size: 13px;
|
||||||
}
|
}
|
||||||
.table-item:hover { background: var(--accent-glow); }
|
.table-item:hover { background: var(--accent-glow); }
|
||||||
|
|
||||||
|
/* Phase 19 — Playbook panel */
|
||||||
|
.boosted-row { background: rgba(120, 200, 120, 0.10); }
|
||||||
|
.boosted-row td { border-top: 1px solid rgba(120, 200, 120, 0.30); }
|
||||||
|
.mono-cell {
|
||||||
|
font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
|
||||||
|
font-size: 11px; color: var(--text-dim);
|
||||||
|
max-width: 220px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap;
|
||||||
|
}
|
||||||
|
.panel-section + .panel-section { margin-top: 18px; }
|
||||||
|
|||||||
@ -178,9 +178,116 @@ enum Tab {
|
|||||||
Explore,
|
Explore,
|
||||||
Sql,
|
Sql,
|
||||||
Ingest,
|
Ingest,
|
||||||
|
Playbook,
|
||||||
Status,
|
Status,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- Playbook memory types (Phase 19) ---
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Deserialize, PartialEq)]
|
||||||
|
struct PlaybookStats {
|
||||||
|
entries: usize,
|
||||||
|
entries_with_embeddings: usize,
|
||||||
|
#[serde(default)]
|
||||||
|
total_names_endorsed: usize,
|
||||||
|
#[serde(default)]
|
||||||
|
sample: Vec<PlaybookSample>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Deserialize, PartialEq)]
|
||||||
|
struct PlaybookSample {
|
||||||
|
id: String,
|
||||||
|
operation: String,
|
||||||
|
#[serde(default)]
|
||||||
|
city: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
state: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
endorsed: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Deserialize, PartialEq)]
|
||||||
|
struct HybridResp {
|
||||||
|
#[serde(default)]
|
||||||
|
sql_matches: usize,
|
||||||
|
#[serde(default)]
|
||||||
|
vector_reranked: usize,
|
||||||
|
#[serde(default)]
|
||||||
|
method: String,
|
||||||
|
#[serde(default)]
|
||||||
|
duration_ms: u64,
|
||||||
|
#[serde(default)]
|
||||||
|
answer: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
sources: Vec<HybridSource>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Deserialize, PartialEq)]
|
||||||
|
struct HybridSource {
|
||||||
|
doc_id: String,
|
||||||
|
chunk_text: String,
|
||||||
|
score: f32,
|
||||||
|
#[serde(default)]
|
||||||
|
sql_verified: bool,
|
||||||
|
#[serde(default)]
|
||||||
|
playbook_boost: f32,
|
||||||
|
#[serde(default)]
|
||||||
|
playbook_citations: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Deserialize, PartialEq)]
|
||||||
|
struct IndexInfo {
|
||||||
|
index_name: String,
|
||||||
|
source: String,
|
||||||
|
#[serde(default)]
|
||||||
|
chunk_count: usize,
|
||||||
|
#[serde(default)]
|
||||||
|
vector_backend: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn fetch_playbook_stats() -> Result<PlaybookStats, String> {
|
||||||
|
let resp = reqwest::get(&format!("{}/vectors/playbook_memory/stats", api_base()))
|
||||||
|
.await.map_err(|e| e.to_string())?;
|
||||||
|
if !resp.status().is_success() {
|
||||||
|
return Err(format!("HTTP {}: {}", resp.status(), resp.text().await.unwrap_or_default()));
|
||||||
|
}
|
||||||
|
resp.json().await.map_err(|e| e.to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn rebuild_playbook_memory() -> Result<serde_json::Value, String> {
|
||||||
|
let client = reqwest::Client::new();
|
||||||
|
let resp = client.post(&format!("{}/vectors/playbook_memory/rebuild", api_base()))
|
||||||
|
.json(&serde_json::json!({}))
|
||||||
|
.send().await.map_err(|e| e.to_string())?;
|
||||||
|
if !resp.status().is_success() {
|
||||||
|
return Err(format!("HTTP {}: {}", resp.status(), resp.text().await.unwrap_or_default()));
|
||||||
|
}
|
||||||
|
resp.json().await.map_err(|e| e.to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn fetch_indexes() -> Result<Vec<IndexInfo>, String> {
|
||||||
|
let resp = reqwest::get(&format!("{}/vectors/indexes", api_base()))
|
||||||
|
.await.map_err(|e| e.to_string())?;
|
||||||
|
resp.json().await.map_err(|e| e.to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn hybrid_search(index_name: &str, question: &str, use_playbook: bool, top_k: usize) -> Result<HybridResp, String> {
|
||||||
|
let client = reqwest::Client::new();
|
||||||
|
let resp = client.post(&format!("{}/vectors/hybrid", api_base()))
|
||||||
|
.json(&serde_json::json!({
|
||||||
|
"index_name": index_name,
|
||||||
|
"question": question,
|
||||||
|
"top_k": top_k,
|
||||||
|
"generate": false,
|
||||||
|
"use_playbook_memory": use_playbook,
|
||||||
|
}))
|
||||||
|
.send().await.map_err(|e| e.to_string())?;
|
||||||
|
if !resp.status().is_success() {
|
||||||
|
return Err(format!("HTTP {}: {}", resp.status(), resp.text().await.unwrap_or_default()));
|
||||||
|
}
|
||||||
|
resp.json().await.map_err(|e| e.to_string())
|
||||||
|
}
|
||||||
|
|
||||||
// --- App ---
|
// --- App ---
|
||||||
|
|
||||||
#[component]
|
#[component]
|
||||||
@ -239,6 +346,11 @@ fn App() -> Element {
|
|||||||
onclick: move |_| active_tab.set(Tab::Ingest),
|
onclick: move |_| active_tab.set(Tab::Ingest),
|
||||||
"Ingest"
|
"Ingest"
|
||||||
}
|
}
|
||||||
|
button {
|
||||||
|
class: if *active_tab.read() == Tab::Playbook { "tab active" } else { "tab" },
|
||||||
|
onclick: move |_| active_tab.set(Tab::Playbook),
|
||||||
|
"Playbook"
|
||||||
|
}
|
||||||
button {
|
button {
|
||||||
class: if *active_tab.read() == Tab::Status { "tab active" } else { "tab" },
|
class: if *active_tab.read() == Tab::Status { "tab active" } else { "tab" },
|
||||||
onclick: move |_| active_tab.set(Tab::Status),
|
onclick: move |_| active_tab.set(Tab::Status),
|
||||||
@ -260,6 +372,7 @@ fn App() -> Element {
|
|||||||
Tab::Explore => rsx! { ExplorePanel { datasets: datasets.read().clone() } },
|
Tab::Explore => rsx! { ExplorePanel { datasets: datasets.read().clone() } },
|
||||||
Tab::Sql => rsx! { SqlPanel {} },
|
Tab::Sql => rsx! { SqlPanel {} },
|
||||||
Tab::Ingest => rsx! { IngestPanel {} },
|
Tab::Ingest => rsx! { IngestPanel {} },
|
||||||
|
Tab::Playbook => rsx! { PlaybookPanel {} },
|
||||||
Tab::Status => rsx! { StatusPanel {} },
|
Tab::Status => rsx! { StatusPanel {} },
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -354,14 +467,14 @@ fn AskPanel(datasets: Vec<Dataset>) -> Element {
|
|||||||
div { class: "panel ask-panel",
|
div { class: "panel ask-panel",
|
||||||
div { class: "ask-hero",
|
div { class: "ask-hero",
|
||||||
h2 { "Ask your data anything" }
|
h2 { "Ask your data anything" }
|
||||||
p { class: "subtitle", "Natural language → SQL → Results. Powered by local AI." }
|
p { class: "subtitle", "Natural language → SQL → Results. Powered by local AI over the staffing dataset." }
|
||||||
}
|
}
|
||||||
|
|
||||||
div { class: "ask-input-row",
|
div { class: "ask-input-row",
|
||||||
input {
|
input {
|
||||||
class: "ask-input",
|
class: "ask-input",
|
||||||
value: "{question}",
|
value: "{question}",
|
||||||
placeholder: "e.g. Which department has the highest average salary?",
|
placeholder: "e.g. Which clients placed the most candidates last quarter?",
|
||||||
oninput: move |e| question.set(e.value()),
|
oninput: move |e| question.set(e.value()),
|
||||||
onkeydown: move |e| {
|
onkeydown: move |e| {
|
||||||
if e.key() == Key::Enter {
|
if e.key() == Key::Enter {
|
||||||
@ -432,10 +545,12 @@ fn AskPanel(datasets: Vec<Dataset>) -> Element {
|
|||||||
|
|
||||||
div { class: "ask-examples",
|
div { class: "ask-examples",
|
||||||
"Try: "
|
"Try: "
|
||||||
button { class: "example-btn", onclick: move |_| question.set("Which department has the highest average salary?".into()), "highest avg salary by dept" }
|
button { class: "example-btn", onclick: move |_| question.set("How many candidates do we have by city?".into()), "candidates by city" }
|
||||||
button { class: "example-btn", onclick: move |_| question.set("Show me the top 3 most expensive products".into()), "top 3 expensive products" }
|
button { class: "example-btn", onclick: move |_| question.set("Top 10 clients by total placements".into()), "top clients by placements" }
|
||||||
button { class: "example-btn", onclick: move |_| question.set("How many events per action type?".into()), "events by action" }
|
button { class: "example-btn", onclick: move |_| question.set("Open job orders ordered by bill rate descending".into()), "open jobs by rate" }
|
||||||
button { class: "example-btn", onclick: move |_| question.set("List all employees who earn more than 90000".into()), "employees > 90k" }
|
button { class: "example-btn", onclick: move |_| question.set("Recruiters with the highest placement count".into()), "top recruiters" }
|
||||||
|
button { class: "example-btn", onclick: move |_| question.set("Total billed hours per client last month".into()), "hours per client" }
|
||||||
|
button { class: "example-btn", onclick: move |_| question.set("Cold leads: candidates we called more than 5 times but never placed".into()), "cold leads" }
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(sql) = generated_sql.read().as_ref() {
|
if let Some(sql) = generated_sql.read().as_ref() {
|
||||||
@ -578,7 +693,7 @@ fn ExplorePanel(datasets: Vec<Dataset>) -> Element {
|
|||||||
|
|
||||||
#[component]
|
#[component]
|
||||||
fn SqlPanel() -> Element {
|
fn SqlPanel() -> Element {
|
||||||
let mut query_text = use_signal(|| String::from("SELECT * FROM employees LIMIT 10"));
|
let mut query_text = use_signal(|| String::from("SELECT candidate_id, first_name, last_name, city, status FROM candidates LIMIT 10"));
|
||||||
let mut result = use_signal(|| None::<Result<QueryResponse, String>>);
|
let mut result = use_signal(|| None::<Result<QueryResponse, String>>);
|
||||||
let mut loading = use_signal(|| false);
|
let mut loading = use_signal(|| false);
|
||||||
|
|
||||||
@ -727,7 +842,7 @@ fn DashboardPanel() -> Element {
|
|||||||
}
|
}
|
||||||
div { class: "stat-card accent",
|
div { class: "stat-card accent",
|
||||||
div { class: "stat-value", "{s[\"hnsw_loaded\"]}" }
|
div { class: "stat-value", "{s[\"hnsw_loaded\"]}" }
|
||||||
div { class: "stat-label", "HNSW Indexes (27ms)" }
|
div { class: "stat-label", "HNSW Indexes Loaded" }
|
||||||
}
|
}
|
||||||
div { class: "stat-card",
|
div { class: "stat-card",
|
||||||
div { class: "stat-value", "{s[\"tools\"]}" }
|
div { class: "stat-value", "{s[\"tools\"]}" }
|
||||||
@ -750,27 +865,27 @@ fn DashboardPanel() -> Element {
|
|||||||
div { class: "arch-grid",
|
div { class: "arch-grid",
|
||||||
div { class: "arch-card",
|
div { class: "arch-card",
|
||||||
div { class: "arch-title", "Ingest" }
|
div { class: "arch-title", "Ingest" }
|
||||||
div { class: "arch-items", "CSV, JSON, PDF, Text, PostgreSQL, File Watcher" }
|
div { class: "arch-items", "CSV · JSON · PDF (+OCR) · Text · Postgres · MySQL · Inbox watcher · Cron schedules" }
|
||||||
}
|
}
|
||||||
div { class: "arch-card",
|
div { class: "arch-card",
|
||||||
div { class: "arch-title", "Storage" }
|
div { class: "arch-title", "Storage" }
|
||||||
div { class: "arch-items", "Parquet on Object Storage, Delta Writes, Compaction" }
|
div { class: "arch-items", "Parquet on Object Storage · Delta writes · Compaction · Tombstones · Multi-bucket federation + rescue" }
|
||||||
}
|
}
|
||||||
div { class: "arch-card",
|
div { class: "arch-card",
|
||||||
div { class: "arch-title", "Query" }
|
div { class: "arch-title", "Query" }
|
||||||
div { class: "arch-items", "DataFusion SQL, MemCache (9.8x), Hot/Cold" }
|
div { class: "arch-items", "DataFusion SQL · MemCache (9.8× hot) · Merge-on-read · AI-safe views" }
|
||||||
}
|
}
|
||||||
div { class: "arch-card",
|
div { class: "arch-card",
|
||||||
div { class: "arch-title", "AI" }
|
div { class: "arch-title", "AI / Vector" }
|
||||||
div { class: "arch-items", "Ollama (local), Embed, Generate, RAG, HNSW" }
|
div { class: "arch-items", "Ollama (local) · Embed/Generate/RAG · HNSW (Parquet) · Lance IVF_PQ · Hybrid SQL+vector · Profile-scoped" }
|
||||||
|
}
|
||||||
|
div { class: "arch-card",
|
||||||
|
div { class: "arch-title", "Learning loop" }
|
||||||
|
div { class: "arch-items", "Playbook memory · Endorsement boost · Multi-agent orchestrator · Autotune agent (Pareto-promote)" }
|
||||||
}
|
}
|
||||||
div { class: "arch-card",
|
div { class: "arch-card",
|
||||||
div { class: "arch-title", "Governance" }
|
div { class: "arch-title", "Governance" }
|
||||||
div { class: "arch-items", "Event Journal, PII Detection, Tool Registry, Access Control" }
|
div { class: "arch-items", "Event journal · PII detection · Tool registry · Access control · Audit log · Catalog v2 metadata" }
|
||||||
}
|
|
||||||
div { class: "arch-card",
|
|
||||||
div { class: "arch-title", "Agents" }
|
|
||||||
div { class: "arch-items", "Workspaces, Handoff, Shortlists, Activity Logs" }
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -779,20 +894,23 @@ fn DashboardPanel() -> Element {
|
|||||||
h3 { "Build Progression" }
|
h3 { "Build Progression" }
|
||||||
div { class: "phase-list",
|
div { class: "phase-list",
|
||||||
{rsx! {
|
{rsx! {
|
||||||
PhaseItem { num: "0-5", name: "Foundation", detail: "Storage, Catalog, DataFusion, AI, UI, gRPC" }
|
PhaseItem { num: "0-5", name: "Foundation", detail: "Storage · Catalog · DataFusion · Ollama · UI · gRPC" }
|
||||||
PhaseItem { num: "6", name: "Ingest Pipeline", detail: "CSV/JSON/PDF/Text auto-schema" }
|
PhaseItem { num: "6", name: "Ingest Pipeline", detail: "CSV · JSON · PDF · Text · auto-schema · dedupe" }
|
||||||
PhaseItem { num: "7", name: "Vector + RAG", detail: "Embed, Search, LLM Answers" }
|
PhaseItem { num: "7", name: "Vector + RAG", detail: "Embed · brute-force cosine · LLM grounded answers" }
|
||||||
PhaseItem { num: "8", name: "Hot Cache", detail: "9.8x speedup, Delta Writes" }
|
PhaseItem { num: "8", name: "Hot Cache + Deltas", detail: "MemTable LRU · 9.8× speedup · merge-on-read · compaction" }
|
||||||
PhaseItem { num: "8.5", name: "Agent Workspaces", detail: "Per-contract, Instant Handoff" }
|
PhaseItem { num: "8.5", name: "Agent Workspaces", detail: "Per-contract · daily/weekly/monthly tiers · zero-copy handoff" }
|
||||||
PhaseItem { num: "9", name: "Event Journal", detail: "Append-only Mutation History" }
|
PhaseItem { num: "9", name: "Event Journal", detail: "Append-only mutation log · time-travel · audit" }
|
||||||
PhaseItem { num: "10", name: "Rich Catalog", detail: "PII Detection, Lineage" }
|
PhaseItem { num: "10", name: "Rich Catalog v2", detail: "PII auto-detection · lineage · freshness SLA · sensitivity" }
|
||||||
PhaseItem { num: "11", name: "Embedding Versioning", detail: "Model-proof Vectors" }
|
PhaseItem { num: "11", name: "Embedding Versioning", detail: "Per-index model+version · A/B · incremental re-embed" }
|
||||||
PhaseItem { num: "12", name: "Tool Registry", detail: "6 Governed Actions + Audit" }
|
PhaseItem { num: "12", name: "Tool Registry", detail: "Governed actions · param validation · audit · MCP-ready" }
|
||||||
PhaseItem { num: "13", name: "Access Control", detail: "Role-based, Field-level" }
|
PhaseItem { num: "13", name: "Access Control", detail: "Roles · field-level sensitivity · column masking · query audit" }
|
||||||
PhaseItem { num: "14", name: "Schema Evolution", detail: "Diff Detection, AI Migration" }
|
PhaseItem { num: "14", name: "Schema Evolution", detail: "Diff detection · AI migration prompts · versioned schemas" }
|
||||||
PhaseItem { num: "15", name: "HNSW Index", detail: "100K Search in 27ms" }
|
PhaseItem { num: "15", name: "HNSW + Trials", detail: "100K vectors · p50 873µs · trial journal · eval harness" }
|
||||||
PhaseItem { num: "16", name: "File Watcher", detail: "Auto-ingest from Inbox" }
|
PhaseItem { num: "16", name: "Hot-swap + Autotune", detail: "Promotion registry · rollback · ε-greedy agent · Pareto winner" }
|
||||||
PhaseItem { num: "17", name: "DB Connector", detail: "PostgreSQL Import" }
|
PhaseItem { num: "17", name: "Model Profiles + VRAM", detail: "ModelProfile manifests · scoped search · sequential model swap" }
|
||||||
|
PhaseItem { num: "18", name: "Lance hybrid backend", detail: "IVF_PQ build 14× faster · random fetch 112× · S3-native · per-profile routing" }
|
||||||
|
PhaseItem { num: "19", name: "Playbook memory", detail: "Feedback loop · endorsement boost (cap 0.25) · orchestrator write-through · citations" }
|
||||||
|
PhaseItem { num: "+", name: "Federation + Schedules", detail: "Multi-bucket · rescue fallback · error journal · MySQL · PDF OCR · cron ingest · catalog dedupe" }
|
||||||
}}
|
}}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -816,6 +934,285 @@ fn PhaseItem(num: String, name: String, detail: String) -> Element {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// === PLAYBOOK — Phase 19 meta-index feedback loop ===
|
||||||
|
|
||||||
|
#[component]
|
||||||
|
fn PlaybookPanel() -> Element {
|
||||||
|
let mut stats = use_signal(|| None::<Result<PlaybookStats, String>>);
|
||||||
|
let mut indexes = use_signal(Vec::<IndexInfo>::new);
|
||||||
|
let mut rebuild_status = use_signal(|| None::<Result<String, String>>);
|
||||||
|
let mut rebuilding = use_signal(|| false);
|
||||||
|
let mut loaded = use_signal(|| false);
|
||||||
|
|
||||||
|
// Comparison state
|
||||||
|
let mut selected_index = use_signal(|| String::new());
|
||||||
|
let mut question = use_signal(|| String::from("reliable assembler in Detroit"));
|
||||||
|
let mut top_k = use_signal(|| 10usize);
|
||||||
|
let mut compare_loading = use_signal(|| false);
|
||||||
|
let mut hits_off = use_signal(|| None::<Result<HybridResp, String>>);
|
||||||
|
let mut hits_on = use_signal(|| None::<Result<HybridResp, String>>);
|
||||||
|
|
||||||
|
let load_all = move || {
|
||||||
|
spawn(async move {
|
||||||
|
stats.set(Some(fetch_playbook_stats().await));
|
||||||
|
if let Ok(ix) = fetch_indexes().await {
|
||||||
|
if selected_index.read().is_empty() {
|
||||||
|
if let Some(default) = ix.iter().find(|i| i.source == "workers_500k").or_else(|| ix.first()) {
|
||||||
|
selected_index.set(default.index_name.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
indexes.set(ix);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
use_effect(move || {
|
||||||
|
if !*loaded.read() {
|
||||||
|
loaded.set(true);
|
||||||
|
load_all();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
let do_rebuild = move |_| {
|
||||||
|
spawn(async move {
|
||||||
|
rebuilding.set(true);
|
||||||
|
rebuild_status.set(None);
|
||||||
|
match rebuild_playbook_memory().await {
|
||||||
|
Ok(v) => rebuild_status.set(Some(Ok(format!("rebuild ok — {}", v)))),
|
||||||
|
Err(e) => rebuild_status.set(Some(Err(e))),
|
||||||
|
}
|
||||||
|
// Refresh stats afterward
|
||||||
|
stats.set(Some(fetch_playbook_stats().await));
|
||||||
|
rebuilding.set(false);
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
let do_compare = move |_| {
|
||||||
|
let idx = selected_index.read().clone();
|
||||||
|
let q = question.read().clone();
|
||||||
|
let k = *top_k.read();
|
||||||
|
if idx.is_empty() || q.trim().is_empty() { return; }
|
||||||
|
spawn(async move {
|
||||||
|
compare_loading.set(true);
|
||||||
|
hits_off.set(None);
|
||||||
|
hits_on.set(None);
|
||||||
|
// Run both sequentially so the embedding cache is shared
|
||||||
|
hits_off.set(Some(hybrid_search(&idx, &q, false, k).await));
|
||||||
|
hits_on.set(Some(hybrid_search(&idx, &q, true, k).await));
|
||||||
|
compare_loading.set(false);
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
rsx! {
|
||||||
|
div { class: "panel",
|
||||||
|
div { class: "ask-hero",
|
||||||
|
h2 { "Playbook Memory" }
|
||||||
|
p { class: "subtitle",
|
||||||
|
"Phase 19 feedback loop: past successful playbooks boost future search rankings. \
|
||||||
|
Endorsed workers from semantically similar past operations re-rank toward the top, \
|
||||||
|
with citations back to the playbook that endorsed them."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stats card
|
||||||
|
div { class: "panel-section",
|
||||||
|
match stats.read().as_ref() {
|
||||||
|
None => rsx! { div { class: "loading", "loading playbook stats..." } },
|
||||||
|
Some(Err(e)) => rsx! { div { class: "error", "stats: {e}" } },
|
||||||
|
Some(Ok(s)) => rsx! {
|
||||||
|
div { class: "stat-grid",
|
||||||
|
div { class: "stat-card",
|
||||||
|
div { class: "stat-value", "{s.entries}" }
|
||||||
|
div { class: "stat-label", "Playbooks in Memory" }
|
||||||
|
}
|
||||||
|
div { class: "stat-card",
|
||||||
|
div { class: "stat-value", "{s.entries_with_embeddings}" }
|
||||||
|
div { class: "stat-label", "Embedded" }
|
||||||
|
}
|
||||||
|
div { class: "stat-card accent",
|
||||||
|
div { class: "stat-value", "{s.total_names_endorsed}" }
|
||||||
|
div { class: "stat-label", "Endorsed Worker-Tags" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
div { class: "sql-actions",
|
||||||
|
button {
|
||||||
|
class: "btn",
|
||||||
|
disabled: *rebuilding.read(),
|
||||||
|
onclick: do_rebuild,
|
||||||
|
if *rebuilding.read() { "rebuilding from successful_playbooks..." } else { "Rebuild from successful_playbooks" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(s) = rebuild_status.read().as_ref() {
|
||||||
|
match s {
|
||||||
|
Ok(msg) => rsx! { div { class: "result-box", "{msg}" } },
|
||||||
|
Err(e) => rsx! { div { class: "error", "{e}" } },
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sample playbooks
|
||||||
|
if let Some(Ok(s)) = stats.read().as_ref() {
|
||||||
|
if !s.sample.is_empty() {
|
||||||
|
div { class: "panel-section",
|
||||||
|
h3 { "Sample playbooks" }
|
||||||
|
div { class: "table-wrap",
|
||||||
|
table {
|
||||||
|
thead { tr {
|
||||||
|
th { "ID" }
|
||||||
|
th { "Operation" }
|
||||||
|
th { "Location" }
|
||||||
|
th { "Endorsed" }
|
||||||
|
} }
|
||||||
|
tbody {
|
||||||
|
for pb in s.sample.iter() {
|
||||||
|
{
|
||||||
|
let loc = match (&pb.city, &pb.state) {
|
||||||
|
(Some(c), Some(st)) => format!("{c}, {st}"),
|
||||||
|
_ => "—".into(),
|
||||||
|
};
|
||||||
|
let endorsed = if pb.endorsed.is_empty() {
|
||||||
|
"—".to_string()
|
||||||
|
} else {
|
||||||
|
pb.endorsed.join(", ")
|
||||||
|
};
|
||||||
|
let pid = pb.id.clone();
|
||||||
|
let op = pb.operation.clone();
|
||||||
|
rsx! {
|
||||||
|
tr {
|
||||||
|
td { class: "mono-cell", title: "{pid}", "{pid}" }
|
||||||
|
td { "{op}" }
|
||||||
|
td { "{loc}" }
|
||||||
|
td { "{endorsed}" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Side-by-side comparison: boost OFF vs ON
|
||||||
|
div { class: "panel-section",
|
||||||
|
h3 { "See the boost — search compared" }
|
||||||
|
p { class: "hint",
|
||||||
|
"Run the same query against the same index twice — once with playbook boost OFF and once ON. \
|
||||||
|
Hits with non-zero playbook_boost and citations are workers that past similar playbooks endorsed."
|
||||||
|
}
|
||||||
|
div { class: "form-row",
|
||||||
|
label { "Index" }
|
||||||
|
select {
|
||||||
|
value: "{selected_index}",
|
||||||
|
onchange: move |e| selected_index.set(e.value()),
|
||||||
|
for ix in indexes.read().iter() {
|
||||||
|
option { value: "{ix.index_name}", "{ix.index_name} ({ix.source}, {ix.chunk_count} chunks, {ix.vector_backend})" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
div { class: "form-row",
|
||||||
|
label { "Question" }
|
||||||
|
input {
|
||||||
|
value: "{question}",
|
||||||
|
oninput: move |e| question.set(e.value()),
|
||||||
|
placeholder: "e.g. reliable assembler in Detroit"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
div { class: "form-row",
|
||||||
|
label { "Top K" }
|
||||||
|
input {
|
||||||
|
r#type: "number",
|
||||||
|
value: "{top_k}",
|
||||||
|
oninput: move |e| {
|
||||||
|
if let Ok(n) = e.value().parse::<usize>() { top_k.set(n.clamp(1, 50)); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
button {
|
||||||
|
class: "btn btn-ask",
|
||||||
|
disabled: *compare_loading.read(),
|
||||||
|
onclick: do_compare,
|
||||||
|
if *compare_loading.read() { "running both queries..." } else { "Run comparison" }
|
||||||
|
}
|
||||||
|
|
||||||
|
div { class: "explore-grid",
|
||||||
|
div { class: "ds-detail",
|
||||||
|
h3 { "Boost OFF (vanilla)" }
|
||||||
|
match hits_off.read().as_ref() {
|
||||||
|
None => rsx! { div { class: "empty", "—" } },
|
||||||
|
Some(Err(e)) => rsx! { div { class: "error", "{e}" } },
|
||||||
|
Some(Ok(r)) => rsx! { HybridHitTable { resp: r.clone() } },
|
||||||
|
}
|
||||||
|
}
|
||||||
|
div { class: "ds-detail",
|
||||||
|
h3 { "Boost ON (Phase 19)" }
|
||||||
|
match hits_on.read().as_ref() {
|
||||||
|
None => rsx! { div { class: "empty", "—" } },
|
||||||
|
Some(Err(e)) => rsx! { div { class: "error", "{e}" } },
|
||||||
|
Some(Ok(r)) => rsx! { HybridHitTable { resp: r.clone() } },
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[component]
|
||||||
|
fn HybridHitTable(resp: HybridResp) -> Element {
|
||||||
|
rsx! {
|
||||||
|
div { class: "results-info",
|
||||||
|
"{resp.sources.len()} hits · {resp.duration_ms}ms · method={resp.method}"
|
||||||
|
}
|
||||||
|
if resp.sources.is_empty() {
|
||||||
|
div { class: "empty-sm", "no hits" }
|
||||||
|
} else {
|
||||||
|
div { class: "table-wrap",
|
||||||
|
table {
|
||||||
|
thead { tr {
|
||||||
|
th { "#" }
|
||||||
|
th { "Doc" }
|
||||||
|
th { "Score" }
|
||||||
|
th { "Boost" }
|
||||||
|
th { "Citations" }
|
||||||
|
th { "Snippet" }
|
||||||
|
} }
|
||||||
|
tbody {
|
||||||
|
for (i, h) in resp.sources.iter().enumerate() {
|
||||||
|
{
|
||||||
|
let snippet: String = h.chunk_text.chars().take(120).collect();
|
||||||
|
let cites = if h.playbook_citations.is_empty() {
|
||||||
|
"—".to_string()
|
||||||
|
} else {
|
||||||
|
h.playbook_citations.join(", ")
|
||||||
|
};
|
||||||
|
let row_class = if h.playbook_boost > 0.0 { "boosted-row" } else { "" };
|
||||||
|
let rank = i + 1;
|
||||||
|
let did = h.doc_id.clone();
|
||||||
|
let score = format!("{:.3}", h.score);
|
||||||
|
let boost = if h.playbook_boost > 0.0 { format!("+{:.3}", h.playbook_boost) } else { "—".into() };
|
||||||
|
rsx! {
|
||||||
|
tr { class: "{row_class}",
|
||||||
|
td { "{rank}" }
|
||||||
|
td { class: "mono-cell", "{did}" }
|
||||||
|
td { "{score}" }
|
||||||
|
td { "{boost}" }
|
||||||
|
td { class: "mono-cell", title: "{cites}", "{cites}" }
|
||||||
|
td { "{snippet}" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// === INGEST — Data on-ramp ===
|
// === INGEST — Data on-ramp ===
|
||||||
|
|
||||||
#[component]
|
#[component]
|
||||||
|
|||||||
@ -7,6 +7,7 @@ pub mod harness;
|
|||||||
pub mod hnsw;
|
pub mod hnsw;
|
||||||
pub mod index_registry;
|
pub mod index_registry;
|
||||||
pub mod jobs;
|
pub mod jobs;
|
||||||
|
pub mod playbook_memory;
|
||||||
pub mod promotion;
|
pub mod promotion;
|
||||||
pub mod refresh;
|
pub mod refresh;
|
||||||
pub mod store;
|
pub mod store;
|
||||||
|
|||||||
825
crates/vectord/src/playbook_memory.rs
Normal file
825
crates/vectord/src/playbook_memory.rs
Normal file
@ -0,0 +1,825 @@
|
|||||||
|
//! Phase 19: Playbook memory — the feedback loop that makes the index
|
||||||
|
//! learn from real outcomes instead of just logging them.
|
||||||
|
//!
|
||||||
|
//! When an agent (multi-agent orchestrator or human operator) seals a
|
||||||
|
//! successful playbook, it lands in the `successful_playbooks` dataset.
|
||||||
|
//! Historically that was a write-only log. This module turns it into a
|
||||||
|
//! re-ranking signal:
|
||||||
|
//!
|
||||||
|
//! 1. `rebuild` reads every row of `successful_playbooks`, embeds the
|
||||||
|
//! operation+approach+context as one vector per playbook, parses
|
||||||
|
//! out the worker names from the `result` column, and stores both
|
||||||
|
//! the vectors and the (playbook → names) endorsement map in memory.
|
||||||
|
//!
|
||||||
|
//! 2. At query time, `compute_boost_for` takes a new operation text
|
||||||
|
//! (e.g. "fill: Welder x2 in Toledo, OH"), embeds it, brute-force
|
||||||
|
//! ranks past playbooks by cosine similarity, and returns a boost
|
||||||
|
//! map keyed by (city, state, worker_name) → `BoostEntry`. Each
|
||||||
|
//! entry carries its similarity score and the citing playbook_ids,
|
||||||
|
//! so explanations ("ranked higher because of 3 similar past fills
|
||||||
|
//! in Toledo") are free.
|
||||||
|
//!
|
||||||
|
//! 3. The `use_playbook_memory` flag on `/vectors/hybrid` adds those
|
||||||
|
//! boosts to matching search hits and re-sorts.
|
||||||
|
//!
|
||||||
|
//! Why brute force instead of another HNSW: `successful_playbooks` grows
|
||||||
|
//! by operators, not automation. A few thousand rows is the realistic
|
||||||
|
//! ceiling for years. Brute force at 10K × 768d is <10ms on this hardware
|
||||||
|
//! — not worth the operational cost of another indexed surface.
|
||||||
|
//!
|
||||||
|
//! Persistence: the endorsements map round-trips through
|
||||||
|
//! `_playbook_memory/state.json` in primary storage so the cache
|
||||||
|
//! survives restarts without a full rebuild.
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use tokio::sync::RwLock;
|
||||||
|
|
||||||
|
use aibridge::client::{AiClient, EmbedRequest};
|
||||||
|
use object_store::ObjectStore;
|
||||||
|
use storaged::ops;
|
||||||
|
|
||||||
|
/// Object-store key under which the serialized memory state is read and
/// written (`load_from_storage` / `persist`).
const STATE_KEY: &str = "_playbook_memory/state.json";

/// Maximum boost a single worker can accumulate across all similar past
/// playbooks. Prevents one very popular worker from always winning.
pub const MAX_BOOST_PER_WORKER: f32 = 0.25;

/// Default number of past playbooks to consider when ranking the current
/// operation. Bumped 5 → 25 on 2026-04-20 because at >100 entries in
/// memory the old default missed too many relevant playbooks — boost
/// silently failed even when the seeded workers were ideal matches.
/// 25 is brute-force-cheap (sub-ms) and covers most live operator memory.
pub const DEFAULT_TOP_K_PLAYBOOKS: usize = 25;

/// Decay time constant for a playbook's contribution to boost, in days.
/// `compute_boost_for` scales each contribution by
/// `exp(-age_days / BOOST_HALF_LIFE_DAYS)`, so despite the name this is an
/// e-folding time rather than a strict half-life: a playbook 30 days old
/// contributes about 37% of what a fresh one would; 60 days old, about
/// 14%; etc. Per Path 1 (deepen statistical) — stale endorsements
/// shouldn't dominate fresh signal. Recruiter trust depends on this.
pub const BOOST_HALF_LIFE_DAYS: f32 = 30.0;
|
||||||
|
|
||||||
|
/// Shape of one playbook in memory. The embedding is optional so we can
/// round-trip a cached state without re-embedding; the rebuild path
/// populates it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PlaybookEntry {
    /// Identifier cited in `BoostEntry::citations`. `rebuild` derives it
    /// from (timestamp, operation) via `stable_id`.
    pub playbook_id: String,
    /// Operation text, e.g. "fill: Welder x2 in Toledo, OH".
    pub operation: String,
    /// Free-text `approach` column of the source row.
    pub approach: String,
    /// Free-text `context` column of the source row.
    pub context: String,
    /// Timestamp string of the source row. Boost decay parses it as
    /// RFC 3339; a value that does not parse is treated as "no decay".
    pub timestamp: String,
    /// Parsed out of `result` (e.g. "2/2 filled → Matthew Roberts, Amy Davis").
    /// Stored as raw names; matching against search results happens on
    /// (city, state, name) tuples at boost time.
    pub endorsed_names: Vec<String>,
    /// City + state parsed out of the operation string. Kept separately
    /// so boost matching doesn't re-parse on every query. Entries where
    /// either is `None` contribute no boost.
    pub city: Option<String>,
    pub state: Option<String>,
    /// Embedding of the playbook text (`rebuild` embeds the output of
    /// `embed_text`). Option so persisted state can omit it on first load
    /// and have a later embed() fill in; entries without one are skipped
    /// by similarity ranking.
    #[serde(default)]
    pub embedding: Option<Vec<f32>>,
}
|
||||||
|
|
||||||
|
/// Persisted / in-memory state. This is the exact shape serialized to
/// `STATE_KEY` as JSON.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
struct PlaybookMemoryState {
    /// Every playbook currently held in memory.
    entries: Vec<PlaybookEntry>,
    /// Unix epoch millis stamped by `set_entries` when the entries were
    /// last replaced. Caller can use this to gate
    /// "stale > N hours → trigger rebuild" behavior.
    last_rebuilt_at: i64,
}
|
||||||
|
|
||||||
|
/// Per-worker boost payload. `citations` lets the response layer show
/// "boosted because of these past fills" without a second lookup.
#[derive(Debug, Clone, Serialize)]
pub struct BoostEntry {
    /// Accumulated boost for this worker, capped at `MAX_BOOST_PER_WORKER`.
    pub boost: f32,
    /// playbook_ids that endorsed this worker (each id listed once).
    pub citations: Vec<String>,
}
|
||||||
|
|
||||||
|
/// Live handle passed around the service. Clone-cheap (all state is
/// inside one Arc<RwLock>).
#[derive(Clone)]
pub struct PlaybookMemory {
    /// Shared, lock-guarded playbook state; clones of the handle see the
    /// same entries.
    state: Arc<RwLock<PlaybookMemoryState>>,
    /// Object store the state JSON (`STATE_KEY`) is read from and written to.
    store: Arc<dyn ObjectStore>,
}
|
||||||
|
|
||||||
|
impl PlaybookMemory {
|
||||||
|
pub fn new(store: Arc<dyn ObjectStore>) -> Self {
|
||||||
|
Self {
|
||||||
|
state: Arc::new(RwLock::new(PlaybookMemoryState::default())),
|
||||||
|
store,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Best-effort load from primary storage. Missing = empty memory; the
|
||||||
|
/// first `/rebuild` call will hydrate it.
|
||||||
|
pub async fn load_from_storage(&self) -> Result<usize, String> {
|
||||||
|
let data = match ops::get(&self.store, STATE_KEY).await {
|
||||||
|
Ok(d) => d,
|
||||||
|
Err(_) => return Ok(0),
|
||||||
|
};
|
||||||
|
let persisted: PlaybookMemoryState = serde_json::from_slice(&data)
|
||||||
|
.map_err(|e| format!("parse playbook_memory state: {e}"))?;
|
||||||
|
let n = persisted.entries.len();
|
||||||
|
*self.state.write().await = persisted;
|
||||||
|
tracing::info!("playbook_memory: loaded {n} entries from {STATE_KEY}");
|
||||||
|
Ok(n)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn persist(&self) -> Result<(), String> {
|
||||||
|
let snapshot = self.state.read().await.clone();
|
||||||
|
let bytes = serde_json::to_vec_pretty(&snapshot).map_err(|e| e.to_string())?;
|
||||||
|
ops::put(&self.store, STATE_KEY, bytes.into()).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Replace the full in-memory state atomically and persist.
|
||||||
|
pub async fn set_entries(&self, entries: Vec<PlaybookEntry>) -> Result<(), String> {
|
||||||
|
let mut s = self.state.write().await;
|
||||||
|
s.entries = entries;
|
||||||
|
s.last_rebuilt_at = chrono::Utc::now().timestamp_millis();
|
||||||
|
drop(s);
|
||||||
|
self.persist().await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn entry_count(&self) -> usize {
|
||||||
|
self.state.read().await.entries.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn snapshot(&self) -> Vec<PlaybookEntry> {
|
||||||
|
self.state.read().await.entries.clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Given an operation's embedding, find the top-K most similar past
|
||||||
|
/// playbooks (by cosine similarity) and return a per-worker boost map
|
||||||
|
/// keyed by (city, state, name). Worker is matched by the tuple so a
|
||||||
|
/// shared name across cities doesn't cross-pollinate.
|
||||||
|
///
|
||||||
|
/// Boost formula: each qualifying playbook contributes
|
||||||
|
/// `similarity * base_weight / n_workers` to each worker it endorsed,
|
||||||
|
/// where `base_weight` is tuned to keep the cap realistic without
|
||||||
|
/// forcing every result to saturate. Total per worker is capped at
|
||||||
|
/// `MAX_BOOST_PER_WORKER`.
|
||||||
|
pub async fn compute_boost_for(
|
||||||
|
&self,
|
||||||
|
query_embedding: &[f32],
|
||||||
|
top_k_playbooks: usize,
|
||||||
|
base_weight: f32,
|
||||||
|
) -> HashMap<(String, String, String), BoostEntry> {
|
||||||
|
let entries = self.state.read().await.entries.clone();
|
||||||
|
|
||||||
|
// Brute-force cosine. Empty / missing embeddings just skip.
|
||||||
|
let mut scored: Vec<(f32, &PlaybookEntry)> = entries
|
||||||
|
.iter()
|
||||||
|
.filter_map(|e| e.embedding.as_ref().map(|v| (cosine(query_embedding, v), e)))
|
||||||
|
.collect();
|
||||||
|
scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
||||||
|
scored.truncate(top_k_playbooks.max(1));
|
||||||
|
|
||||||
|
let now = chrono::Utc::now();
|
||||||
|
let mut boosts: HashMap<(String, String, String), BoostEntry> = HashMap::new();
|
||||||
|
for (similarity, pb) in &scored {
|
||||||
|
// Negative or near-zero similarity = not actually related;
|
||||||
|
// skip so we don't inject noise when the memory is sparse.
|
||||||
|
if *similarity <= 0.05 { continue; }
|
||||||
|
let Some(city) = &pb.city else { continue; };
|
||||||
|
let Some(state) = &pb.state else { continue; };
|
||||||
|
let n_workers = pb.endorsed_names.len().max(1);
|
||||||
|
// Path 1 — temporal decay. Older playbooks weight less. Failure
|
||||||
|
// to parse the timestamp degrades to "no decay" (treat as fresh)
|
||||||
|
// rather than dropping the entry entirely; keeps backward
|
||||||
|
// compatibility with seed payloads that omitted timestamp.
|
||||||
|
let decay = chrono::DateTime::parse_from_rfc3339(&pb.timestamp)
|
||||||
|
.ok()
|
||||||
|
.map(|t| {
|
||||||
|
let age_days = (now.signed_duration_since(t.with_timezone(&chrono::Utc))
|
||||||
|
.num_seconds() as f32) / 86400.0;
|
||||||
|
if age_days <= 0.0 { 1.0 }
|
||||||
|
else { (-age_days / BOOST_HALF_LIFE_DAYS).exp() }
|
||||||
|
})
|
||||||
|
.unwrap_or(1.0);
|
||||||
|
let per_worker = similarity * base_weight * decay / (n_workers as f32);
|
||||||
|
for name in &pb.endorsed_names {
|
||||||
|
let key = (city.clone(), state.clone(), name.clone());
|
||||||
|
let entry = boosts.entry(key).or_insert(BoostEntry {
|
||||||
|
boost: 0.0,
|
||||||
|
citations: Vec::new(),
|
||||||
|
});
|
||||||
|
entry.boost = (entry.boost + per_worker).min(MAX_BOOST_PER_WORKER);
|
||||||
|
if !entry.citations.contains(&pb.playbook_id) {
|
||||||
|
entry.citations.push(pb.playbook_id.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
boosts
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Cosine similarity of two vectors — one shared implementation for the
/// boost and pattern-discovery paths.
///
/// Only the overlapping prefix is compared when the lengths differ.
/// Returns `0.0` when either vector has zero magnitude over that prefix
/// (including empty input).
fn cosine(a: &[f32], b: &[f32]) -> f32 {
    // Single pass accumulating dot product and both squared norms, in
    // element order.
    let (dot, norm_a_sq, norm_b_sq) = a.iter().zip(b).fold(
        (0.0_f32, 0.0_f32, 0.0_f32),
        |(dot, na, nb), (&x, &y)| (dot + x * y, na + x * x, nb + y * y),
    );
    if norm_a_sq == 0.0 || norm_b_sq == 0.0 {
        0.0
    } else {
        dot / (norm_a_sq.sqrt() * norm_b_sq.sqrt())
    }
}
|
||||||
|
|
||||||
|
// ---------------- Pattern discovery (Path 2 — meta-index) ----------------
|
||||||
|
//
|
||||||
|
// Phase 19's boost path answers "for THIS exact city + role, which workers
|
||||||
|
// have we used before?" Pattern discovery answers a different question:
|
||||||
|
// "for queries like this one, what TRAITS have past successful fills had
|
||||||
|
// in common — even if no exact prior playbook covers this geo?"
|
||||||
|
//
|
||||||
|
// The discovered pattern surfaces signals the operator didn't query for:
|
||||||
|
// e.g. "every successful Welder fill we've seen carried OSHA-10 + lockout
|
||||||
|
// /tagout — you may want to filter on those." That's the meta-index
|
||||||
|
// dimension of the original PRD: identify things we didn't know about.
|
||||||
|
|
||||||
|
/// Result of `discover_patterns`: traits shared by the workers endorsed in
/// past playbooks similar to `query`.
#[derive(Debug, Clone, Serialize)]
pub struct PatternReport {
    /// The query text the report was computed for.
    pub query: String,
    /// Number of past playbooks that matched (similarity above 0.05 within
    /// the requested top-K).
    pub matched_playbooks: usize,
    /// Number of worker rows returned by the profile lookup.
    pub total_workers_examined: usize,
    /// Most common certifications among those workers (at most 8, each at
    /// or above the requested minimum frequency).
    pub common_certifications: Vec<TraitFreq>,
    /// Most common skills, same limits as `common_certifications`.
    pub common_skills: Vec<TraitFreq>,
    /// Most frequent non-empty archetype, if any worker had one.
    pub modal_archetype: Option<String>,
    /// Middle element (index `len / 2`) of the sorted positive reliability
    /// values; `0.0` when there are none.
    pub reliability_p50: f64,
    /// Smallest positive reliability seen; `0.0` when there are none.
    pub reliability_min: f64,
    /// Largest positive reliability seen; `0.0` when there are none.
    pub reliability_max: f64,
    /// Ids of the matched playbooks.
    pub matched_playbook_ids: Vec<String>,
    /// Human-readable one-line summary of the findings.
    pub discovered_pattern: String,
    /// Wall-clock time spent producing the report, in seconds.
    pub duration_secs: f32,
}
|
||||||
|
|
||||||
|
/// One certification or skill with how often it occurred among the workers
/// examined by `discover_patterns`.
#[derive(Debug, Clone, Serialize)]
pub struct TraitFreq {
    /// Trait label as it appeared (trimmed) in the worker profile.
    pub name: String,
    /// How many times the trait was counted across examined worker rows.
    pub count: usize,
    /// `count` divided by the number of worker rows examined.
    pub frequency: f32,
}
|
||||||
|
|
||||||
|
pub async fn discover_patterns(
|
||||||
|
memory: &PlaybookMemory,
|
||||||
|
ai_client: &AiClient,
|
||||||
|
catalog: &catalogd::registry::Registry,
|
||||||
|
buckets: &Arc<storaged::registry::BucketRegistry>,
|
||||||
|
query: &str,
|
||||||
|
top_k_playbooks: usize,
|
||||||
|
min_trait_frequency: f32,
|
||||||
|
) -> Result<PatternReport, String> {
|
||||||
|
let t0 = std::time::Instant::now();
|
||||||
|
|
||||||
|
// 1. Embed the query through the same nomic-embed-text model used
|
||||||
|
// for playbook embeddings, so cosine is meaningful.
|
||||||
|
let resp = ai_client
|
||||||
|
.embed(EmbedRequest { texts: vec![query.into()], model: None })
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("embed query: {e}"))?;
|
||||||
|
if resp.embeddings.is_empty() {
|
||||||
|
return Err("embed returned no vectors".into());
|
||||||
|
}
|
||||||
|
let qv: Vec<f32> = resp.embeddings[0].iter().map(|x| *x as f32).collect();
|
||||||
|
|
||||||
|
// 2. Find top-K most similar past playbooks (cosine over embeddings).
|
||||||
|
let entries = memory.snapshot().await;
|
||||||
|
let mut scored: Vec<(f32, &PlaybookEntry)> = entries
|
||||||
|
.iter()
|
||||||
|
.filter_map(|e| e.embedding.as_ref().map(|v| (cosine(&qv, v), e)))
|
||||||
|
.collect();
|
||||||
|
scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
||||||
|
scored.truncate(top_k_playbooks);
|
||||||
|
let matched: Vec<(f32, PlaybookEntry)> = scored
|
||||||
|
.into_iter()
|
||||||
|
.filter(|(s, _)| *s > 0.05)
|
||||||
|
.map(|(s, e)| (s, e.clone()))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if matched.is_empty() {
|
||||||
|
return Ok(PatternReport {
|
||||||
|
query: query.into(),
|
||||||
|
matched_playbooks: 0,
|
||||||
|
total_workers_examined: 0,
|
||||||
|
common_certifications: vec![],
|
||||||
|
common_skills: vec![],
|
||||||
|
modal_archetype: None,
|
||||||
|
reliability_p50: 0.0, reliability_min: 0.0, reliability_max: 0.0,
|
||||||
|
matched_playbook_ids: vec![],
|
||||||
|
discovered_pattern: "No similar past playbooks found.".into(),
|
||||||
|
duration_secs: t0.elapsed().as_secs_f32(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. Pull each endorsed worker's full profile from workers_500k.
|
||||||
|
// Restrict by (name, city, state) tuple so cross-city homonyms
|
||||||
|
// don't pollute the aggregate.
|
||||||
|
let mut conditions: Vec<String> = Vec::new();
|
||||||
|
let mut matched_ids: Vec<String> = Vec::new();
|
||||||
|
for (_, pb) in &matched {
|
||||||
|
matched_ids.push(pb.playbook_id.clone());
|
||||||
|
let (Some(city), Some(state)) = (pb.city.as_ref(), pb.state.as_ref()) else { continue };
|
||||||
|
for name in &pb.endorsed_names {
|
||||||
|
let esc = |s: &str| s.replace('\'', "''");
|
||||||
|
conditions.push(format!(
|
||||||
|
"(name = '{}' AND city = '{}' AND state = '{}')",
|
||||||
|
esc(name), esc(city), esc(state)
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if conditions.is_empty() {
|
||||||
|
return Ok(PatternReport {
|
||||||
|
query: query.into(),
|
||||||
|
matched_playbooks: matched.len(),
|
||||||
|
total_workers_examined: 0,
|
||||||
|
common_certifications: vec![], common_skills: vec![],
|
||||||
|
modal_archetype: None, reliability_p50: 0.0,
|
||||||
|
reliability_min: 0.0, reliability_max: 0.0,
|
||||||
|
matched_playbook_ids: matched_ids,
|
||||||
|
discovered_pattern: "Matched playbooks but no endorsed names with city/state to lookup.".into(),
|
||||||
|
duration_secs: t0.elapsed().as_secs_f32(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let sql = format!(
|
||||||
|
"SELECT name, role, city, state, certifications, skills, archetype, \
|
||||||
|
CAST(reliability AS DOUBLE) as reliability \
|
||||||
|
FROM workers_500k WHERE {} LIMIT 500",
|
||||||
|
conditions.join(" OR ")
|
||||||
|
);
|
||||||
|
let engine = queryd::context::QueryEngine::new(
|
||||||
|
catalog.clone(), buckets.clone(), queryd::cache::MemCache::new(0),
|
||||||
|
);
|
||||||
|
let batches = engine.query(&sql).await.map_err(|e| format!("worker lookup: {e}"))?;
|
||||||
|
|
||||||
|
// 4. Aggregate. Pipe-separated cert/skill lists, single-string archetype,
|
||||||
|
// numeric reliability. Frequencies are share-of-workers.
|
||||||
|
use arrow::array::{Array, AsArray};
|
||||||
|
let mut cert_counts: HashMap<String, usize> = HashMap::new();
|
||||||
|
let mut skill_counts: HashMap<String, usize> = HashMap::new();
|
||||||
|
let mut arch_counts: HashMap<String, usize> = HashMap::new();
|
||||||
|
let mut reliabilities: Vec<f64> = Vec::new();
|
||||||
|
let mut total = 0usize;
|
||||||
|
|
||||||
|
let get_string = |b: &arrow::record_batch::RecordBatch, col: &str, row: usize| -> String {
|
||||||
|
let Some(c) = b.column_by_name(col) else { return String::new(); };
|
||||||
|
if let Some(arr) = c.as_string_view_opt() {
|
||||||
|
if arr.is_null(row) { return String::new(); }
|
||||||
|
return arr.value(row).to_string();
|
||||||
|
}
|
||||||
|
if let Some(arr) = c.as_string_opt::<i32>() {
|
||||||
|
if arr.is_null(row) { return String::new(); }
|
||||||
|
return arr.value(row).to_string();
|
||||||
|
}
|
||||||
|
String::new()
|
||||||
|
};
|
||||||
|
let get_f64 = |b: &arrow::record_batch::RecordBatch, col: &str, row: usize| -> f64 {
|
||||||
|
let Some(c) = b.column_by_name(col) else { return 0.0; };
|
||||||
|
if let Some(arr) = c.as_primitive_opt::<arrow::datatypes::Float64Type>() {
|
||||||
|
if arr.is_null(row) { return 0.0; }
|
||||||
|
return arr.value(row);
|
||||||
|
}
|
||||||
|
0.0
|
||||||
|
};
|
||||||
|
|
||||||
|
for b in &batches {
|
||||||
|
for row in 0..b.num_rows() {
|
||||||
|
total += 1;
|
||||||
|
let certs = get_string(b, "certifications", row);
|
||||||
|
for c in certs.split(['|', ',']).map(|s| s.trim()).filter(|s| !s.is_empty() && *s != "none") {
|
||||||
|
*cert_counts.entry(c.to_string()).or_insert(0) += 1;
|
||||||
|
}
|
||||||
|
let skills = get_string(b, "skills", row);
|
||||||
|
for s in skills.split(['|', ',']).map(|s| s.trim()).filter(|s| !s.is_empty()) {
|
||||||
|
*skill_counts.entry(s.to_string()).or_insert(0) += 1;
|
||||||
|
}
|
||||||
|
let arch = get_string(b, "archetype", row);
|
||||||
|
if !arch.is_empty() {
|
||||||
|
*arch_counts.entry(arch).or_insert(0) += 1;
|
||||||
|
}
|
||||||
|
let rel = get_f64(b, "reliability", row);
|
||||||
|
if rel > 0.0 { reliabilities.push(rel); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let total_f = total.max(1) as f32;
|
||||||
|
let to_freq = |m: HashMap<String, usize>, min: f32| -> Vec<TraitFreq> {
|
||||||
|
let mut v: Vec<TraitFreq> = m.into_iter()
|
||||||
|
.map(|(name, count)| TraitFreq { name, count, frequency: count as f32 / total_f })
|
||||||
|
.filter(|t| t.frequency >= min)
|
||||||
|
.collect();
|
||||||
|
v.sort_by(|a, b| b.count.cmp(&a.count));
|
||||||
|
v.truncate(8);
|
||||||
|
v
|
||||||
|
};
|
||||||
|
let common_certifications = to_freq(cert_counts, min_trait_frequency);
|
||||||
|
let common_skills = to_freq(skill_counts, min_trait_frequency);
|
||||||
|
let modal_archetype = arch_counts.into_iter()
|
||||||
|
.max_by_key(|(_, c)| *c)
|
||||||
|
.map(|(name, _)| name);
|
||||||
|
|
||||||
|
reliabilities.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
|
||||||
|
let p50 = if reliabilities.is_empty() { 0.0 } else { reliabilities[reliabilities.len() / 2] };
|
||||||
|
let rmin = reliabilities.first().copied().unwrap_or(0.0);
|
||||||
|
let rmax = reliabilities.last().copied().unwrap_or(0.0);
|
||||||
|
|
||||||
|
// Build a human-readable discovered-pattern summary
|
||||||
|
let mut parts: Vec<String> = vec![
|
||||||
|
format!("Across {} similar past playbooks ({} workers examined)", matched.len(), total),
|
||||||
|
];
|
||||||
|
if !common_certifications.is_empty() {
|
||||||
|
let head: Vec<String> = common_certifications.iter().take(3)
|
||||||
|
.map(|t| format!("{} ({:.0}%)", t.name, t.frequency * 100.0)).collect();
|
||||||
|
parts.push(format!("recurring certifications: {}", head.join(", ")));
|
||||||
|
}
|
||||||
|
if !common_skills.is_empty() {
|
||||||
|
let head: Vec<String> = common_skills.iter().take(3)
|
||||||
|
.map(|t| format!("{} ({:.0}%)", t.name, t.frequency * 100.0)).collect();
|
||||||
|
parts.push(format!("recurring skills: {}", head.join(", ")));
|
||||||
|
}
|
||||||
|
if let Some(a) = &modal_archetype { parts.push(format!("archetype mostly: {a}")); }
|
||||||
|
if !reliabilities.is_empty() {
|
||||||
|
parts.push(format!("reliability median {:.2} (range {:.2}–{:.2})", p50, rmin, rmax));
|
||||||
|
}
|
||||||
|
let discovered_pattern = parts.join(" · ");
|
||||||
|
|
||||||
|
Ok(PatternReport {
|
||||||
|
query: query.into(),
|
||||||
|
matched_playbooks: matched.len(),
|
||||||
|
total_workers_examined: total,
|
||||||
|
common_certifications, common_skills,
|
||||||
|
modal_archetype, reliability_p50: p50,
|
||||||
|
reliability_min: rmin, reliability_max: rmax,
|
||||||
|
matched_playbook_ids: matched_ids,
|
||||||
|
discovered_pattern,
|
||||||
|
duration_secs: t0.elapsed().as_secs_f32(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------- Persist memory → SQL (Path 2 foundation) ----------------
|
||||||
|
|
||||||
|
/// Outcome of `persist_to_sql`.
#[derive(Debug, Clone, Serialize)]
pub struct PersistReport {
    /// Number of in-memory entries written as rows.
    pub rows_persisted: usize,
    /// Dataset name as reported by the catalog manifest.
    pub dataset_name: String,
    /// Schema fingerprint registered with the catalog.
    pub fingerprint: String,
    /// Wall-clock time spent, in seconds.
    pub duration_secs: f32,
}
|
||||||
|
|
||||||
|
/// Dump current in-memory state to a queryable Parquet under
|
||||||
|
/// `successful_playbooks_live`. Registers fresh objects each call — safe
|
||||||
|
/// because in-memory state is the source of truth here, so REPLACING the
|
||||||
|
/// objects list reflects the real state, not destroying it.
|
||||||
|
///
|
||||||
|
/// Distinct from the existing `successful_playbooks` dataset (which is
|
||||||
|
/// read by `rebuild()`), so this never collides with operator imports of
|
||||||
|
/// historical playbook data. Recruiter-facing SQL surfaces should query
|
||||||
|
/// `successful_playbooks_live` for current operator activity.
|
||||||
|
pub async fn persist_to_sql(
|
||||||
|
memory: &PlaybookMemory,
|
||||||
|
catalog: &catalogd::registry::Registry,
|
||||||
|
) -> Result<PersistReport, String> {
|
||||||
|
use arrow::array::StringArray;
|
||||||
|
use arrow::datatypes::{DataType, Field, Schema};
|
||||||
|
use arrow::record_batch::RecordBatch;
|
||||||
|
|
||||||
|
let t0 = std::time::Instant::now();
|
||||||
|
let entries = memory.snapshot().await;
|
||||||
|
|
||||||
|
let schema = Arc::new(Schema::new(vec![
|
||||||
|
Field::new("timestamp", DataType::Utf8, true),
|
||||||
|
Field::new("operation", DataType::Utf8, true),
|
||||||
|
Field::new("approach", DataType::Utf8, true),
|
||||||
|
Field::new("result", DataType::Utf8, true),
|
||||||
|
Field::new("context", DataType::Utf8, true),
|
||||||
|
]));
|
||||||
|
|
||||||
|
let timestamps: Vec<&str> = entries.iter().map(|e| e.timestamp.as_str()).collect();
|
||||||
|
let operations: Vec<&str> = entries.iter().map(|e| e.operation.as_str()).collect();
|
||||||
|
let approaches: Vec<&str> = entries.iter().map(|e| e.approach.as_str()).collect();
|
||||||
|
let contexts: Vec<&str> = entries.iter().map(|e| e.context.as_str()).collect();
|
||||||
|
// Result column is reconstructed from endorsed_names so SQL queries
|
||||||
|
// against successful_playbooks_live see the same shape as the original
|
||||||
|
// CSV-fed successful_playbooks ("N/N filled → Name1, Name2").
|
||||||
|
let results: Vec<String> = entries.iter().map(|e| {
|
||||||
|
if e.endorsed_names.is_empty() {
|
||||||
|
String::new()
|
||||||
|
} else {
|
||||||
|
let n = e.endorsed_names.len();
|
||||||
|
format!("{}/{} filled → {}", n, n, e.endorsed_names.join(", "))
|
||||||
|
}
|
||||||
|
}).collect();
|
||||||
|
let result_refs: Vec<&str> = results.iter().map(|s| s.as_str()).collect();
|
||||||
|
|
||||||
|
let batch = RecordBatch::try_new(schema.clone(), vec![
|
||||||
|
Arc::new(StringArray::from(timestamps)),
|
||||||
|
Arc::new(StringArray::from(operations)),
|
||||||
|
Arc::new(StringArray::from(approaches)),
|
||||||
|
Arc::new(StringArray::from(result_refs)),
|
||||||
|
Arc::new(StringArray::from(contexts)),
|
||||||
|
]).map_err(|e| format!("build record batch: {e}"))?;
|
||||||
|
|
||||||
|
let parquet_bytes = shared::arrow_helpers::record_batch_to_parquet(&batch)?;
|
||||||
|
let fp = shared::arrow_helpers::fingerprint_schema(&schema);
|
||||||
|
|
||||||
|
let key = "datasets/successful_playbooks_live.parquet";
|
||||||
|
ops::put(&memory.store, key, parquet_bytes.clone()).await?;
|
||||||
|
|
||||||
|
let obj = shared::types::ObjectRef {
|
||||||
|
bucket: "primary".into(),
|
||||||
|
key: key.into(),
|
||||||
|
size_bytes: parquet_bytes.len() as u64,
|
||||||
|
created_at: chrono::Utc::now(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let manifest = catalog.register(
|
||||||
|
"successful_playbooks_live".into(),
|
||||||
|
fp.clone(),
|
||||||
|
vec![obj],
|
||||||
|
).await?;
|
||||||
|
|
||||||
|
Ok(PersistReport {
|
||||||
|
rows_persisted: entries.len(),
|
||||||
|
dataset_name: manifest.name,
|
||||||
|
fingerprint: fp.0,
|
||||||
|
duration_secs: t0.elapsed().as_secs_f32(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------- Rebuild (the core of Phase 19) ----------------
|
||||||
|
|
||||||
|
/// Outcome of `rebuild`, so callers can show operators what happened.
#[derive(Debug, Clone, Serialize)]
pub struct RebuildReport {
    /// Rows read from `successful_playbooks`.
    pub rows_scanned: usize,
    /// Playbook entries placed into memory.
    pub entries_built: usize,
    /// Sum of endorsed worker names across all built entries.
    pub total_names_endorsed: usize,
    /// Wall-clock time spent, in seconds.
    pub duration_secs: f32,
}
|
||||||
|
|
||||||
|
/// Full rebuild: scan `successful_playbooks`, extract endorsements, embed
|
||||||
|
/// each row's operation+approach+context, replace the in-memory state.
|
||||||
|
///
|
||||||
|
/// Returns the report so callers can show operators what happened.
|
||||||
|
pub async fn rebuild(
|
||||||
|
memory: &PlaybookMemory,
|
||||||
|
ai_client: &AiClient,
|
||||||
|
catalog: &catalogd::registry::Registry,
|
||||||
|
buckets: &Arc<storaged::registry::BucketRegistry>,
|
||||||
|
) -> Result<RebuildReport, String> {
|
||||||
|
let t0 = std::time::Instant::now();
|
||||||
|
|
||||||
|
// 1. Pull every row of successful_playbooks through the query engine.
|
||||||
|
let sql = "SELECT timestamp, operation, approach, result, context \
|
||||||
|
FROM successful_playbooks";
|
||||||
|
let engine = queryd::context::QueryEngine::new(
|
||||||
|
catalog.clone(),
|
||||||
|
buckets.clone(),
|
||||||
|
queryd::cache::MemCache::new(0),
|
||||||
|
);
|
||||||
|
let batches = engine
|
||||||
|
.query(sql)
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("query successful_playbooks: {e}"))?;
|
||||||
|
|
||||||
|
let mut rows: Vec<(String, String, String, String, String)> = Vec::new();
|
||||||
|
for b in &batches {
|
||||||
|
let n = b.num_rows();
|
||||||
|
let get = |col: &str, row: usize| -> String {
|
||||||
|
use arrow::array::{Array, AsArray};
|
||||||
|
let Some(c) = b.column_by_name(col) else { return String::new(); };
|
||||||
|
if let Some(arr) = c.as_string_view_opt() {
|
||||||
|
if arr.is_null(row) { return String::new(); }
|
||||||
|
return arr.value(row).to_string();
|
||||||
|
}
|
||||||
|
if let Some(arr) = c.as_string_opt::<i32>() {
|
||||||
|
if arr.is_null(row) { return String::new(); }
|
||||||
|
return arr.value(row).to_string();
|
||||||
|
}
|
||||||
|
String::new()
|
||||||
|
};
|
||||||
|
for row in 0..n {
|
||||||
|
rows.push((
|
||||||
|
get("timestamp", row),
|
||||||
|
get("operation", row),
|
||||||
|
get("approach", row),
|
||||||
|
get("result", row),
|
||||||
|
get("context", row),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let rows_scanned = rows.len();
|
||||||
|
|
||||||
|
// 2. For each row, build a PlaybookEntry (no embedding yet). Parse
|
||||||
|
// the operation for (city, state) and the result for names.
|
||||||
|
let mut entries: Vec<PlaybookEntry> = rows
|
||||||
|
.into_iter()
|
||||||
|
.map(|(ts, op, approach, result, ctx)| {
|
||||||
|
let (city, state) = parse_city_state(&op);
|
||||||
|
let names = parse_names(&result);
|
||||||
|
PlaybookEntry {
|
||||||
|
playbook_id: stable_id(&ts, &op),
|
||||||
|
operation: op,
|
||||||
|
approach,
|
||||||
|
context: ctx,
|
||||||
|
timestamp: ts,
|
||||||
|
endorsed_names: names,
|
||||||
|
city,
|
||||||
|
state,
|
||||||
|
embedding: None,
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// 3. Embed in one batch. Sidecar's embed handles batching internally;
|
||||||
|
// chunk here to ~64 per request to keep memory flat.
|
||||||
|
const EMBED_BATCH: usize = 64;
|
||||||
|
for chunk_start in (0..entries.len()).step_by(EMBED_BATCH) {
|
||||||
|
let end = (chunk_start + EMBED_BATCH).min(entries.len());
|
||||||
|
let texts: Vec<String> = entries[chunk_start..end]
|
||||||
|
.iter()
|
||||||
|
.map(embed_text)
|
||||||
|
.collect();
|
||||||
|
let req = EmbedRequest { texts, model: None };
|
||||||
|
let resp = ai_client
|
||||||
|
.embed(req)
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("embed batch [{chunk_start}..{end}]: {e}"))?;
|
||||||
|
for (i, v) in resp.embeddings.iter().enumerate() {
|
||||||
|
let f32v: Vec<f32> = v.iter().map(|&x| x as f32).collect();
|
||||||
|
entries[chunk_start + i].embedding = Some(f32v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let total_names_endorsed: usize = entries.iter().map(|e| e.endorsed_names.len()).sum();
|
||||||
|
let entries_built = entries.len();
|
||||||
|
|
||||||
|
memory.set_entries(entries).await?;
|
||||||
|
|
||||||
|
Ok(RebuildReport {
|
||||||
|
rows_scanned,
|
||||||
|
entries_built,
|
||||||
|
total_names_endorsed,
|
||||||
|
duration_secs: t0.elapsed().as_secs_f32(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn embed_text(e: &PlaybookEntry) -> String {
|
||||||
|
// Compact one-liner per playbook. Excludes timestamp (no semantic
|
||||||
|
// signal) and includes the fills as words (they're occasionally
|
||||||
|
// meaningful — "Luis Harris" might semantically correlate with
|
||||||
|
// Spanish-speaker names in future queries).
|
||||||
|
format!(
|
||||||
|
"{} | {} | {} | fills: {}",
|
||||||
|
e.operation,
|
||||||
|
e.approach,
|
||||||
|
e.context,
|
||||||
|
e.endorsed_names.join(", "),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Derive a stable id from (timestamp, operation). Two playbooks with
|
||||||
|
/// identical timestamp+operation collapse to one — benign dedup.
|
||||||
|
fn stable_id(ts: &str, op: &str) -> String {
|
||||||
|
use sha2::{Digest, Sha256};
|
||||||
|
let mut h = Sha256::new();
|
||||||
|
h.update(ts.as_bytes());
|
||||||
|
h.update(b"|");
|
||||||
|
h.update(op.as_bytes());
|
||||||
|
let bytes = h.finalize();
|
||||||
|
format!("pb-{}", hex_short(&bytes, 12))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lowercase hex encoding of the first `n` bytes of `b` (all of `b` when
/// it is shorter than `n`), two characters per byte.
fn hex_short(b: &[u8], n: usize) -> String {
    b.iter()
        .take(n)
        .map(|byte| format!("{byte:02x}"))
        .collect()
}
|
||||||
|
|
||||||
|
/// Parse "fill: Welder x2 in Toledo, OH" → ("Toledo", "OH").
///
/// The operation is split on " in "; the first segment after a separator
/// that reads as "City, ST…" wins. The state is the leading run of ASCII
/// letters after the comma, so trailing context ("OH (night shift)") is
/// ignored. Scanning past the first separator keeps role text that itself
/// contains " in " (e.g. "Check in Clerk x1 in Dayton, OH") from hiding
/// the location.
///
/// Returns `(None, None)` for malformed operations — either both values
/// are present or neither is.
fn parse_city_state(op: &str) -> (Option<String>, Option<String>) {
    op.split(" in ")
        // Segment 0 precedes the first " in " and can never be the location.
        .skip(1)
        .find_map(|candidate| {
            let (city, rest) = candidate.split_once(',')?;
            let city = city.trim();
            // state might be followed by more context; take leading alpha chars
            let state: String = rest
                .trim()
                .chars()
                .take_while(char::is_ascii_alphabetic)
                .collect();
            (!city.is_empty() && !state.is_empty()).then(|| (city.to_string(), state))
        })
        .map_or((None, None), |(city, state)| (Some(city), Some(state)))
}
|
||||||
|
|
||||||
|
/// Parse "2/2 filled → Matthew Roberts, Amy Davis" → ["Matthew Roberts", "Amy Davis"].
///
/// Takes the text after the first `→`, cuts it at the first " (" (trailing
/// noise such as "(and N more)" that some emitters add), then splits on
/// commas, trimming each name and dropping empty ones. A result without an
/// arrow yields an empty list.
fn parse_names(result: &str) -> Vec<String> {
    let Some(tail) = result.split('→').nth(1) else {
        return Vec::new();
    };
    let tail = tail.trim();
    let names_part = match tail.find(" (") {
        Some(cut) => &tail[..cut],
        None => tail,
    };
    names_part
        .split(',')
        .map(str::trim)
        .filter(|name| !name.is_empty())
        .map(str::to_string)
        .collect()
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    // --- parse_city_state ---

    #[test]
    fn parse_city_state_extracts_both() {
        let (city, state) = parse_city_state("fill: Welder x2 in Toledo, OH");
        assert_eq!(city.as_deref(), Some("Toledo"));
        assert_eq!(state.as_deref(), Some("OH"));
    }

    #[test]
    fn parse_city_state_handles_multiword_city() {
        let (city, state) = parse_city_state("fill: Loader x1 in Grand Rapids, MI");
        assert_eq!(city.as_deref(), Some("Grand Rapids"));
        assert_eq!(state.as_deref(), Some("MI"));
    }

    #[test]
    fn parse_city_state_malformed_returns_none() {
        let (city, state) = parse_city_state("fill: something weird");
        assert!(city.is_none());
        assert!(state.is_none());
    }

    // --- parse_names ---

    #[test]
    fn parse_names_extracts_after_arrow() {
        let names = parse_names("2/2 filled → Matthew Roberts, Amy Davis");
        assert_eq!(names, ["Matthew Roberts", "Amy Davis"]);
    }

    #[test]
    fn parse_names_handles_single_fill() {
        let names = parse_names("1/1 filled → Jose Reed");
        assert_eq!(names, ["Jose Reed"]);
    }

    #[test]
    fn parse_names_handles_no_arrow() {
        let names = parse_names("0/2 filled");
        assert!(names.is_empty());
    }

    // --- stable_id ---

    #[test]
    fn stable_id_is_deterministic() {
        let timestamp = "2026-04-20T00:00:00Z";
        let operation = "fill: Welder x2 in Toledo, OH";
        let first = stable_id(timestamp, operation);
        let second = stable_id(timestamp, operation);
        assert_eq!(first, second);
        assert!(first.starts_with("pb-"));
    }

    // --- boost computation ---

    #[test]
    fn boost_caps_per_worker() {
        // 100 playbooks that differ only in id all endorse the same worker;
        // the accumulated boost must still stay at or below
        // MAX_BOOST_PER_WORKER.
        let memory = PlaybookMemory::new(Arc::new(object_store::memory::InMemory::new()));

        let mut entries: Vec<PlaybookEntry> = Vec::with_capacity(100);
        for i in 0..100 {
            entries.push(PlaybookEntry {
                playbook_id: format!("pb-{i}"),
                operation: "fill: Welder x1 in Toledo, OH".into(),
                approach: "transfer".into(),
                context: "".into(),
                timestamp: "2026-04-20".into(),
                endorsed_names: vec!["Deborah Powell".into()],
                city: Some("Toledo".into()),
                state: Some("OH".into()),
                embedding: Some(vec![1.0, 0.0, 0.0]),
            });
        }

        let runtime = tokio::runtime::Runtime::new().unwrap();
        runtime.block_on(async {
            memory.set_entries(entries).await.unwrap();
            let boosts = memory.compute_boost_for(&[1.0, 0.0, 0.0], 100, 0.5).await;
            let key = ("Toledo".into(), "OH".into(), "Deborah Powell".into());
            let entry = boosts.get(&key).expect("boost entry present");
            assert!(
                entry.boost <= MAX_BOOST_PER_WORKER + 1e-6,
                "boost {} exceeded cap {}",
                entry.boost,
                MAX_BOOST_PER_WORKER
            );
        });
    }
}
|
||||||
@ -12,7 +12,7 @@ use std::sync::Arc;
|
|||||||
use aibridge::client::{AiClient, EmbedRequest, GenerateRequest};
|
use aibridge::client::{AiClient, EmbedRequest, GenerateRequest};
|
||||||
use catalogd::registry::Registry as CatalogRegistry;
|
use catalogd::registry::Registry as CatalogRegistry;
|
||||||
use storaged::registry::BucketRegistry;
|
use storaged::registry::BucketRegistry;
|
||||||
use crate::{agent, autotune, chunker, embedding_cache, harness, hnsw, index_registry, jobs, lance_backend, promotion, rag, refresh, search, store, supervisor, trial};
|
use crate::{agent, autotune, chunker, embedding_cache, harness, hnsw, index_registry, jobs, lance_backend, playbook_memory, promotion, rag, refresh, search, store, supervisor, trial};
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct VectorState {
|
pub struct VectorState {
|
||||||
@ -23,6 +23,9 @@ pub struct VectorState {
|
|||||||
pub hnsw_store: hnsw::HnswStore,
|
pub hnsw_store: hnsw::HnswStore,
|
||||||
pub embedding_cache: embedding_cache::EmbeddingCache,
|
pub embedding_cache: embedding_cache::EmbeddingCache,
|
||||||
pub trial_journal: trial::TrialJournal,
|
pub trial_journal: trial::TrialJournal,
|
||||||
|
/// Federation-aware harness store — resolves eval artifacts to each
|
||||||
|
/// index's recorded bucket, falling back to primary for legacy evals.
|
||||||
|
pub harness_store: harness::HarnessStore,
|
||||||
/// Catalog registry — needed by the Phase C refresh path to mark/clear
|
/// Catalog registry — needed by the Phase C refresh path to mark/clear
|
||||||
/// staleness and look up dataset manifests.
|
/// staleness and look up dataset manifests.
|
||||||
pub catalog: CatalogRegistry,
|
pub catalog: CatalogRegistry,
|
||||||
@ -46,6 +49,10 @@ pub struct VectorState {
|
|||||||
/// ADR-019 hybrid: handles to Lance datasets keyed by index name.
|
/// ADR-019 hybrid: handles to Lance datasets keyed by index name.
|
||||||
/// Lazy-created on first /vectors/lance/* call.
|
/// Lazy-created on first /vectors/lance/* call.
|
||||||
pub lance: lance_backend::LanceRegistry,
|
pub lance: lance_backend::LanceRegistry,
|
||||||
|
/// Phase 19 — meta-index feedback. Embeds past successful_playbooks
|
||||||
|
/// and, when `use_playbook_memory` is set on /vectors/hybrid, boosts
|
||||||
|
/// workers that were actually filled in semantically-similar past ops.
|
||||||
|
pub playbook_memory: playbook_memory::PlaybookMemory,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// What the active-profile singleton records. Narrow — we don't need the
|
/// What the active-profile singleton records. Narrow — we don't need the
|
||||||
@ -63,6 +70,7 @@ pub fn router(state: VectorState) -> Router {
|
|||||||
.route("/index", post(create_index))
|
.route("/index", post(create_index))
|
||||||
.route("/indexes", get(list_indexes))
|
.route("/indexes", get(list_indexes))
|
||||||
.route("/indexes/{name}", get(get_index_meta))
|
.route("/indexes/{name}", get(get_index_meta))
|
||||||
|
.route("/indexes/{name}/bucket", axum::routing::patch(migrate_index_bucket))
|
||||||
.route("/jobs", get(list_jobs))
|
.route("/jobs", get(list_jobs))
|
||||||
.route("/jobs/{id}", get(get_job))
|
.route("/jobs/{id}", get(get_job))
|
||||||
.route("/search", post(search_index))
|
.route("/search", post(search_index))
|
||||||
@ -110,6 +118,12 @@ pub fn router(state: VectorState) -> Router {
|
|||||||
.route("/lance/stats/{index_name}", get(lance_stats))
|
.route("/lance/stats/{index_name}", get(lance_stats))
|
||||||
.route("/lance/scalar-index/{index_name}/{column}", post(lance_build_scalar_index))
|
.route("/lance/scalar-index/{index_name}/{column}", post(lance_build_scalar_index))
|
||||||
.route("/lance/recall/{index_name}", post(lance_recall_harness))
|
.route("/lance/recall/{index_name}", post(lance_recall_harness))
|
||||||
|
// Phase 19: playbook memory — the meta-index feedback loop
|
||||||
|
.route("/playbook_memory/rebuild", post(rebuild_playbook_memory))
|
||||||
|
.route("/playbook_memory/stats", get(playbook_memory_stats))
|
||||||
|
.route("/playbook_memory/seed", post(seed_playbook_memory))
|
||||||
|
.route("/playbook_memory/persist_sql", post(persist_playbook_memory_sql))
|
||||||
|
.route("/playbook_memory/patterns", post(discover_playbook_patterns))
|
||||||
.with_state(state)
|
.with_state(state)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -259,6 +273,174 @@ async fn get_index_meta(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct MigrateBucketRequest {
|
||||||
|
dest_bucket: String,
|
||||||
|
/// If true, delete artifacts from the source bucket after the pointer
|
||||||
|
/// flip. Default false — keeping source copies means a failed migration
|
||||||
|
/// is recoverable by editing IndexMeta.bucket back, and a successful
|
||||||
|
/// migration leaves inspectable forensics until an operator sweeps.
|
||||||
|
#[serde(default)]
|
||||||
|
delete_source: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct MigrateBucketReport {
|
||||||
|
index_name: String,
|
||||||
|
source_bucket: String,
|
||||||
|
dest_bucket: String,
|
||||||
|
/// Artifact keys that were copied (or attempted). Order follows copy order.
|
||||||
|
copied: Vec<String>,
|
||||||
|
/// Artifact prefixes that had nothing to copy (optional files missing,
|
||||||
|
/// trial journal empty, etc).
|
||||||
|
skipped: Vec<String>,
|
||||||
|
/// Subset of `copied` that was subsequently deleted from the source.
|
||||||
|
deleted_source: Vec<String>,
|
||||||
|
duration_secs: f32,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Move an index's artifacts from its current bucket to `dest_bucket`.
|
||||||
|
/// Parquet-backed indexes only — Lance migration needs URI rewriting that
|
||||||
|
/// isn't in scope for this endpoint. Copies the vector data, trial journal,
|
||||||
|
/// promotion file, and auto-generated harness; updates `IndexMeta.bucket`
|
||||||
|
/// last so a mid-flight failure leaves the index still usable at its
|
||||||
|
/// original location. Evicts the `EmbeddingCache` entry so the next load
|
||||||
|
/// re-reads from the new bucket.
|
||||||
|
async fn migrate_index_bucket(
|
||||||
|
State(state): State<VectorState>,
|
||||||
|
Path(name): Path<String>,
|
||||||
|
Json(req): Json<MigrateBucketRequest>,
|
||||||
|
) -> Result<Json<MigrateBucketReport>, (StatusCode, String)> {
|
||||||
|
let t0 = std::time::Instant::now();
|
||||||
|
|
||||||
|
let mut meta = state
|
||||||
|
.index_registry
|
||||||
|
.get(&name)
|
||||||
|
.await
|
||||||
|
.ok_or_else(|| (StatusCode::NOT_FOUND, format!("index '{name}' not found")))?;
|
||||||
|
|
||||||
|
if meta.vector_backend == shared::types::VectorBackend::Lance {
|
||||||
|
return Err((
|
||||||
|
StatusCode::BAD_REQUEST,
|
||||||
|
"Lance-backed indexes cannot be migrated via this endpoint — \
|
||||||
|
Lance URIs are bucket-specific; a separate migrate_lance tool \
|
||||||
|
is needed".into(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
if !state.bucket_registry.contains(&req.dest_bucket) {
|
||||||
|
return Err((
|
||||||
|
StatusCode::BAD_REQUEST,
|
||||||
|
format!("dest bucket '{}' not registered", req.dest_bucket),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let source_bucket = meta.bucket.clone();
|
||||||
|
if source_bucket == req.dest_bucket {
|
||||||
|
return Err((
|
||||||
|
StatusCode::BAD_REQUEST,
|
||||||
|
format!("source and dest are both '{source_bucket}' — nothing to migrate"),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let src = state
|
||||||
|
.bucket_registry
|
||||||
|
.get(&source_bucket)
|
||||||
|
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e))?;
|
||||||
|
let dst = state
|
||||||
|
.bucket_registry
|
||||||
|
.get(&req.dest_bucket)
|
||||||
|
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e))?;
|
||||||
|
|
||||||
|
let mut copied: Vec<String> = Vec::new();
|
||||||
|
let mut skipped: Vec<String> = Vec::new();
|
||||||
|
|
||||||
|
// 1. Vector data (single parquet file for this backend).
|
||||||
|
copy_key(&src, &dst, &meta.storage_key)
|
||||||
|
.await
|
||||||
|
.map_err(|e| {
|
||||||
|
(StatusCode::INTERNAL_SERVER_ERROR,
|
||||||
|
format!("copy {}: {e}", meta.storage_key))
|
||||||
|
})?;
|
||||||
|
copied.push(meta.storage_key.clone());
|
||||||
|
|
||||||
|
// 2. Trial journal batches — per-index directory of JSONL files.
|
||||||
|
let trial_prefix = format!("_hnsw_trials/{name}/");
|
||||||
|
let trial_keys = storaged::ops::list(&src, Some(&trial_prefix))
|
||||||
|
.await
|
||||||
|
.unwrap_or_default();
|
||||||
|
if trial_keys.is_empty() {
|
||||||
|
skipped.push(trial_prefix);
|
||||||
|
}
|
||||||
|
for k in &trial_keys {
|
||||||
|
copy_key(&src, &dst, k)
|
||||||
|
.await
|
||||||
|
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("copy {k}: {e}")))?;
|
||||||
|
copied.push(k.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. Promotion file (optional — absent for never-promoted indexes).
|
||||||
|
let promo_key = format!("_hnsw_promotions/{name}.json");
|
||||||
|
match copy_key(&src, &dst, &promo_key).await {
|
||||||
|
Ok(()) => copied.push(promo_key),
|
||||||
|
Err(_) => skipped.push(promo_key),
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. Auto-generated harness (optional — absent if agent never ran).
|
||||||
|
let harness_key = format!("_hnsw_evals/{name}_auto.json");
|
||||||
|
match copy_key(&src, &dst, &harness_key).await {
|
||||||
|
Ok(()) => copied.push(harness_key),
|
||||||
|
Err(_) => skipped.push(harness_key),
|
||||||
|
}
|
||||||
|
|
||||||
|
// 5. Pointer flip — IndexMeta.bucket now points at destination. This
|
||||||
|
// is the commit point; earlier failures leave copies in dest but the
|
||||||
|
// index still usable at source.
|
||||||
|
meta.bucket = req.dest_bucket.clone();
|
||||||
|
state
|
||||||
|
.index_registry
|
||||||
|
.register(meta)
|
||||||
|
.await
|
||||||
|
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("update meta: {e}")))?;
|
||||||
|
|
||||||
|
// 6. Cache eviction — next load reads the new bucket's parquet.
|
||||||
|
state.embedding_cache.evict(&name).await;
|
||||||
|
|
||||||
|
// 7. Optional source cleanup.
|
||||||
|
let mut deleted_source: Vec<String> = Vec::new();
|
||||||
|
if req.delete_source {
|
||||||
|
for k in &copied {
|
||||||
|
if storaged::ops::delete(&src, k).await.is_ok() {
|
||||||
|
deleted_source.push(k.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Json(MigrateBucketReport {
|
||||||
|
index_name: name,
|
||||||
|
source_bucket,
|
||||||
|
dest_bucket: req.dest_bucket,
|
||||||
|
copied,
|
||||||
|
skipped,
|
||||||
|
deleted_source,
|
||||||
|
duration_secs: t0.elapsed().as_secs_f32(),
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stream a single object from one bucket to another. Uses the existing
|
||||||
|
/// `storaged::ops` get + put primitives — no native copy in object_store
|
||||||
|
/// across heterogeneous backends (local ↔ S3), so an in-memory hop is
|
||||||
|
/// unavoidable. Bounded by individual object size, which for our parquet
|
||||||
|
/// + jsonl artifacts tops out around a few hundred MB.
|
||||||
|
async fn copy_key(
|
||||||
|
src: &Arc<dyn ObjectStore>,
|
||||||
|
dst: &Arc<dyn ObjectStore>,
|
||||||
|
key: &str,
|
||||||
|
) -> Result<(), String> {
|
||||||
|
let data = storaged::ops::get(src, key).await?;
|
||||||
|
storaged::ops::put(dst, key, data).await
|
||||||
|
}
|
||||||
|
|
||||||
// --- unused legacy function below, kept for reference ---
|
// --- unused legacy function below, kept for reference ---
|
||||||
|
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
@ -420,6 +602,15 @@ struct HybridRequest {
|
|||||||
/// If false, just return the ranked matches (faster, no Ollama gen).
|
/// If false, just return the ranked matches (faster, no Ollama gen).
|
||||||
#[serde(default = "default_true")]
|
#[serde(default = "default_true")]
|
||||||
generate: bool,
|
generate: bool,
|
||||||
|
/// Phase 19: consult `playbook_memory` and boost workers that past
|
||||||
|
/// similar playbooks successfully filled. Off by default so current
|
||||||
|
/// callers keep deterministic ranking; opt-in unlocks the feedback.
|
||||||
|
#[serde(default)]
|
||||||
|
use_playbook_memory: bool,
|
||||||
|
/// Number of past playbooks to consider when `use_playbook_memory`
|
||||||
|
/// is on. Ignored otherwise. When omitted, `playbook_memory::DEFAULT_TOP_K_PLAYBOOKS` is used.
|
||||||
|
#[serde(default)]
|
||||||
|
playbook_memory_k: Option<usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn default_true() -> bool { true }
|
fn default_true() -> bool { true }
|
||||||
@ -442,8 +633,18 @@ struct HybridSource {
|
|||||||
chunk_text: String,
|
chunk_text: String,
|
||||||
score: f32,
|
score: f32,
|
||||||
sql_verified: bool,
|
sql_verified: bool,
|
||||||
|
/// Phase 19: how much the playbook_memory boost lifted this hit's
|
||||||
|
/// score. 0.0 when `use_playbook_memory=false` or no past playbook
|
||||||
|
/// endorsed this worker.
|
||||||
|
#[serde(default, skip_serializing_if = "is_zero")]
|
||||||
|
playbook_boost: f32,
|
||||||
|
/// playbook_ids whose endorsement contributed to `playbook_boost`.
|
||||||
|
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||||
|
playbook_citations: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Predicate for serde's `skip_serializing_if`: true when `x` is within 1e-6
/// of zero. NaN and infinities are never considered zero.
fn is_zero(x: &f32) -> bool {
    let magnitude = x.abs();
    magnitude < 1e-6
}
|
||||||
|
|
||||||
async fn hybrid_search(
|
async fn hybrid_search(
|
||||||
State(state): State<VectorState>,
|
State(state): State<VectorState>,
|
||||||
Json(req): Json<HybridRequest>,
|
Json(req): Json<HybridRequest>,
|
||||||
@ -556,6 +757,11 @@ async fn hybrid_search(
|
|||||||
.and_then(|m| m.id_prefix.clone());
|
.and_then(|m| m.id_prefix.clone());
|
||||||
|
|
||||||
let sql_count = valid_ids.as_ref().map(|s| s.len()).unwrap_or(0);
|
let sql_count = valid_ids.as_ref().map(|s| s.len()).unwrap_or(0);
|
||||||
|
// Phase 19: when playbook_memory is consulted, pull a wider candidate
|
||||||
|
// pool so endorsed workers outside the vanilla top-K can still be
|
||||||
|
// boosted into visibility. 5× is a conservative multiplier — plenty
|
||||||
|
// for a +0.25 boost to flip rankings without dragging the cost up.
|
||||||
|
let fetch_k = if req.use_playbook_memory { req.top_k * 5 } else { req.top_k };
|
||||||
let filtered: Vec<search::SearchResult> = if let Some(ref ids) = valid_ids {
|
let filtered: Vec<search::SearchResult> = if let Some(ref ids) = valid_ids {
|
||||||
all_results.into_iter()
|
all_results.into_iter()
|
||||||
.filter(|r| {
|
.filter(|r| {
|
||||||
@ -572,20 +778,54 @@ async fn hybrid_search(
|
|||||||
};
|
};
|
||||||
ids.contains(raw_id)
|
ids.contains(raw_id)
|
||||||
})
|
})
|
||||||
.take(req.top_k)
|
.take(fetch_k)
|
||||||
.collect()
|
.collect()
|
||||||
} else {
|
} else {
|
||||||
all_results.into_iter().take(req.top_k).collect()
|
all_results.into_iter().take(fetch_k).collect()
|
||||||
};
|
};
|
||||||
|
|
||||||
// Step 4: Build sources with SQL-verified flag.
|
// Step 4: Build sources with SQL-verified flag.
|
||||||
let sources: Vec<HybridSource> = filtered.iter().map(|r| HybridSource {
|
let mut sources: Vec<HybridSource> = filtered.iter().map(|r| HybridSource {
|
||||||
doc_id: r.doc_id.clone(),
|
doc_id: r.doc_id.clone(),
|
||||||
chunk_text: r.chunk_text.clone(),
|
chunk_text: r.chunk_text.clone(),
|
||||||
score: r.score,
|
score: r.score,
|
||||||
sql_verified: valid_ids.is_some(),
|
sql_verified: valid_ids.is_some(),
|
||||||
|
playbook_boost: 0.0,
|
||||||
|
playbook_citations: Vec::new(),
|
||||||
}).collect();
|
}).collect();
|
||||||
|
|
||||||
|
// Step 4b (Phase 19): if use_playbook_memory, look up semantically
|
||||||
|
// similar past playbooks and boost workers they endorsed. Name-match
|
||||||
|
// is on the tuple (city, state, name) extracted from chunk_text —
|
||||||
|
// hybrid_search's SQL filter already narrowed to one city+state, so
|
||||||
|
// this just needs to check the name against each playbook's endorsed
|
||||||
|
// set. Additive boost on the existing vector score, then re-sort.
|
||||||
|
if req.use_playbook_memory {
|
||||||
|
let boost_k = req.playbook_memory_k.unwrap_or(playbook_memory::DEFAULT_TOP_K_PLAYBOOKS);
|
||||||
|
// We embedded the question as `qv` above — reuse it for the
|
||||||
|
// playbook similarity lookup so we don't double-pay Ollama.
|
||||||
|
let boosts = state.playbook_memory.compute_boost_for(&qv, boost_k, 0.5).await;
|
||||||
|
for src in sources.iter_mut() {
|
||||||
|
// Parse "{Name} — {Role} in {City}, {State}. …" chunk. Being
|
||||||
|
// defensive: chunks from other datasets may not follow this
|
||||||
|
// exact shape, so absent fields just skip the boost.
|
||||||
|
if let Some((name, city, state)) = parse_worker_chunk(&src.chunk_text) {
|
||||||
|
let key = (city, state, name);
|
||||||
|
if let Some(entry) = boosts.get(&key) {
|
||||||
|
src.score += entry.boost;
|
||||||
|
src.playbook_boost = entry.boost;
|
||||||
|
src.playbook_citations = entry.citations.clone();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Re-rank: boosted scores can flip ordering.
|
||||||
|
sources.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
|
||||||
|
// Finally trim to the caller's requested top_k — we pulled fetch_k
|
||||||
|
// (5× wider) above specifically so the boost could reach workers
|
||||||
|
// that would otherwise have been trimmed pre-boost.
|
||||||
|
sources.truncate(req.top_k);
|
||||||
|
}
|
||||||
|
|
||||||
// Step 5: Generate answer if requested.
|
// Step 5: Generate answer if requested.
|
||||||
let answer = if req.generate && !sources.is_empty() {
|
let answer = if req.generate && !sources.is_empty() {
|
||||||
let context: String = sources.iter().enumerate().map(|(i, s)| {
|
let context: String = sources.iter().enumerate().map(|(i, s)| {
|
||||||
@ -734,7 +974,7 @@ async fn run_trial(
|
|||||||
State(state): State<VectorState>,
|
State(state): State<VectorState>,
|
||||||
Json(req): Json<TrialRequest>,
|
Json(req): Json<TrialRequest>,
|
||||||
) -> Result<Json<trial::Trial>, (StatusCode, String)> {
|
) -> Result<Json<trial::Trial>, (StatusCode, String)> {
|
||||||
let mut harness_set = harness::EvalSet::load(&state.store, &req.harness)
|
let mut harness_set = state.harness_store.load_for_index(&req.index_name, &req.harness)
|
||||||
.await
|
.await
|
||||||
.map_err(|e| (StatusCode::NOT_FOUND, format!("harness not found: {e}")))?;
|
.map_err(|e| (StatusCode::NOT_FOUND, format!("harness not found: {e}")))?;
|
||||||
|
|
||||||
@ -764,8 +1004,8 @@ async fn run_trial(
|
|||||||
.await
|
.await
|
||||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("ground truth: {e}")))?;
|
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("ground truth: {e}")))?;
|
||||||
tracing::info!("trial: ground truth built in {:.1}s", t0.elapsed().as_secs_f32());
|
tracing::info!("trial: ground truth built in {:.1}s", t0.elapsed().as_secs_f32());
|
||||||
harness_set
|
state.harness_store
|
||||||
.save(&state.store)
|
.save(&harness_set)
|
||||||
.await
|
.await
|
||||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("save harness: {e}")))?;
|
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("save harness: {e}")))?;
|
||||||
}
|
}
|
||||||
@ -890,17 +1130,14 @@ async fn best_trial(
|
|||||||
// --- Harness management ---
|
// --- Harness management ---
|
||||||
|
|
||||||
async fn list_evals(State(state): State<VectorState>) -> impl IntoResponse {
|
async fn list_evals(State(state): State<VectorState>) -> impl IntoResponse {
|
||||||
match harness::EvalSet::list(&state.store).await {
|
Json(state.harness_store.list_all().await)
|
||||||
Ok(names) => Ok(Json(names)),
|
|
||||||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_eval(
|
async fn get_eval(
|
||||||
State(state): State<VectorState>,
|
State(state): State<VectorState>,
|
||||||
Path(name): Path<String>,
|
Path(name): Path<String>,
|
||||||
) -> impl IntoResponse {
|
) -> impl IntoResponse {
|
||||||
match harness::EvalSet::load(&state.store, &name).await {
|
match state.harness_store.get_any(&name).await {
|
||||||
Ok(e) => Ok(Json(e)),
|
Ok(e) => Ok(Json(e)),
|
||||||
Err(err) => Err((StatusCode::NOT_FOUND, err)),
|
Err(err) => Err((StatusCode::NOT_FOUND, err)),
|
||||||
}
|
}
|
||||||
@ -916,7 +1153,7 @@ async fn put_eval(
|
|||||||
.queries
|
.queries
|
||||||
.iter()
|
.iter()
|
||||||
.all(|q| q.ground_truth.is_some());
|
.all(|q| q.ground_truth.is_some());
|
||||||
match harness_set.save(&state.store).await {
|
match state.harness_store.save(&harness_set).await {
|
||||||
Ok(()) => Ok(Json(harness_set)),
|
Ok(()) => Ok(Json(harness_set)),
|
||||||
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||||||
}
|
}
|
||||||
@ -957,8 +1194,8 @@ async fn autogen_eval(
|
|||||||
.await
|
.await
|
||||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("ground truth: {e}")))?;
|
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("ground truth: {e}")))?;
|
||||||
|
|
||||||
harness_set
|
state.harness_store
|
||||||
.save(&state.store)
|
.save(&harness_set)
|
||||||
.await
|
.await
|
||||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("save: {e}")))?;
|
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("save: {e}")))?;
|
||||||
|
|
||||||
@ -1407,7 +1644,12 @@ async fn profile_scoped_search(
|
|||||||
let lance_store = state.lance.store_for(&req.index_name).await
|
let lance_store = state.lance.store_for(&req.index_name).await
|
||||||
.map_err(|e| (StatusCode::BAD_REQUEST, e))?;
|
.map_err(|e| (StatusCode::BAD_REQUEST, e))?;
|
||||||
let t0 = std::time::Instant::now();
|
let t0 = std::time::Instant::now();
|
||||||
match lance_store.search(&query_vec, top_k).await {
|
match lance_store.search(
|
||||||
|
&query_vec,
|
||||||
|
top_k,
|
||||||
|
Some(LANCE_DEFAULT_NPROBES),
|
||||||
|
Some(LANCE_DEFAULT_REFINE_FACTOR),
|
||||||
|
).await {
|
||||||
Ok(hits) => Ok(Json(serde_json::json!({
|
Ok(hits) => Ok(Json(serde_json::json!({
|
||||||
"profile": profile.id,
|
"profile": profile.id,
|
||||||
"source": index_meta.source,
|
"source": index_meta.source,
|
||||||
@ -1516,6 +1758,7 @@ async fn run_autotune_endpoint(
|
|||||||
&state.index_registry,
|
&state.index_registry,
|
||||||
&state.trial_journal,
|
&state.trial_journal,
|
||||||
&state.promotion_registry,
|
&state.promotion_registry,
|
||||||
|
&state.harness_store,
|
||||||
&state.job_tracker,
|
&state.job_tracker,
|
||||||
).await {
|
).await {
|
||||||
Ok(result) => Ok(Json(result)),
|
Ok(result) => Ok(Json(result)),
|
||||||
@ -1636,8 +1879,25 @@ struct LanceSearchRequest {
|
|||||||
query: String,
|
query: String,
|
||||||
#[serde(default = "default_top_k")]
|
#[serde(default = "default_top_k")]
|
||||||
top_k: usize,
|
top_k: usize,
|
||||||
|
/// IVF partitions to probe. `None` uses Lance's built-in default of
|
||||||
|
/// 1, which caps recall well below the index's real capability.
|
||||||
|
/// Recommended: 5–10% of num_partitions (≈20 for a 316-partition
|
||||||
|
/// index). Omitting it here picks the server-side default.
|
||||||
|
#[serde(default)]
|
||||||
|
nprobes: Option<usize>,
|
||||||
|
/// Refine factor — re-rank `top_k * factor` PQ-approximate candidates
|
||||||
|
/// with exact distances before returning `top_k`. Recovers recall
|
||||||
|
/// lost to product quantization.
|
||||||
|
#[serde(default)]
|
||||||
|
refine_factor: Option<u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Server-side defaults when the caller doesn't pin nprobes / refine
|
||||||
|
/// themselves. Tuned for the ~100K × 768d reference workload; see
|
||||||
|
/// docs/ADR-019-vector-storage.md for the recall / latency trade-off.
|
||||||
|
const LANCE_DEFAULT_NPROBES: usize = 20;
|
||||||
|
const LANCE_DEFAULT_REFINE_FACTOR: u32 = 5;
|
||||||
|
|
||||||
fn default_top_k() -> usize { 5 }
|
fn default_top_k() -> usize { 5 }
|
||||||
|
|
||||||
/// Vector search against a Lance dataset. Embeds the query text via the
|
/// Vector search against a Lance dataset. Embeds the query text via the
|
||||||
@ -1660,7 +1920,9 @@ async fn lance_search(
|
|||||||
.map_err(|e| (StatusCode::BAD_REQUEST, e))?;
|
.map_err(|e| (StatusCode::BAD_REQUEST, e))?;
|
||||||
|
|
||||||
let t0 = std::time::Instant::now();
|
let t0 = std::time::Instant::now();
|
||||||
let hits = lance_store.search(&qv, req.top_k).await
|
let nprobes = req.nprobes.or(Some(LANCE_DEFAULT_NPROBES));
|
||||||
|
let refine = req.refine_factor.or(Some(LANCE_DEFAULT_REFINE_FACTOR));
|
||||||
|
let hits = lance_store.search(&qv, req.top_k, nprobes, refine).await
|
||||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e))?;
|
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e))?;
|
||||||
|
|
||||||
Ok(Json(serde_json::json!({
|
Ok(Json(serde_json::json!({
|
||||||
@ -1761,6 +2023,12 @@ struct LanceRecallRequest {
|
|||||||
harness: String,
|
harness: String,
|
||||||
#[serde(default = "default_top_k")]
|
#[serde(default = "default_top_k")]
|
||||||
top_k: usize,
|
top_k: usize,
|
||||||
|
/// Override server defaults so operators can sweep nprobes /
|
||||||
|
/// refine_factor to chart the recall-vs-latency curve.
|
||||||
|
#[serde(default)]
|
||||||
|
nprobes: Option<usize>,
|
||||||
|
#[serde(default)]
|
||||||
|
refine_factor: Option<u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(serde::Serialize)]
|
#[derive(serde::Serialize)]
|
||||||
@ -1784,6 +2052,214 @@ struct LanceRecallQuery {
|
|||||||
hits_returned: usize,
|
hits_returned: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- Phase 19: playbook memory endpoints ---
|
||||||
|
|
||||||
|
/// Pull `(name, city, state)` out of a worker chunk shaped like
/// "{Name} — {Role} in {City}, {State}. Skills: …".
///
/// The name is the text before the first em dash, the role (discarded) runs
/// up to the first " in ", the city is the text up to the next comma, and the
/// state is the leading run of ASCII letters after that comma. Returns `None`
/// when any separator is missing or any of the three parts is empty, so
/// callers can simply skip the boost for that hit.
fn parse_worker_chunk(chunk: &str) -> Option<(String, String, String)> {
    // "Name — rest"
    let mut around_dash = chunk.splitn(2, '—');
    let name = around_dash.next()?.trim();
    let after_dash = around_dash.next()?.trim();

    // "Role in City, ST. …" — the role itself is not needed.
    let mut around_in = after_dash.splitn(2, " in ");
    around_in.next()?;
    let location = around_in.next()?.trim();

    // "City, ST. …"
    let mut around_comma = location.splitn(2, ',');
    let city = around_comma.next()?.trim();
    let state: String = around_comma
        .next()?
        .trim()
        .chars()
        .take_while(char::is_ascii_alphabetic)
        .collect();

    if name.is_empty() || city.is_empty() || state.is_empty() {
        None
    } else {
        Some((name.to_string(), city.to_string(), state))
    }
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct SeedPlaybookRequest {
|
||||||
|
/// One playbook with {operation, approach, context, endorsed_names}.
|
||||||
|
/// City + state are parsed from the operation text.
|
||||||
|
operation: String,
|
||||||
|
#[serde(default)]
|
||||||
|
approach: String,
|
||||||
|
#[serde(default)]
|
||||||
|
context: String,
|
||||||
|
endorsed_names: Vec<String>,
|
||||||
|
/// Append to the existing memory rather than replacing. Default true —
|
||||||
|
/// seeding is a bootstrap/demo tool, not a rebuild substitute.
|
||||||
|
#[serde(default = "default_true")]
|
||||||
|
append: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Bootstrap / test-only: inject a playbook entry directly into
|
||||||
|
/// `playbook_memory` without going through `successful_playbooks`. Useful
|
||||||
|
/// when the source dataset has stale or phantom entries (as the initial
|
||||||
|
/// staffing seed did — names that don't correspond to real workers), and
|
||||||
|
/// you want to demonstrate the feedback loop with a known-good fixture.
|
||||||
|
///
|
||||||
|
/// Production path is always `/rebuild` — this endpoint is for operators
|
||||||
|
/// who need to prime the memory before real playbooks accumulate.
|
||||||
|
async fn seed_playbook_memory(
|
||||||
|
State(state): State<VectorState>,
|
||||||
|
Json(req): Json<SeedPlaybookRequest>,
|
||||||
|
) -> impl IntoResponse {
|
||||||
|
// Embed the entry through the same text shape `rebuild` uses so
|
||||||
|
// similarity math is comparable across seed + real entries.
|
||||||
|
let tmp_entry = playbook_memory::PlaybookEntry {
|
||||||
|
playbook_id: String::new(),
|
||||||
|
operation: req.operation.clone(),
|
||||||
|
approach: req.approach.clone(),
|
||||||
|
context: req.context.clone(),
|
||||||
|
timestamp: chrono::Utc::now().to_rfc3339(),
|
||||||
|
endorsed_names: req.endorsed_names.clone(),
|
||||||
|
city: None, state: None, embedding: None,
|
||||||
|
};
|
||||||
|
let text = format!(
|
||||||
|
"{} | {} | {} | fills: {}",
|
||||||
|
tmp_entry.operation, tmp_entry.approach, tmp_entry.context,
|
||||||
|
tmp_entry.endorsed_names.join(", "),
|
||||||
|
);
|
||||||
|
let resp = match state.ai_client.embed(EmbedRequest { texts: vec![text], model: None }).await {
|
||||||
|
Ok(r) => r,
|
||||||
|
Err(e) => return Err((StatusCode::BAD_GATEWAY, format!("embed seed: {e}"))),
|
||||||
|
};
|
||||||
|
if resp.embeddings.is_empty() {
|
||||||
|
return Err((StatusCode::BAD_GATEWAY, "embed returned nothing".into()));
|
||||||
|
}
|
||||||
|
let emb: Vec<f32> = resp.embeddings[0].iter().map(|&x| x as f32).collect();
|
||||||
|
|
||||||
|
// Parse city/state from the operation ("fill: Role xN in City, ST").
|
||||||
|
// Parser lives in playbook_memory::rebuild — expose via a tiny helper
|
||||||
|
// or inline the same logic here; duplicated briefly since this seed
|
||||||
|
// path is stable but infrequently called.
|
||||||
|
let (city, state_) = {
|
||||||
|
let after_in = req.operation.split(" in ").nth(1).unwrap_or("");
|
||||||
|
let mut parts = after_in.splitn(2, ',');
|
||||||
|
let city = parts.next().map(|s| s.trim().to_string()).filter(|s| !s.is_empty());
|
||||||
|
let state = parts.next().map(|s| s.trim().chars().take_while(|c| c.is_ascii_alphabetic()).collect::<String>()).filter(|s| !s.is_empty());
|
||||||
|
(city, state)
|
||||||
|
};
|
||||||
|
if city.is_none() || state_.is_none() {
|
||||||
|
return Err((StatusCode::BAD_REQUEST,
|
||||||
|
"operation must match 'fill: Role xN in City, ST' shape".into()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stable id: hash of timestamp + operation. Callers get the id back
|
||||||
|
// so they can reference it in citations.
|
||||||
|
let ts = chrono::Utc::now().to_rfc3339();
|
||||||
|
use sha2::{Digest, Sha256};
|
||||||
|
let mut h = Sha256::new();
|
||||||
|
h.update(ts.as_bytes());
|
||||||
|
h.update(b"|");
|
||||||
|
h.update(req.operation.as_bytes());
|
||||||
|
let bytes = h.finalize();
|
||||||
|
let pid = format!("pb-seed-{}", bytes.iter().take(8).map(|b| format!("{b:02x}")).collect::<String>());
|
||||||
|
|
||||||
|
let new_entry = playbook_memory::PlaybookEntry {
|
||||||
|
playbook_id: pid.clone(),
|
||||||
|
operation: req.operation,
|
||||||
|
approach: req.approach,
|
||||||
|
context: req.context,
|
||||||
|
timestamp: ts,
|
||||||
|
endorsed_names: req.endorsed_names,
|
||||||
|
city, state: state_,
|
||||||
|
embedding: Some(emb),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut current = state.playbook_memory.snapshot().await;
|
||||||
|
if req.append {
|
||||||
|
current.push(new_entry);
|
||||||
|
} else {
|
||||||
|
current = vec![new_entry];
|
||||||
|
}
|
||||||
|
if let Err(e) = state.playbook_memory.set_entries(current).await {
|
||||||
|
return Err((StatusCode::INTERNAL_SERVER_ERROR, format!("persist: {e}")));
|
||||||
|
}
|
||||||
|
Ok(Json(serde_json::json!({ "playbook_id": pid, "entries_after": state.playbook_memory.entry_count().await })))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn rebuild_playbook_memory(
|
||||||
|
State(state): State<VectorState>,
|
||||||
|
) -> impl IntoResponse {
|
||||||
|
match playbook_memory::rebuild(
|
||||||
|
&state.playbook_memory,
|
||||||
|
&state.ai_client,
|
||||||
|
&state.catalog,
|
||||||
|
&state.bucket_registry,
|
||||||
|
).await {
|
||||||
|
Ok(report) => Ok(Json(report)),
|
||||||
|
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Path 2 foundation — dump in-memory playbook_memory state to a fresh
|
||||||
|
// `successful_playbooks_live` dataset. Cheap to call (writes one parquet,
|
||||||
|
// updates one manifest), so /log can call it after every seed to keep the
|
||||||
|
// SQL-queryable surface honest without the destructive REPLACE bug that
|
||||||
|
// /ingest/file has.
|
||||||
|
async fn persist_playbook_memory_sql(
|
||||||
|
State(state): State<VectorState>,
|
||||||
|
) -> impl IntoResponse {
|
||||||
|
match playbook_memory::persist_to_sql(&state.playbook_memory, &state.catalog).await {
|
||||||
|
Ok(report) => Ok(Json(report)),
|
||||||
|
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct PatternsRequest {
|
||||||
|
query: String,
|
||||||
|
#[serde(default = "default_pattern_k")]
|
||||||
|
top_k_playbooks: usize,
|
||||||
|
/// Minimum frequency (0.0-1.0) for a trait to make the report.
|
||||||
|
/// Default 0.4 — at least 40% of examined workers must share it.
|
||||||
|
#[serde(default = "default_pattern_min_freq")]
|
||||||
|
min_trait_frequency: f32,
|
||||||
|
}
|
||||||
|
/// Serde default for `PatternsRequest::top_k_playbooks`.
fn default_pattern_k() -> usize {
    const DEFAULT_PATTERN_K: usize = 10;
    DEFAULT_PATTERN_K
}
|
||||||
|
/// Serde default for `PatternsRequest::min_trait_frequency`.
fn default_pattern_min_freq() -> f32 {
    const DEFAULT_MIN_TRAIT_FREQUENCY: f32 = 0.4;
    DEFAULT_MIN_TRAIT_FREQUENCY
}
|
||||||
|
|
||||||
|
// Path 2 — meta-index discovery surface. "What did past similar fills
|
||||||
|
// have in common that I didn't ask about?" — surfaces signals like
|
||||||
|
// recurring certifications, skill clusters, archetype tendencies.
|
||||||
|
async fn discover_playbook_patterns(
|
||||||
|
State(state): State<VectorState>,
|
||||||
|
Json(req): Json<PatternsRequest>,
|
||||||
|
) -> impl IntoResponse {
|
||||||
|
match playbook_memory::discover_patterns(
|
||||||
|
&state.playbook_memory,
|
||||||
|
&state.ai_client,
|
||||||
|
&state.catalog,
|
||||||
|
&state.bucket_registry,
|
||||||
|
&req.query,
|
||||||
|
req.top_k_playbooks,
|
||||||
|
req.min_trait_frequency,
|
||||||
|
).await {
|
||||||
|
Ok(report) => Ok(Json(report)),
|
||||||
|
Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn playbook_memory_stats(
|
||||||
|
State(state): State<VectorState>,
|
||||||
|
) -> impl IntoResponse {
|
||||||
|
let entries = state.playbook_memory.snapshot().await;
|
||||||
|
Json(serde_json::json!({
|
||||||
|
"entries": entries.len(),
|
||||||
|
"total_names_endorsed": entries.iter().map(|e| e.endorsed_names.len()).sum::<usize>(),
|
||||||
|
"entries_with_embeddings": entries.iter().filter(|e| e.embedding.is_some()).count(),
|
||||||
|
"sample": entries.iter().take(3).map(|e| serde_json::json!({
|
||||||
|
"id": e.playbook_id,
|
||||||
|
"operation": e.operation,
|
||||||
|
"city": e.city,
|
||||||
|
"state": e.state,
|
||||||
|
"endorsed": e.endorsed_names,
|
||||||
|
})).collect::<Vec<_>>(),
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
async fn lance_recall_harness(
|
async fn lance_recall_harness(
|
||||||
State(state): State<VectorState>,
|
State(state): State<VectorState>,
|
||||||
Path(index_name): Path<String>,
|
Path(index_name): Path<String>,
|
||||||
@ -1791,7 +2267,7 @@ async fn lance_recall_harness(
|
|||||||
) -> impl IntoResponse {
|
) -> impl IntoResponse {
|
||||||
let t0 = std::time::Instant::now();
|
let t0 = std::time::Instant::now();
|
||||||
|
|
||||||
let harness_set = harness::EvalSet::load(&state.store, &req.harness).await
|
let harness_set = state.harness_store.load_for_index(&index_name, &req.harness).await
|
||||||
.map_err(|e| (StatusCode::NOT_FOUND, format!("harness: {e}")))?;
|
.map_err(|e| (StatusCode::NOT_FOUND, format!("harness: {e}")))?;
|
||||||
if !harness_set.ground_truth_built {
|
if !harness_set.ground_truth_built {
|
||||||
return Err((StatusCode::BAD_REQUEST,
|
return Err((StatusCode::BAD_REQUEST,
|
||||||
@ -1817,7 +2293,12 @@ async fn lance_recall_harness(
|
|||||||
};
|
};
|
||||||
|
|
||||||
let qt0 = std::time::Instant::now();
|
let qt0 = std::time::Instant::now();
|
||||||
let hits = lance_store.search(qv, k).await
|
let hits = lance_store.search(
|
||||||
|
qv,
|
||||||
|
k,
|
||||||
|
Some(req.nprobes.unwrap_or(LANCE_DEFAULT_NPROBES)),
|
||||||
|
Some(req.refine_factor.unwrap_or(LANCE_DEFAULT_REFINE_FACTOR)),
|
||||||
|
).await
|
||||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("search: {e}")))?;
|
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("search: {e}")))?;
|
||||||
let lat_us = qt0.elapsed().as_micros() as f32;
|
let lat_us = qt0.elapsed().as_micros() as f32;
|
||||||
|
|
||||||
|
|||||||
@ -74,7 +74,10 @@ server.tool(
|
|||||||
top_k: z.number().default(5),
|
top_k: z.number().default(5),
|
||||||
},
|
},
|
||||||
async ({ question, sql_filter, dataset, id_column, top_k }) => {
|
async ({ question, sql_filter, dataset, id_column, top_k }) => {
|
||||||
const body: any = { question, index_name: "workers_500k_v1", filter_dataset: dataset, id_column, top_k, generate: true };
|
const body: any = {
|
||||||
|
question, index_name: "workers_500k_v1", filter_dataset: dataset, id_column, top_k, generate: true,
|
||||||
|
use_playbook_memory: true,
|
||||||
|
};
|
||||||
if (sql_filter) body.sql_filter = sql_filter;
|
if (sql_filter) body.sql_filter = sql_filter;
|
||||||
const r = await api("POST", "/vectors/hybrid", body);
|
const r = await api("POST", "/vectors/hybrid", body);
|
||||||
return { content: [{ type: "text" as const, text: JSON.stringify(r, null, 2) }] };
|
return { content: [{ type: "text" as const, text: JSON.stringify(r, null, 2) }] };
|
||||||
@ -109,6 +112,7 @@ server.tool(
|
|||||||
index_name: "workers_500k_v1", sql_filter: filter,
|
index_name: "workers_500k_v1", sql_filter: filter,
|
||||||
filter_dataset: "ethereal_workers", id_column: "worker_id",
|
filter_dataset: "ethereal_workers", id_column: "worker_id",
|
||||||
top_k: headcount * 2, generate: false,
|
top_k: headcount * 2, generate: false,
|
||||||
|
use_playbook_memory: true,
|
||||||
});
|
});
|
||||||
let matches = r.sources || [];
|
let matches = r.sources || [];
|
||||||
if (required_certs.length > 0) {
|
if (required_certs.length > 0) {
|
||||||
@ -384,6 +388,11 @@ async function main() {
|
|||||||
question: b.question, index_name: b.index || "workers_500k_v1",
|
question: b.question, index_name: b.index || "workers_500k_v1",
|
||||||
sql_filter: b.sql_filter, filter_dataset: b.dataset || "ethereal_workers",
|
sql_filter: b.sql_filter, filter_dataset: b.dataset || "ethereal_workers",
|
||||||
id_column: b.id_column || "worker_id", top_k: b.top_k || 5, generate: b.generate !== false,
|
id_column: b.id_column || "worker_id", top_k: b.top_k || 5, generate: b.generate !== false,
|
||||||
|
use_playbook_memory: b.use_playbook_memory !== false,
|
||||||
|
// Forward explicitly so Bun /search isn't capped by the
|
||||||
|
// server's default — boost silently misses good matches when
|
||||||
|
// memory has >25 entries and only top-5 playbooks are scanned.
|
||||||
|
playbook_memory_k: b.playbook_memory_k ?? 25,
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -403,6 +412,8 @@ async function main() {
|
|||||||
index_name: b.index || "workers_500k_v1", sql_filter: filter,
|
index_name: b.index || "workers_500k_v1", sql_filter: filter,
|
||||||
filter_dataset: b.dataset || "ethereal_workers",
|
filter_dataset: b.dataset || "ethereal_workers",
|
||||||
id_column: "worker_id", top_k: (b.headcount || 5) * 2, generate: false,
|
id_column: "worker_id", top_k: (b.headcount || 5) * 2, generate: false,
|
||||||
|
use_playbook_memory: true,
|
||||||
|
playbook_memory_k: 25,
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -418,14 +429,57 @@ async function main() {
|
|||||||
return ok(await api("POST", "/vectors/rag", { index_name: b.index || "workers_500k_v1", question: b.question, top_k: b.top_k || 5 }));
|
return ok(await api("POST", "/vectors/rag", { index_name: b.index || "workers_500k_v1", question: b.question, top_k: b.top_k || 5 }));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tool: log success
|
// Tool: log success.
|
||||||
|
//
|
||||||
|
// BUG FIX 2026-04-20: previously this also POSTed a 1-row CSV to
|
||||||
|
// /ingest/file?name=successful_playbooks. That endpoint REPLACES
|
||||||
|
// the dataset's object list rather than appending — so every /log
|
||||||
|
// call destroyed all prior rows in the SQL-queryable
|
||||||
|
// successful_playbooks table. Chain-of-custody trace caught it:
|
||||||
|
// sp_rows went 33 → 1 in a single /log call.
|
||||||
|
//
|
||||||
|
// Until a proper append endpoint exists (Phase 8 delta write
|
||||||
|
// surface for the SQL table), /log writes ONLY to playbook_memory
|
||||||
|
// (in-memory append-only store, works correctly for boost). The
|
||||||
|
// SQL successful_playbooks table is now treated as derived state
|
||||||
|
// that gets rebuilt explicitly via /vectors/playbook_memory/rebuild
|
||||||
|
// — never written to by the recruiter path.
|
||||||
if (url.pathname === "/log") {
|
if (url.pathname === "/log") {
|
||||||
const b = await json();
|
const b = await json();
|
||||||
const csv = `timestamp,operation,approach,result,context\n"${new Date().toISOString()}","${(b.operation||"").replace(/"/g,'""')}","${(b.approach||"").replace(/"/g,'""')}","${(b.result||"").replace(/"/g,'""')}","${(b.context||"").replace(/"/g,'""')}"`;
|
// Result format expected: "{filled}/{needed} filled → Name1, Name2, Name3"
|
||||||
const form = new FormData();
|
const result = String(b.result || "");
|
||||||
form.append("file", new Blob([csv], { type: "text/csv" }), "playbook.csv");
|
const arrowIdx = result.indexOf("→");
|
||||||
const r = await fetch(`${BASE}/ingest/file?name=successful_playbooks`, { method: "POST", body: form });
|
const namesPart = arrowIdx >= 0 ? result.slice(arrowIdx + 1) : "";
|
||||||
return ok({ logged: true, response: await r.text() });
|
const endorsed = namesPart.split(",").map(s => s.trim()).filter(Boolean);
|
||||||
|
let seeded = 0;
|
||||||
|
let persisted_rows = 0;
|
||||||
|
if (endorsed.length && /fill:.+ in .+,.+/i.test(String(b.operation || ""))) {
|
||||||
|
const canonicalApproach = `${(b.approach || "manual log").split(/[\.\n]/)[0]}`.slice(0, 80);
|
||||||
|
const canonicalContext = `${(b.context || "").split(/[\.\n]/)[0]}`.slice(0, 80);
|
||||||
|
const seedRes = await api("POST", "/vectors/playbook_memory/seed", {
|
||||||
|
operation: b.operation,
|
||||||
|
approach: canonicalApproach,
|
||||||
|
context: canonicalContext,
|
||||||
|
endorsed_names: endorsed,
|
||||||
|
append: true,
|
||||||
|
}).catch(() => null) as any;
|
||||||
|
if (seedRes && seedRes.playbook_id) {
|
||||||
|
seeded = endorsed.length;
|
||||||
|
// After every successful seed, persist memory → SQL so the
|
||||||
|
// successful_playbooks_live table reflects current operator
|
||||||
|
// activity. /persist_sql writes the FULL state, which is safe
|
||||||
|
// because in-memory playbook_memory IS the source of truth
|
||||||
|
// (no concurrent writer outside this process modifies it).
|
||||||
|
const pr = await api("POST", "/vectors/playbook_memory/persist_sql", {}).catch(() => null) as any;
|
||||||
|
if (pr && typeof pr.rows_persisted === "number") persisted_rows = pr.rows_persisted;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ok({
|
||||||
|
logged: true,
|
||||||
|
seeded,
|
||||||
|
persisted_to_sql: persisted_rows,
|
||||||
|
note: "successful_playbooks_live (NOT successful_playbooks) is the SQL surface for live operator activity. /log is non-destructive.",
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tool: get playbooks
|
// Tool: get playbooks
|
||||||
@ -480,6 +534,7 @@ async function main() {
|
|||||||
question: "reliable forklift operator", index_name: "workers_500k_v1",
|
question: "reliable forklift operator", index_name: "workers_500k_v1",
|
||||||
sql_filter: "role = 'Forklift Operator' AND state = 'IL' AND CAST(reliability AS DOUBLE) > 0.8",
|
sql_filter: "role = 'Forklift Operator' AND state = 'IL' AND CAST(reliability AS DOUBLE) > 0.8",
|
||||||
filter_dataset: "workers_500k", id_column: "worker_id", top_k: 5, generate: false,
|
filter_dataset: "workers_500k", id_column: "worker_id", top_k: 5, generate: false,
|
||||||
|
use_playbook_memory: true,
|
||||||
});
|
});
|
||||||
tests.push({
|
tests.push({
|
||||||
name: "Hybrid SQL+Vector Search", ms: Date.now() - ht0,
|
name: "Hybrid SQL+Vector Search", ms: Date.now() - ht0,
|
||||||
@ -987,6 +1042,7 @@ tr:hover{background:#111827}
|
|||||||
question: "reliable forklift operator", index_name: "workers_500k_v1",
|
question: "reliable forklift operator", index_name: "workers_500k_v1",
|
||||||
sql_filter: "role = 'Forklift Operator' AND state = 'IL' AND CAST(reliability AS DOUBLE) > 0.8",
|
sql_filter: "role = 'Forklift Operator' AND state = 'IL' AND CAST(reliability AS DOUBLE) > 0.8",
|
||||||
filter_dataset: "workers_500k", id_column: "worker_id", top_k: 5, generate: false,
|
filter_dataset: "workers_500k", id_column: "worker_id", top_k: 5, generate: false,
|
||||||
|
use_playbook_memory: true,
|
||||||
});
|
});
|
||||||
tests.push({
|
tests.push({
|
||||||
name: "Hybrid SQL+Vector", ms: Date.now() - ht0,
|
name: "Hybrid SQL+Vector", ms: Date.now() - ht0,
|
||||||
@ -1435,6 +1491,26 @@ const SCENARIOS = [
|
|||||||
|
|
||||||
function pick<T>(arr: T[]): T { return arr[Math.floor(Math.random() * arr.length)]; }
|
function pick<T>(arr: T[]): T { return arr[Math.floor(Math.random() * arr.length)]; }
|
||||||
|
|
||||||
|
// Seed playbook_memory from a filled contract so the next hybrid query
// ranks against it. Called per contract by runWeekSimulation at end of day
// (the /log route posts to the same seed endpoint with its own payload).
// Fail-soft — seeding is best-effort: a failed seed is logged to stderr and
// never thrown to the caller.
async function seedPlaybookFromContract(c: any) {
  // Endorse real worker names only. A match whose `name` is missing, is not
  // a string, or is just its doc_id (the fallback used when chunk_text has no
  // name) would plant an id in memory as if it were a person. The equality
  // check covers any id format; the "W500-" prefix check is kept as well.
  const names: string[] = (c.matches || []).slice(0, 5)
    .filter((m: any) =>
      typeof m?.name === "string" &&
      m.name.length > 0 &&
      m.name !== m.doc_id &&
      !m.name.startsWith("W500-"))
    .map((m: any) => m.name);
  if (!names.length) return;

  const op = `fill: ${c.role} x${c.headcount} in ${c.city}, ${c.state}`;
  try {
    await api("POST", "/vectors/playbook_memory/seed", {
      operation: op,
      approach: `${c.situation || c.priority || "fill"} → hybrid search`,
      context: `client=${c.client || ""} start=${c.start || ""}`,
      endorsed_names: names,
      append: true,
    });
  } catch (err) {
    // Best-effort by design, but leave a trace so a dead feedback loop is
    // diagnosable instead of silent.
    console.warn(`playbook_memory seed failed for "${op}": ${String(err)}`);
  }
}
|
||||||
|
|
||||||
async function runWeekSimulation() {
|
async function runWeekSimulation() {
|
||||||
const days = ["Monday","Tuesday","Wednesday","Thursday","Friday"];
|
const days = ["Monday","Tuesday","Wednesday","Thursday","Friday"];
|
||||||
const staffers = ["Sarah (Lead)","Mike (Senior)","Kim (Junior)"];
|
const staffers = ["Sarah (Lead)","Mike (Senior)","Kim (Junior)"];
|
||||||
@ -1468,7 +1544,7 @@ async function runWeekSimulation() {
|
|||||||
if (priority === "urgent") emergencies++;
|
if (priority === "urgent") emergencies++;
|
||||||
totalNeeded += headcount;
|
totalNeeded += headcount;
|
||||||
|
|
||||||
// Run hybrid search
|
// Run hybrid search — Phase 19: boost on so past playbooks shape ranking
|
||||||
let filled = 0;
|
let filled = 0;
|
||||||
let matches: any[] = [];
|
let matches: any[] = [];
|
||||||
try {
|
try {
|
||||||
@ -1481,12 +1557,15 @@ async function runWeekSimulation() {
|
|||||||
id_column: "worker_id",
|
id_column: "worker_id",
|
||||||
top_k: headcount + 2,
|
top_k: headcount + 2,
|
||||||
generate: false,
|
generate: false,
|
||||||
|
use_playbook_memory: true,
|
||||||
});
|
});
|
||||||
matches = (r.sources || []).slice(0, headcount).map((s: any) => ({
|
matches = (r.sources || []).slice(0, headcount).map((s: any) => ({
|
||||||
doc_id: s.doc_id,
|
doc_id: s.doc_id,
|
||||||
name: s.chunk_text?.split("—")[0]?.trim() || s.doc_id,
|
name: s.chunk_text?.split("—")[0]?.trim() || s.doc_id,
|
||||||
score: s.score,
|
score: s.score,
|
||||||
chunk_text: s.chunk_text || "",
|
chunk_text: s.chunk_text || "",
|
||||||
|
playbook_boost: s.playbook_boost || 0,
|
||||||
|
playbook_citations: s.playbook_citations || [],
|
||||||
}));
|
}));
|
||||||
filled = matches.length;
|
filled = matches.length;
|
||||||
} catch {}
|
} catch {}
|
||||||
@ -1501,7 +1580,15 @@ async function runWeekSimulation() {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// End of day: log playbook + prepare handoff
|
// End of day: seed playbook_memory with TODAY's filled contracts so
|
||||||
|
// tomorrow's hybrid search ranks against them. This is the in-week
|
||||||
|
// feedback loop — without this, day 5 doesn't benefit from day 1.
|
||||||
|
for (const c of contracts) {
|
||||||
|
if (c.matches && c.matches.length) {
|
||||||
|
await seedPlaybookFromContract(c).catch(() => {});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (d < 4) {
|
if (d < 4) {
|
||||||
handoffs++;
|
handoffs++;
|
||||||
try {
|
try {
|
||||||
@ -1530,29 +1617,18 @@ async function runWeekSimulation() {
|
|||||||
playbook_entries: playbookEntries,
|
playbook_entries: playbookEntries,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Log every filled contract as a playbook entry — this is the training data
|
// BUG FIX 2026-04-20: previously this POSTed a multi-row CSV to
|
||||||
try {
|
// /ingest/file?name=successful_playbooks at end of every simulation.
|
||||||
const ts = new Date().toISOString();
|
// That endpoint REPLACES the dataset's object list — so each
|
||||||
const rows: string[] = [];
|
// /simulation/run wiped the prior simulation's rows. The SQL
|
||||||
for (const day of results) {
|
// successful_playbooks table was never accumulating; it always reflected
|
||||||
for (const c of day.contracts) {
|
// only the most-recent simulation batch.
|
||||||
if (c.matches && c.matches.length > 0) {
|
//
|
||||||
const workerNames = c.matches.slice(0, 3).map((m: any) => m.name || m.doc_id).join(", ");
|
// Per-day per-contract seeding via /vectors/playbook_memory/seed
|
||||||
const op = `fill: ${c.role} x${c.headcount} in ${c.city}, ${c.state}`;
|
// (added Pass 1, runs inside the day loop above) is the path that
|
||||||
const approach = `${c.situation} (${c.priority}) → hybrid search`;
|
// actually accumulates feedback. The SQL successful_playbooks table is
|
||||||
const result = `${c.filled}/${c.headcount} filled → ${workerNames}`;
|
// intentionally not written by /simulation/run anymore until a proper
|
||||||
const context = `client=${c.client} start=${c.start} scenario=${c.situation}`;
|
// append surface exists.
|
||||||
rows.push(`"${ts}","${op.replace(/"/g,'""')}","${approach}","${result.replace(/"/g,'""')}","${context.replace(/"/g,'""')}"`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (rows.length) {
|
|
||||||
const csv = `timestamp,operation,approach,result,context\n${rows.join("\n")}`;
|
|
||||||
const form = new FormData();
|
|
||||||
form.append("file", new Blob([csv], { type: "text/csv" }), "playbook.csv");
|
|
||||||
await fetch(`${BASE}/ingest/file?name=successful_playbooks`, { method: "POST", body: form });
|
|
||||||
}
|
|
||||||
} catch {}
|
|
||||||
|
|
||||||
return { days: results, summary };
|
return { days: results, summary };
|
||||||
}
|
}
|
||||||
|
|||||||
@ -384,11 +384,13 @@ function addContractInsight(parent,c,isUrgent){
|
|||||||
if(isUrgent&&i===0)label='FIRST CHOICE — highest match score, call first';
|
if(isUrgent&&i===0)label='FIRST CHOICE — highest match score, call first';
|
||||||
else if(isUrgent&&i>0&&i<c.headcount)label='';
|
else if(isUrgent&&i>0&&i<c.headcount)label='';
|
||||||
else if(isUrgent&&i>=c.headcount)label='BACKUP — if someone above can\'t make it';
|
else if(isUrgent&&i>=c.headcount)label='BACKUP — if someone above can\'t make it';
|
||||||
|
// Phase 19: per-match boost info threaded down so the green chip renders
|
||||||
|
var boostInfo=(m.playbook_boost>0)?{boost:m.playbook_boost,citations:m.playbook_citations||[]}:null;
|
||||||
addWorkerInsight(cd,w.nm,
|
addWorkerInsight(cd,w.nm,
|
||||||
[w.role,w.loc].filter(Boolean).join(' · '),
|
[w.role,w.loc].filter(Boolean).join(' · '),
|
||||||
label||buildWhyText(w,c),i,
|
label||buildWhyText(w,c),i,
|
||||||
isUrgent&&i===0?'#f85149':isUrgent&&i>=c.headcount?'#484f58':null,
|
isUrgent&&i===0?'#f85149':isUrgent&&i>=c.headcount?'#484f58':null,
|
||||||
w);
|
w,boostInfo);
|
||||||
});
|
});
|
||||||
var remaining=c.matches.length-showCount;
|
var remaining=c.matches.length-showCount;
|
||||||
if(remaining>0){
|
if(remaining>0){
|
||||||
@ -570,12 +572,23 @@ function addWorkerInsight(parent,name,detail,why,idx,highlight){
|
|||||||
if(highlight)w.style.borderLeft='3px solid '+highlight;
|
if(highlight)w.style.borderLeft='3px solid '+highlight;
|
||||||
w.style.cursor='pointer';
|
w.style.cursor='pointer';
|
||||||
var workerDataRef=arguments[6]||null; // passed as 7th arg
|
var workerDataRef=arguments[6]||null; // passed as 7th arg
|
||||||
|
var boostInfo=arguments[7]||null; // {boost, citations} — Phase 19
|
||||||
w.onclick=function(){if(workerDataRef)showProfile(workerDataRef)};
|
w.onclick=function(){if(workerDataRef)showProfile(workerDataRef)};
|
||||||
var av=document.createElement('div');av.className='av';av.style.background=AC[(idx||0)%AC.length];
|
var av=document.createElement('div');av.className='av';av.style.background=AC[(idx||0)%AC.length];
|
||||||
av.textContent=(name||'?').split(' ').map(function(n){return(n[0]||'').toUpperCase()}).join('').substring(0,2);
|
av.textContent=(name||'?').split(' ').map(function(n){return(n[0]||'').toUpperCase()}).join('').substring(0,2);
|
||||||
w.appendChild(av);
|
w.appendChild(av);
|
||||||
var info=document.createElement('div');info.className='info';
|
var info=document.createElement('div');info.className='info';
|
||||||
var nm=document.createElement('div');nm.className='nm';nm.textContent=name;
|
var nm=document.createElement('div');nm.className='nm';nm.textContent=name;
|
||||||
|
// Phase 19: when a past playbook endorsed this worker, show a green chip
|
||||||
|
// next to the name. Hover reveals the citation IDs.
|
||||||
|
if(boostInfo && boostInfo.boost > 0){
|
||||||
|
var chip=document.createElement('span');
|
||||||
|
chip.style.cssText='display:inline-block;margin-left:8px;padding:2px 7px;border-radius:9px;font-size:10px;font-weight:600;background:#0d2818;border:1px solid #2ea043;color:#3fb950;vertical-align:middle';
|
||||||
|
var n=(boostInfo.citations && boostInfo.citations.length) || 0;
|
||||||
|
chip.textContent='Endorsed · '+n+' playbook'+(n!==1?'s':'');
|
||||||
|
chip.title='Boosted by past playbooks: '+(boostInfo.citations||[]).join(', ');
|
||||||
|
nm.appendChild(chip);
|
||||||
|
}
|
||||||
var dt=document.createElement('div');dt.className='detail';dt.textContent=detail;
|
var dt=document.createElement('div');dt.className='detail';dt.textContent=detail;
|
||||||
info.appendChild(nm);info.appendChild(dt);
|
info.appendChild(nm);info.appendChild(dt);
|
||||||
if(why){var wh=document.createElement('div');wh.className='why';wh.textContent=why;info.appendChild(wh)}
|
if(why){var wh=document.createElement('div');wh.className='why';wh.textContent=why;info.appendChild(wh)}
|
||||||
|
|||||||
351
tests/multi-agent/agent.ts
Normal file
351
tests/multi-agent/agent.ts
Normal file
@ -0,0 +1,351 @@
|
|||||||
|
// Shared runtime for one agent. An agent is a role (executor or reviewer),
|
||||||
|
// a model name, and a conversation the orchestrator hands it. The agent
|
||||||
|
// produces ONE structured Action per turn; the orchestrator applies tool
|
||||||
|
// calls and feeds results back.
|
||||||
|
//
|
||||||
|
// Fail-fast: every HTTP error, parse error, and Ollama error throws. The
|
||||||
|
// orchestrator catches at the top and exits non-zero with the full log.
|
||||||
|
|
||||||
|
export const GATEWAY = "http://localhost:3100";
|
||||||
|
export const SIDECAR = "http://localhost:3200";
|
||||||
|
|
||||||
|
// --- Shared types ---
|
||||||
|
|
||||||
|
export type Role = "executor" | "reviewer";
|
||||||
|
|
||||||
|
export interface TaskSpec {
|
||||||
|
id: string;
|
||||||
|
operation: string; // "fill: Welder x2 in Columbus, OH"
|
||||||
|
target_role: string; // "Welder"
|
||||||
|
target_count: number; // 2
|
||||||
|
target_city: string; // "Columbus"
|
||||||
|
target_state: string; // "OH"
|
||||||
|
approach_hint?: string; // e.g. "hybrid search"; agent is free to ignore
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface LogEntry {
|
||||||
|
turn: number;
|
||||||
|
role: Role;
|
||||||
|
model: string;
|
||||||
|
at: string;
|
||||||
|
kind:
|
||||||
|
| "plan"
|
||||||
|
| "tool_call"
|
||||||
|
| "tool_result"
|
||||||
|
| "critique"
|
||||||
|
| "propose_done"
|
||||||
|
| "consensus_done"
|
||||||
|
| "error";
|
||||||
|
content: any;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Action = what an agent returns on one turn. Strict shape so we can
|
||||||
|
// enforce it at parse time rather than prompt-engineer around malformed
|
||||||
|
// JSON.
|
||||||
|
export type Action =
|
||||||
|
| { kind: "tool_call"; tool: string; args: Record<string, any>; rationale: string }
|
||||||
|
| { kind: "propose_done"; fills: Fill[]; rationale: string }
|
||||||
|
| { kind: "critique"; verdict: "continue" | "drift" | "approve_done"; notes: string }
|
||||||
|
| { kind: "plan"; steps: string[] };
|
||||||
|
|
||||||
|
export interface Fill {
|
||||||
|
candidate_id: string;
|
||||||
|
name: string;
|
||||||
|
reason: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- HTTP helpers (fail-fast) ---
|
||||||
|
|
||||||
|
// Fail-fast JSON request helper. Sends `body` (when given) as JSON and
// returns the parsed JSON response. Throws on a non-2xx status (message
// includes the response text) and on a success response that is not valid
// JSON (message names the request it belongs to).
async function http<T>(method: string, url: string, body?: any): Promise<T> {
  const res = await fetch(url, {
    method,
    headers: { "Content-Type": "application/json" },
    // Only null/undefined mean "no payload". A truthiness check would
    // silently drop valid JSON bodies such as 0, "" or false.
    body: body == null ? undefined : JSON.stringify(body),
  });
  const text = await res.text();
  if (!res.ok) {
    throw new Error(`${method} ${url} → ${res.status}: ${text}`);
  }
  try {
    return JSON.parse(text) as T;
  } catch {
    throw new Error(`${method} ${url} → ${res.status}: response is not valid JSON: ${text.slice(0, 200)}`);
  }
}
|
||||||
|
|
||||||
|
// Tool calls land in the Phase 12 audit log keyed by this agent name.
|
||||||
|
// Distinguishable from human-driven calls (agent=="operator" or similar)
|
||||||
|
// so post-hoc queries can separate multi-agent runs.
|
||||||
|
export const TOOL_AGENT_ID = "multi-agent-test";
|
||||||
|
|
||||||
|
// Invoke a gateway tool by name: POSTs `{ params, agent }` to
// `${GATEWAY}/tools/<tool>/call`, tagging the call with TOOL_AGENT_ID.
// Throws on any HTTP failure (see `http`).
export async function callTool(tool: string, args: Record<string, any>): Promise<any> {
  // The tool name can come straight from agent output, so it is encoded as
  // a single path segment; a name containing "/", "?" or "#" must not be
  // able to redirect the request to a different route.
  const segment = encodeURIComponent(tool);
  return http("POST", `${GATEWAY}/tools/${segment}/call`, {
    params: args,
    agent: TOOL_AGENT_ID,
  });
}
|
||||||
|
|
||||||
|
// Direct call to the gateway's hybrid endpoint (SQL filter + semantic rank).
//   sql_filter — SQL WHERE clause used to narrow candidates
//   question   — text to rank the narrowed set against
//   k          — number of results requested (sent as `top_k`)
//   index_name — vector index to search
// The payload uses `index_name` / `top_k`, the field names every other
// /vectors/hybrid caller in this repo sends; the previous `{…, k}` body
// carried neither.
export async function hybridSearch(
  sql_filter: string,
  question: string,
  k = 10,
  index_name = "workers_500k_v1",
): Promise<any> {
  return http("POST", `${GATEWAY}/vectors/hybrid`, {
    index_name,
    sql_filter,
    question,
    top_k: k,
  });
}
|
||||||
|
|
||||||
|
// Run a SQL statement through the gateway's /query/sql endpoint, asking for
// JSON-formatted output. Throws on any HTTP failure (see `http`).
export async function sqlQuery(sql: string): Promise<any> {
  const endpoint = `${GATEWAY}/query/sql`;
  const payload = { sql, format: "json" };
  return http("POST", endpoint, payload);
}
|
||||||
|
|
||||||
|
// Text generation via the sidecar's /generate endpoint. Defaults:
// temperature 0.3, max_tokens 800; `system` is only sent when provided.
// keep_alive is not passed through — per the original note, Ollama's own
// default (5 min) keeps the model warm between turns.
// Throws when the reply has no non-empty string `text`.
export async function generate(model: string, prompt: string, opts: {
  max_tokens?: number;
  temperature?: number;
  system?: string;
} = {}): Promise<string> {
  const payload: Record<string, any> = {
    model,
    prompt,
    temperature: opts.temperature ?? 0.3,
    max_tokens: opts.max_tokens ?? 800,
    ...(opts.system ? { system: opts.system } : {}),
  };

  const reply = await http<any>("POST", `${SIDECAR}/generate`, payload);
  const text = reply.text ?? "";
  if (typeof text === "string" && text) {
    return text;
  }
  throw new Error(`generate returned empty text from ${model}: ${JSON.stringify(reply).slice(0, 200)}`);
}
|
||||||
|
|
||||||
|
// --- Prompt construction ---
|
||||||
|
|
||||||
|
// Tool reference embedded verbatim into the executor prompt. The sql example
// filters on a numeric worker_id so that it agrees with the "ID mapping" rule
// further down (worker_id is an INTEGER; doc_ids carry a "W500K-" prefix).
const TOOL_CATALOG = `
Available tools (each takes a JSON "args" object):

- hybrid_search(sql_filter: string, question: string, index_name: string, k?: number)
→ Narrow workers via SQL WHERE clause, then rank by semantic match.
→ Canonical production tool for fill tasks. Always use this FIRST.
→ Example args:
{"index_name":"workers_500k_v1",
"sql_filter":"LOWER(role) LIKE '%weld%' AND city = 'Toledo' AND state = 'OH' AND availability > 0.5",
"question":"reliable welder with OSHA certs",
"k":10}

- sql(query: string)
→ Raw read-only SELECT. Use for verification (confirm a worker exists,
check city/role/availability) after hybrid_search surfaces candidates.
→ Schema of workers_500k: worker_id, name, role, email, phone, city,
state, zip, skills, certifications, archetype, reliability,
responsiveness, engagement, communications, compliance, availability,
resume_text.
→ Example args:
{"query":"SELECT worker_id, name, role, city, state, availability FROM workers_500k WHERE worker_id = 123456"}

Rules:
- hybrid_search returns sources[] each with {doc_id, chunk_text, score, sql_verified}.
- **ID mapping:** vector doc_ids look like "W500K-7995" (prefix + number).
The SQL worker_id is an INTEGER. To go from doc_id to SQL, strip the
"W500K-" prefix and cast:
SELECT ... FROM workers_500k WHERE worker_id = CAST(SUBSTR('W500K-7995', 7) AS BIGINT)
or more simply: WHERE worker_id = 7995.
- Names are NOT unique. Always identify by worker_id, never by name alone.
- Return EXACTLY ONE JSON object per turn. No prose outside the JSON.
`;
|
||||||
|
|
||||||
|
/**
 * Condense one log entry into a single prompt-friendly line.
 *
 * Each entry kind gets its own compact rendering so agents see the substance
 * of prior turns rather than a wall of raw JSON. hybrid_search results need
 * this most: raw JSON would bury sources[] beyond any reasonable truncation.
 *
 * @param e Log entry; a missing `content` is treated as an empty object.
 * @returns One-line summary. Unrecognized kinds fall back to the first 200
 *          characters of the JSON-serialized content.
 */
function summarizeEntry(e: LogEntry): string {
  const body = e.content ?? {};
  const kind = e.kind;

  if (kind === "plan") {
    const numbered = (body.steps ?? []).map((step: string, idx: number) => `${idx + 1}.${step}`);
    return `PLAN: ${numbered.join(" ")}`;
  }

  if (kind === "tool_call") {
    const argsPreview = JSON.stringify(body.args ?? {}).slice(0, 250);
    const why = body.rationale ? ` — ${body.rationale}` : "";
    return `TOOL_CALL ${body.tool}(${argsPreview})${why}`;
  }

  if (kind === "tool_result") {
    if (body.error) return `TOOL_RESULT error: ${body.error}`;

    // hybrid_search response: show the first five sources, count the rest.
    if (Array.isArray(body.sources)) {
      const shown = body.sources.slice(0, 5).map((src: any) => {
        const verifiedMark = src.sql_verified ? "✓" : "";
        const score = (src.score ?? 0).toFixed(2);
        const snippet = String(src.chunk_text ?? "").slice(0, 80);
        return `${src.doc_id}${verifiedMark} score=${score}: ${snippet}`;
      }).join(" | ");
      const overflow = body.sources.length > 5 ? ` +${body.sources.length - 5} more` : "";
      return `TOOL_RESULT hybrid: sql_matches=${body.sql_matches} vector_reranked=${body.vector_reranked} sources=[${shown}${overflow}]`;
    }

    // sql response: row count plus the first five rows as JSON.
    if (Array.isArray(body.rows)) {
      const total = body.rows.length;
      if (total === 0) return `TOOL_RESULT sql: ${total} rows`;
      const shown = body.rows.slice(0, 5).map((row: any) => JSON.stringify(row)).join(" | ");
      const overflow = total > 5 ? ` +${total - 5} more` : "";
      return `TOOL_RESULT sql: ${total} rows — ${shown}${overflow}`;
    }

    // Any other result shape: truncated raw JSON.
    return `TOOL_RESULT ${JSON.stringify(body).slice(0, 250)}`;
  }

  if (kind === "critique") {
    return `CRITIQUE verdict=${body.verdict} notes: ${String(body.notes ?? "").slice(0, 200)}`;
  }

  if (kind === "propose_done") {
    const fillList = (body.fills ?? []).map((f: Fill) => `${f.candidate_id}:${f.name}`).join(", ");
    return `PROPOSE_DONE fills=[${fillList}] rationale: ${String(body.rationale ?? "").slice(0, 120)}`;
  }

  if (kind === "consensus_done") return `CONSENSUS ✓`;

  if (kind === "error") return `ERROR ${body.message ?? JSON.stringify(body)}`;

  return JSON.stringify(body).slice(0, 200);
}
|
||||||
|
|
||||||
|
/**
 * Render the tail of the shared log for inclusion in a prompt.
 *
 * @param log Full shared log.
 * @returns "(no turns yet)" for an empty log; otherwise the last 12 entries,
 *          one per line, each prefixed with `[t<turn> <role>]`.
 */
function renderLogForPrompt(log: LogEntry[]): string {
  if (log.length === 0) return "(no turns yet)";

  const rendered: string[] = [];
  for (const entry of log.slice(-12)) {
    rendered.push(`[t${entry.turn} ${entry.role}] ${summarizeEntry(entry)}`);
  }
  return rendered.join("\n");
}
|
||||||
|
|
||||||
|
/**
 * Crawl the log for every tool_result that carries `sources[]` and collect
 * the worker ids and names seen so far.
 *
 * LLMs routinely "forget" earlier turns once the conversation grows, so the
 * orchestrator surfaces this running ledger in the prompt as state it
 * maintains itself; the executor only has to read it.
 *
 * Parsing expects chunk_text shaped like "Name — Role in City, ST. …".
 * Sources that don't fit that shape, or that lack a doc_id, are skipped.
 * The first occurrence of each doc_id wins.
 *
 * @param log Shared log to scan.
 * @returns Unique candidates in first-seen order.
 */
function candidatesSeen(log: LogEntry[]): Array<{ doc_id: string; name: string; city: string; state: string }> {
  const seen = new Map<string, { doc_id: string; name: string; city: string; state: string }>();

  for (const e of log) {
    if (e.kind !== "tool_result") continue;
    const sources = (e.content as any)?.sources;
    if (!Array.isArray(sources)) continue;

    for (const s of sources) {
      const t = String(s.chunk_text ?? "");
      const [namePart, rest] = t.split("—", 2);
      if (!namePart || !rest) continue;

      // Location is whatever follows the first " in " after the name.
      const loc = rest.split(" in ")[1] ?? "";
      const [city, stateRaw] = loc.split(",", 2);

      // Keep only the leading run of letters ("OH" from "OH. Skills: …").
      // The previous `.replace(/[^A-Za-z].*/, "")` stopped at the first
      // newline because `.` does not match line breaks, so multi-line
      // chunk_text leaked trailing text into the state field.
      const state = (stateRaw ?? "").trim().match(/^[A-Za-z]*/)?.[0] ?? "";

      if (!s.doc_id || !namePart.trim() || !city?.trim() || !state) continue;
      if (seen.has(s.doc_id)) continue;

      seen.set(s.doc_id, {
        doc_id: s.doc_id,
        name: namePart.trim(),
        city: city.trim(),
        state,
      });
    }
  }

  return Array.from(seen.values());
}
|
||||||
|
|
||||||
|
/**
 * Build the prompt for the EXECUTOR agent's next turn.
 *
 * The prompt states the task, embeds the tool catalog, lists every candidate
 * surfaced so far (tracked by the orchestrator from the log), appends the
 * recent shared log, and ends with the JSON shapes the executor may emit.
 *
 * @param task Task specification (operation, target count/role/city/state,
 *             optional approach hint).
 * @param log  Shared log so far.
 * @returns The complete prompt text.
 */
export function executorPrompt(task: TaskSpec, log: LogEntry[]): string {
  const recentTurns = renderLogForPrompt(log);
  const surfaced = candidatesSeen(log);

  let ledger: string;
  if (surfaced.length === 0) {
    ledger = "(no candidates surfaced yet — start with hybrid_search)";
  } else {
    ledger = surfaced.map(w => ` - ${w.doc_id} ${w.name} (${w.city}, ${w.state})`).join("\n");
  }

  // Without a hint this line is left empty rather than removed.
  const hintLine = task.approach_hint ? `HINT: ${task.approach_hint}` : "";

  // Every element is already a string, so join("\n") yields exactly the
  // same text as one multi-line template literal would.
  const lines: string[] = [
    `You are the EXECUTOR agent. Your job is to complete this task:`,
    ``,
    `OPERATION: ${task.operation}`,
    `TARGET: ${task.target_count} × ${task.target_role} in ${task.target_city}, ${task.target_state}`,
    hintLine,
    ``,
    `The REVIEWER agent is watching every turn. They will flag drift. Stay on target.`,
    ``,
    `${TOOL_CATALOG}`,
    ``,
    `CANDIDATES SURFACED SO FAR (orchestrator-tracked, do not forget these):`,
    ledger,
    ``,
    `SHARED LOG (recent turns):`,
    recentTurns,
    ``,
    `Your next action MUST be a JSON object matching one of these shapes:`,
    `{"kind":"plan","steps":["short step 1","short step 2",...]}`,
    `— use on turn 1 to outline your approach. Steps must be concrete.`,
    `{"kind":"tool_call","tool":"...","args":{...},"rationale":"why"}`,
    `— call a tool and see its result next turn.`,
    `{"kind":"propose_done","fills":[{"candidate_id":"...","name":"First Last","reason":"why them"}],"rationale":"..."}`,
    `— propose you've met the target. fills MUST have EXACTLY ${task.target_count} entries — count twice before emitting.`,
    ``,
    `Strategy tip: once "CANDIDATES SURFACED SO FAR" has ≥ ${task.target_count} entries in ${task.target_city}, ${task.target_state} matching ${task.target_role}, verify ONE via the sql tool (to satisfy the reviewer's SQL-verification criterion) and then propose_done with the top ${task.target_count}. Don't keep re-searching.`,
    ``,
    `Respond with ONLY the JSON object. No markdown fences, no prose.`,
  ];
  return lines.join("\n");
}
|
||||||
|
|
||||||
|
/**
 * Build the prompt for the REVIEWER agent's next turn.
 *
 * The prompt restates the task, lists the kinds of drift to watch for,
 * appends the recent shared log, and spells out the critique shape and the
 * approval criteria. When the executor's latest entry is a propose_done, a
 * HARD RULE paragraph is added that forbids the "continue" verdict.
 *
 * @param task Task specification.
 * @param log  Shared log so far.
 * @returns The complete prompt text.
 */
export function reviewerPrompt(task: TaskSpec, log: LogEntry[]): string {
  const recentTurns = renderLogForPrompt(log);

  // If the most recent executor action was propose_done, the reviewer must
  // commit to an up-or-down vote this turn — "continue" would stall the
  // orchestrator forever. The wider prompt still describes all three
  // verdicts; the hard rule at the end overrides it for this turn.
  let awaitingApproval = false;
  for (let i = log.length - 1; i >= 0; i--) {
    if (log[i].role === "executor") {
      awaitingApproval = log[i].kind === "propose_done";
      break;
    }
  }

  const hardRule = awaitingApproval
    ? `\n\nHARD RULE: The executor's most recent action was propose_done. On this turn you CANNOT return "continue" — it would stall the task. Choose approve_done (proposal is valid by the 3 criteria above) or drift (it fails one; state which in notes).`
    : "";

  // Every element is already a string, so join("\n") yields exactly the
  // same text as one multi-line template literal would.
  const lines: string[] = [
    `You are the REVIEWER agent. The EXECUTOR is trying to complete this task:`,
    ``,
    `OPERATION: ${task.operation}`,
    `TARGET: ${task.target_count} × ${task.target_role} in ${task.target_city}, ${task.target_state}`,
    ``,
    `Your job: catch drift. Agents often wander from the actual objective. Specifically watch for:`,
    `- Proposing candidates who aren't in ${task.target_city}, ${task.target_state}.`,
    `- Proposing candidates who don't have ${task.target_role} skill.`,
    `- Proposing fewer or more than ${task.target_count} fills.`,
    `- Irrelevant tool calls (e.g. revenue_by_client when the task is a fill).`,
    ``,
    `Available tools (for reference, but YOU don't call them):`,
    `- hybrid_search(sql_filter, question, index_name, k) — production fill path`,
    `- sql(query) — read-only SELECT for verification`,
    ``,
    `SHARED LOG (recent turns):`,
    recentTurns,
    ``,
    `Your next action MUST be a JSON object:`,
    `{"kind":"critique","verdict":"continue" | "drift" | "approve_done","notes":"..."}`,
    ``,
    `- "continue" → executor is on a reasonable path, let them keep going.`,
    `- "drift" → executor is off-track; notes MUST tell them how to redirect.`,
    `- "approve_done" → executor's propose_done meets the criteria. Seal it.`,
    ``,
    `APPROVAL CRITERIA (use these only for propose_done):`,
    `1. Exactly ${task.target_count} fills.`,
    `2. Each fill's name appears in a prior tool_result from ${task.target_city}, ${task.target_state} matching role "${task.target_role}".`,
    `3. Executor has SQL-verified at least one of the fills (any prior sql tool_result with that worker).`,
    `If 1–3 all hold, return approve_done. Do not demand further verification.`,
    hardRule,
    ``,
    `Respond with ONLY the JSON object.`,
  ];
  return lines.join("\n");
}
|
||||||
|
|
||||||
|
/**
 * Parse an agent's raw response into an Action, or throw.
 *
 * Leading/trailing markdown code fences are stripped, then everything from
 * the first "{" to the last "}" is parsed as JSON and validated against the
 * shapes allowed for the given role.
 *
 * @param raw  Raw model output.
 * @param role "executor" selects the plan / tool_call / propose_done shapes;
 *             any other role is validated as a reviewer critique.
 * @returns The parsed action. Reviewer responses that put the verdict
 *          directly in `kind` are normalized to the `critique` shape.
 * @throws Error when no JSON object is present, the JSON is invalid, or the
 *         object does not match an accepted shape for the role.
 */
export function parseAction(raw: string, role: Role): Action {
  // Models sometimes wrap JSON in ```json fences; strip them.
  let s = raw.trim();
  if (s.startsWith("```")) {
    s = s.replace(/^```(?:json)?\n?/, "").replace(/```$/, "").trim();
  }

  // Take the span from the first "{" to the last "}" so stray prose before
  // or after the object is ignored.
  const start = s.indexOf("{");
  const end = s.lastIndexOf("}");
  if (start < 0 || end <= start) {
    throw new Error(`no JSON object in ${role} response: ${raw.slice(0, 300)}`);
  }
  const json = s.slice(start, end + 1);

  let obj: any;
  try {
    obj = JSON.parse(json);
  } catch (e) {
    throw new Error(`invalid JSON from ${role}: ${(e as Error).message} | raw: ${json.slice(0, 300)}`);
  }

  if (role === "executor") {
    if (obj.kind === "plan" && Array.isArray(obj.steps)) return obj as Action;

    // `typeof null` and `typeof []` are both "object"; neither is a usable
    // args object, so reject them here instead of handing them to a tool.
    const argsIsObject = obj.args !== null && typeof obj.args === "object" && !Array.isArray(obj.args);
    if (obj.kind === "tool_call" && typeof obj.tool === "string" && argsIsObject) return obj as Action;

    if (obj.kind === "propose_done" && Array.isArray(obj.fills)) return obj as Action;

    throw new Error(`executor returned unexpected shape: ${JSON.stringify(obj).slice(0, 200)}`);
  } else {
    // Normalize: some models (qwen2.5, mistral) emit the verdict AS the
    // `kind` field directly instead of nesting it under a "critique"
    // wrapper. Accept both shapes rather than hard-failing — the semantic
    // content is identical, and rejecting would stall the orchestrator on a
    // cosmetic schema miss.
    const verdicts = ["continue", "drift", "approve_done"];
    if (obj.kind === "critique" && verdicts.includes(obj.verdict)) {
      return obj as Action;
    }
    if (verdicts.includes(obj.kind)) {
      return { kind: "critique", verdict: obj.kind, notes: obj.notes ?? "" } as Action;
    }
    throw new Error(`reviewer returned unexpected shape: ${JSON.stringify(obj).slice(0, 200)}`);
  }
}
|
||||||
335
tests/multi-agent/chain_of_custody.ts
Normal file
335
tests/multi-agent/chain_of_custody.ts
Normal file
@ -0,0 +1,335 @@
|
|||||||
|
// Chain-of-custody trace test.
|
||||||
|
//
|
||||||
|
// J's framing: "we have enough synthetic data, we've run enough AI responses
|
||||||
|
// saved to the database. Test true quality. Don't ignore chain of custody.
|
||||||
|
// Use real applications. Understand each aspect of the flow — not just
|
||||||
|
// 'write a file or directory and open it'."
|
||||||
|
//
|
||||||
|
// One real recruiter operation, traced end-to-end through EVERY layer of the
|
||||||
|
// live substrate. Every layer must record the operation correctly. Any layer
|
||||||
|
// that drops it = chain-of-custody break = surfaced as a real bug.
|
||||||
|
//
|
||||||
|
// Layers verified:
|
||||||
|
// L0 Bun /search — recruiter app surface (NOT bare /vectors/hybrid)
|
||||||
|
// L1 /vectors/hybrid — direct gateway (parity check vs L0)
|
||||||
|
// L2 /vectors/playbook_memory/stats — feedback loop count
|
||||||
|
// L3 Bun /log — recruiter records the pick
|
||||||
|
// L4 successful_playbooks_live — SQL-queryable table of past fills (written by /log via persist_sql)
|
||||||
|
// L5 /vectors/playbook_memory/stats — count grew
|
||||||
|
// L6 tools/audit — Phase 12 governance trail
|
||||||
|
// L7 /access/audit — Phase 13 access trail
|
||||||
|
// L8 /journal/recent — Phase 9 mutation events
|
||||||
|
// L9 /storage/errors — Federation error journal (no new errors)
|
||||||
|
// L10 /vectors/profile/{id}/activate — Phase 17 hot-swap
|
||||||
|
// L11 Bun /search again — boost lifts the just-logged worker
|
||||||
|
// L12 verifier qwen2.5 — reads cross-layer state, judges integrity
|
||||||
|
//
|
||||||
|
// Run: bun run tests/multi-agent/chain_of_custody.ts
|
||||||
|
//
|
||||||
|
// Prints per-layer BEFORE/AFTER/DELTA. Exit non-zero on any chain break.
|
||||||
|
|
||||||
|
import { generate, GATEWAY } from "./agent.ts";
|
||||||
|
|
||||||
|
// Base URL of the Bun recruiter app (the /search and /log surface).
const BUN = "http://localhost:3700";
// Profile activated in the L10 hot-swap step.
const PROFILE_ID = "staffing-recruiter";

// The trace operation — small, deterministic, real city/role with supply.
// Helen Sanchez (worker_id 4661) is a known Toledo Welder; she is recorded
// as the manual pick the recruiter would make from the /search results.
const OP_ROLE = "Welder";
const OP_CITY = "Toledo";
const OP_STATE = "OH";
// Derived from the parts above so the label cannot drift from the filter.
const OPERATION = `fill: ${OP_ROLE} x1 in ${OP_CITY}, ${OP_STATE}`;
const PICKED_WORKER = "Helen Sanchez"; // verified earlier to be a Toledo OH Welder
|
||||||
|
|
||||||
|
// ─────────────────────── helpers ───────────────────────
|
||||||
|
|
||||||
|
/**
 * Best-effort GET that never throws.
 *
 * @param url Absolute URL to fetch.
 * @returns The parsed JSON body, or `null` when the request fails, the
 *          status is not OK, or the body is not valid JSON.
 */
async function getJSON<T = any>(url: string): Promise<T | null> {
  try {
    const r = await fetch(url);
    if (!r.ok) return null;
    // Await inside the try: returning the bare promise would let a
    // body-parse rejection escape this catch and reach the caller.
    return (await r.json()) as T;
  } catch {
    return null;
  }
}
|
||||||
|
|
||||||
|
/**
 * Best-effort JSON POST that never throws.
 *
 * @param url  Absolute URL to post to.
 * @param body Value serialized with JSON.stringify as the request body.
 * @returns The parsed JSON response. On a non-OK status, a transport
 *          failure, or an unparseable response body, returns an object with
 *          a single `_error` string describing what went wrong.
 */
async function postJSON<T = any>(url: string, body: any): Promise<T | null> {
  try {
    const r = await fetch(url, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(body),
    });
    if (!r.ok) return { _error: `${r.status}: ${await r.text()}` } as any;
    // Await inside the try: returning the bare promise would let a
    // body-parse rejection escape this catch instead of becoming `_error`.
    return (await r.json()) as T;
  } catch (e) {
    return { _error: (e as Error).message } as any;
  }
}
|
||||||
|
|
||||||
|
/**
 * Run a SQL statement through the gateway's `/query/sql` endpoint.
 *
 * @param query Statement text, sent as the `sql` field.
 * @returns Whatever `postJSON` resolves to for the call.
 */
async function sql(query: string): Promise<{ rows?: any[]; error?: string } | null> {
  const endpoint = `${GATEWAY}/query/sql`;
  return postJSON(endpoint, { sql: query });
}
|
||||||
|
|
||||||
|
/** Point-in-time counters read from each layer; compared before and after the traced operation. */
interface Snapshot {
  /** `entries` from /vectors/playbook_memory/stats. */
  pm_entries: number;
  /** `total_names_endorsed` from /vectors/playbook_memory/stats. */
  pm_names: number;
  /** Row count of the successful_playbooks_live SQL table. */
  sp_rows: number;
  /** Number of events returned by /tools/audit. */
  audit_count: number;
  /** Number of events returned by /access/audit. */
  access_count: number;
  /** Event count reported by /journal/stats. */
  journal_count: number;
  /** Number of entries returned by /storage/errors (bucket error journal). */
  storage_errors: number;
}
|
||||||
|
|
||||||
|
/**
 * Read the current counters from every layer, one request at a time.
 *
 * Counters that cannot be read are reported as -1, except `storage_errors`,
 * which falls back to 0.
 *
 * @returns A Snapshot of the live system.
 */
async function snapshot(): Promise<Snapshot> {
  const memoryStats = await getJSON<any>(`${GATEWAY}/vectors/playbook_memory/stats`);

  // successful_playbooks_live is the live SQL surface populated by /log via
  // /vectors/playbook_memory/persist_sql. The original successful_playbooks
  // table is now legacy/historical (no writes).
  const liveRows = await sql(`SELECT COUNT(*) AS c FROM successful_playbooks_live`);

  const toolAudit = await getJSON<any[]>(`${GATEWAY}/tools/audit`);
  const accessAudit = await getJSON<any>(`${GATEWAY}/access/audit`);
  const journal = await getJSON<any>(`${GATEWAY}/journal/stats`);
  const bucketErrors = await getJSON<any[]>(`${GATEWAY}/storage/errors`);

  // Each endpoint may answer with a bare array or wrap its list in an
  // object, so both shapes are probed.
  let auditCount: number;
  if (Array.isArray(toolAudit)) {
    auditCount = toolAudit.length;
  } else {
    auditCount = (toolAudit as any)?.events?.length ?? -1;
  }

  let accessCount: number;
  if (Array.isArray(accessAudit)) {
    accessCount = accessAudit.length;
  } else {
    accessCount = (accessAudit as any)?.events?.length ?? (accessAudit as any)?.audit?.length ?? -1;
  }

  let storageErrorCount: number;
  if (Array.isArray(bucketErrors)) {
    storageErrorCount = bucketErrors.length;
  } else {
    storageErrorCount = (bucketErrors as any)?.events?.length ?? 0;
  }

  return {
    pm_entries: memoryStats?.entries ?? -1,
    pm_names: memoryStats?.total_names_endorsed ?? -1,
    sp_rows: Number(liveRows?.rows?.[0]?.c ?? -1),
    audit_count: auditCount,
    access_count: accessCount,
    journal_count: journal?.event_count ?? journal?.total_events ?? journal?.events ?? -1,
    storage_errors: storageErrorCount,
  };
}
|
||||||
|
|
||||||
|
/**
 * Field-by-field difference between two snapshots.
 *
 * @param b The "before" snapshot.
 * @param a The "after" snapshot.
 * @returns `a - b` for every Snapshot counter, keyed by field name.
 */
function delta(b: Snapshot, a: Snapshot): Record<string, number> {
  const counters = [
    "pm_entries",
    "pm_names",
    "sp_rows",
    "audit_count",
    "access_count",
    "journal_count",
    "storage_errors",
  ] as const;

  const diff: Record<string, number> = {};
  for (const field of counters) {
    diff[field] = a[field] - b[field];
  }
  return diff;
}
|
||||||
|
|
||||||
|
/**
 * Format one before → after row of the snapshot comparison table.
 *
 * @param label Row label, right-padded to 28 characters.
 * @param b     Value before the operation.
 * @param a     Value after the operation.
 * @returns The formatted row; the change column shows "·" for no change and
 *          a signed number otherwise.
 */
function fmtRow(label: string, b: number, a: number): string {
  const change = a - b;

  let changeCol: string;
  if (change === 0) {
    changeCol = " · ";
  } else if (change > 0) {
    changeCol = ` +${change}`;
  } else {
    changeCol = ` ${change}`;
  }

  const beforeCol = String(b).padStart(6);
  const afterCol = String(a).padStart(6);
  return ` ${label.padEnd(28)} ${beforeCol} → ${afterCol} ${changeCol}`;
}
|
||||||
|
|
||||||
|
// ─────────────────────── trace ───────────────────────
|
||||||
|
|
||||||
|
/** Outcome of one checked layer in the chain-of-custody trace. */
interface TraceResult {
  /** Layer label; explicit breaks use a label starting with "CHAIN BREAK". */
  layer: string;
  /** Whether the layer behaved as expected. */
  ok: boolean;
  /** Human-readable evidence or failure description. */
  detail: string;
}
|
||||||
|
|
||||||
|
/**
 * Run the single traced recruiter operation through every layer and record a
 * pass/fail note per layer.
 *
 * Sequence: before-snapshot → Bun /search (L0) → direct /vectors/hybrid (L1,
 * parity check) → Bun /log (L3) → after-snapshot and growth checks (L5, L4,
 * L9) → profile activation (L10) → second search through Bun and through the
 * gateway (L11, L11b).
 *
 * Every note is printed as it is recorded. Transport errors are reported
 * against the layer that made the call, so a failed request is never noted
 * as a pass or misdiagnosed as a ranking problem.
 *
 * @returns All recorded layer results, in the order they were checked.
 */
async function runTrace(): Promise<TraceResult[]> {
  const out: TraceResult[] = [];
  const note = (layer: string, ok: boolean, detail: string) => {
    out.push({ layer, ok, detail });
    console.log(` ${ok ? "✓" : "✗"} ${layer.padEnd(32)} ${detail}`);
  };
  // Number of sources that received a positive playbook boost.
  const countBoosted = (sources: any[] | undefined): number =>
    (sources ?? []).filter((s: any) => (s.playbook_boost ?? 0) > 0).length;

  console.log(`\n▶ Trace operation: ${OPERATION} → pick=${PICKED_WORKER}\n`);

  // ── BEFORE snapshot ──
  console.log(`▶ Before-snapshot:`);
  const before = await snapshot();
  console.log(` pm_entries=${before.pm_entries} pm_names=${before.pm_names} sp_rows=${before.sp_rows} `
    + `audit=${before.audit_count} access=${before.access_count} journal=${before.journal_count} `
    + `storage_errors=${before.storage_errors}\n`);

  // ── L0: Bun /search ──
  console.log(`▶ L0 — Bun /search (recruiter app surface)`);
  const sql_filter = `role = '${OP_ROLE}' AND state = '${OP_STATE}' AND city = '${OP_CITY}'`;
  const bunSearch = await postJSON<any>(`${BUN}/search`, {
    question: `Welder in ${OP_CITY}, ${OP_STATE}`,
    sql_filter, top_k: 5, generate: false,
    id_column: "worker_id", dataset: "workers_500k", use_playbook_memory: true,
  });
  const bunBoosted = countBoosted(bunSearch?.sources);
  if (bunSearch?._error) {
    note("L0 Bun /search", false, `error: ${bunSearch._error}`);
  } else {
    const sources = bunSearch?.sources ?? [];
    note("L0 Bun /search", true, `sources=${sources.length} boosted=${bunBoosted} sql_matches=${bunSearch?.sql_matches}`);
  }

  // ── L1: direct /vectors/hybrid (parity check) ──
  console.log(`\n▶ L1 — Direct /vectors/hybrid (parity check vs Bun)`);
  const directSearch = await postJSON<any>(`${GATEWAY}/vectors/hybrid`, {
    index_name: "workers_500k_v1", filter_dataset: "workers_500k", id_column: "worker_id",
    sql_filter, question: `Welder in ${OP_CITY}, ${OP_STATE}`,
    top_k: 5, generate: false, use_playbook_memory: true, playbook_memory_k: 15,
  });
  const directBoosted = countBoosted(directSearch?.sources);
  // A failed direct call must be recorded as a failure; it was previously
  // always noted as a pass, even when postJSON returned `_error`.
  if (directSearch?._error) {
    note("L1 Direct /vectors/hybrid", false, `error: ${directSearch._error}`);
  } else {
    note("L1 Direct /vectors/hybrid", true, `boosted=${directBoosted} sql=${directSearch?.sql_matches}`);
  }

  if (bunBoosted < directBoosted) {
    note("CHAIN BREAK: Bun↔Direct parity", false,
      `Bun=${bunBoosted} boosted vs Direct=${directBoosted}. Bun /search likely missing playbook_memory_k forward.`);
  }

  // ── L3: Bun /log (recruiter records the pick) ──
  console.log(`\n▶ L3 — Bun /log (recruiter records the pick)`);
  const logged = await postJSON<any>(`${BUN}/log`, {
    operation: OPERATION,
    approach: "chain-of-custody trace",
    result: `1/1 filled → ${PICKED_WORKER}`,
    context: `client=COC-${Date.now()} start=08:00 scenario=trace`,
  });
  if (logged?._error) note("L3 Bun /log", false, `error: ${logged._error}`);
  else note("L3 Bun /log", true, `logged=${logged?.logged} seeded=${logged?.seeded}`);

  // The /log response may carry the result of its underlying SQL ingest in
  // `response`. If that mentions "different schema" or "error", the
  // SQL-queryable path is broken even though seed succeeded — a chain break.
  // NOTE(review): the change description says /log now seeds playbook_memory
  // and calls /persist_sql instead of /ingest/file; the labels below are
  // kept verbatim — confirm whether `response` is still populated.
  const logResp = String((logged as any)?.response ?? "");
  if (logged?._error) {
    // Nothing was ingested if /log itself failed; don't report acceptance.
    note("L3a /log → /ingest/file", false, "not evaluated — /log call itself failed");
  } else if (logResp.includes("error") || logResp.includes("different schema") || logResp.includes("Error")) {
    note("CHAIN BREAK: Bun /log → SQL ingest", false,
      `successful_playbooks ingest failed. Bun returned logged=true but /log's underlying ingest reported: ${logResp.slice(0, 150)}`);
  } else {
    note("L3a /log → /ingest/file", true, "ingest accepted");
  }

  // Give the system a beat for any async fan-out (audit/journal/etc).
  await new Promise(r => setTimeout(r, 500));

  // ── AFTER snapshot ──
  console.log(`\n▶ After-snapshot:`);
  const after = await snapshot();
  const d = delta(before, after);
  console.log(fmtRow("playbook_memory.entries", before.pm_entries, after.pm_entries));
  console.log(fmtRow("playbook_memory.names", before.pm_names, after.pm_names));
  console.log(fmtRow("successful_playbooks.rows", before.sp_rows, after.sp_rows));
  console.log(fmtRow("tools/audit.count", before.audit_count, after.audit_count));
  console.log(fmtRow("access/audit.count", before.access_count, after.access_count));
  console.log(fmtRow("journal.events", before.journal_count, after.journal_count));
  console.log(fmtRow("storage/errors.count", before.storage_errors, after.storage_errors));

  // ── L5: playbook_memory grew? ──
  if (d.pm_entries === 1) note("L5 playbook_memory growth", true, "+1 entry as expected");
  else note("L5 playbook_memory growth", d.pm_entries > 0,
    `delta=${d.pm_entries} (expected 1 — seed-after-log path)`);

  // ── L4: successful_playbooks SQL row appeared? ──
  if (d.sp_rows >= 1) note("L4 successful_playbooks SQL", true, `+${d.sp_rows} row(s)`);
  else note("L4 successful_playbooks SQL", false,
    `delta=${d.sp_rows} — Bun /log claims success but SQL table didn't grow. Recruiter querying via SQL would miss this fill.`);

  // ── L9: storage errors stayed quiet ──
  if (d.storage_errors === 0) note("L9 storage error journal", true, "no new bucket op errors");
  else note("L9 storage error journal", false, `+${d.storage_errors} new errors`);

  // ── L10: Phase 17 profile activation ──
  console.log(`\n▶ L10 — Activate profile ${PROFILE_ID}`);
  const act = await postJSON<any>(`${GATEWAY}/vectors/profile/${PROFILE_ID}/activate`, {});
  if (act?._error) note("L10 profile activation", false, `error: ${act._error}`);
  else note("L10 profile activation", true,
    `warmed=${(act?.warmed_indexes ?? []).length} duration_ms=${act?.duration_ms ?? "?"}`);

  // ── L11: Bun /search again — boost should now lift PICKED_WORKER ──
  console.log(`\n▶ L11 — Bun /search second time (boost lift verification)`);
  const search2 = await postJSON<any>(`${BUN}/search`, {
    question: `Welder in ${OP_CITY}, ${OP_STATE}`,
    sql_filter, top_k: 10, generate: false,
    id_column: "worker_id", dataset: "workers_500k", use_playbook_memory: true,
  });
  const sources2 = search2?.sources ?? [];
  const pickedHit = sources2.find((s: any) => String(s.chunk_text ?? "").includes(PICKED_WORKER));
  if (search2?._error) {
    // Report the transport failure itself rather than a misleading
    // "not in top-10" diagnosis.
    note("L11 boost lifts logged pick (Bun)", false, `error: ${search2._error}`);
  } else if (!pickedHit) {
    note("L11 boost lifts logged pick (Bun)", false,
      `${PICKED_WORKER} not in top-10 via Bun /search. Could be Bun-not-forwarding-playbook_memory_k bug from L1.`);
  } else if ((pickedHit.playbook_boost ?? 0) > 0) {
    note("L11 boost lifts logged pick (Bun)", true,
      `${PICKED_WORKER} boost=+${(pickedHit.playbook_boost as number).toFixed(3)} cites=${(pickedHit.playbook_citations ?? []).length}`);
  } else {
    note("L11 boost lifts logged pick (Bun)", false,
      `${PICKED_WORKER} present but boost=0 — playbook_memory_k forward bug likely`);
  }

  // Same probe via direct gateway to isolate Bun vs gateway
  const direct2 = await postJSON<any>(`${GATEWAY}/vectors/hybrid`, {
    index_name: "workers_500k_v1", filter_dataset: "workers_500k", id_column: "worker_id",
    sql_filter, question: `Welder in ${OP_CITY}, ${OP_STATE}`,
    top_k: 10, generate: false, use_playbook_memory: true, playbook_memory_k: 15,
  });
  const sources2d = direct2?.sources ?? [];
  const pickedHitD = sources2d.find((s: any) => String(s.chunk_text ?? "").includes(PICKED_WORKER));
  if (direct2?._error) {
    note("L11b boost via direct gateway", false, `error: ${direct2._error}`);
  } else if (pickedHitD && (pickedHitD.playbook_boost ?? 0) > 0) {
    note("L11b boost via direct gateway", true,
      `${PICKED_WORKER} boost=+${(pickedHitD.playbook_boost as number).toFixed(3)} cites=${(pickedHitD.playbook_citations ?? []).length}`);
  } else {
    note("L11b boost via direct gateway", false, `direct call also did not boost ${PICKED_WORKER}`);
  }

  return out;
}
|
||||||
|
|
||||||
|
// ─────────────────────── verifier (fresh agent) ───────────────────────
|
||||||
|
|
||||||
|
/**
 * Ask a fresh model to judge the per-layer trace.
 *
 * The trace is rendered one layer per line and sent to `qwen2.5:latest`; the
 * reply is expected to contain one JSON object with `verdict` and
 * `confidence`.
 *
 * @param trace Layer results from the trace run.
 * @returns The model's verdict and confidence. If the reply holds no JSON
 *          object, or generation/parsing throws, confidence is 0 and the
 *          verdict describes the problem. Never throws.
 */
async function verifierJudgment(trace: TraceResult[]): Promise<{ verdict: string; confidence: number }> {
  const layerLines = trace.map(t => ` ${t.ok ? "ok" : "FAIL"} ${t.layer}: ${t.detail}`);
  const summary = layerLines.join("\n");

  // Every element is already a string, so join("\n") yields exactly the
  // same text as one multi-line template literal would.
  const prompt = [
    `You are the CHAIN-OF-CUSTODY VERIFIER agent. A real recruiter operation was just`,
    `traced through every layer of the staffing substrate. Read the per-layer results and judge`,
    `whether the system kept chain of custody intact (every layer recorded the operation as`,
    `expected) or where it broke.`,
    ``,
    `Per-layer trace:`,
    summary,
    ``,
    `Reply with ONE JSON object only:`,
    `{"verdict": "<one tight sentence — what's the integrity status>", "confidence": 0-100}`,
    ``,
    `Be specific about which layer broke if any. confidence is how sure you are about the verdict.`,
  ].join("\n");

  try {
    const reply = await generate("qwen2.5:latest", prompt, { temperature: 0.1, max_tokens: 200 });

    // Parse the span from the first "{" to the last "}" in the reply.
    const open = reply.indexOf("{");
    const close = reply.lastIndexOf("}");
    if (open < 0 || close <= open) {
      return { verdict: "verifier could not produce JSON", confidence: 0 };
    }

    const parsed = JSON.parse(reply.slice(open, close + 1));
    const verdict = parsed.verdict ?? "no verdict";
    const confidence = Number(parsed.confidence) || 0;
    return { verdict, confidence };
  } catch (e) {
    return { verdict: `verifier error: ${(e as Error).message}`, confidence: 0 };
  }
}
|
||||||
|
|
||||||
|
// ─────────────────────── main ───────────────────────
|
||||||
|
|
||||||
|
/**
 * Entry point: run the trace, ask the verifier for a judgment, print a
 * summary, and exit.
 *
 * Exit code is 1 when any failed note has a label starting with
 * "CHAIN BREAK". Other failures are reported as non-blocking and exit 0,
 * the same as a clean run.
 */
async function main() {
  console.log(`▶ Chain-of-custody trace — single real recruiter operation through every layer`);

  const trace = await runTrace();

  console.log(`\n▶ L12 — Verifier (fresh qwen2.5 agent reads the cross-layer trace)`);
  const v = await verifierJudgment(trace);
  console.log(` verdict (${v.confidence}%): ${v.verdict}`);

  // Hard gate: any explicit CHAIN BREAK note = fail
  const failed = trace.filter(t => !t.ok);
  const chainBreaks = failed.filter(t => t.layer.startsWith("CHAIN BREAK"));
  const passingCount = trace.filter(t => t.ok).length;

  console.log(`\n▶ Summary:`);
  console.log(` passing layers: ${passingCount}/${trace.length}`);
  console.log(` chain breaks: ${chainBreaks.length}`);
  console.log(` total failures: ${failed.length}`);
  console.log(` verifier confidence: ${v.confidence}%`);

  if (chainBreaks.length > 0) {
    console.log(`\n✗ Chain of custody BROKEN at ${chainBreaks.length} layer(s):`);
    chainBreaks.forEach(b => console.log(` - ${b.layer}: ${b.detail}`));
    process.exit(1);
  }

  const closing = failed.length > 0
    ? `\n◑ Trace completed with ${failed.length} non-blocking failures (no formal chain break)`
    : `\n✓ Chain of custody intact across all layers`;
  console.log(closing);
  process.exit(0);
}
|
||||||
|
|
||||||
|
// Top-level runner: anything that escapes main() is printed, with its stack
// when one is present, and the process exits non-zero.
main().catch((err) => {
  const failure = err as any;
  console.error(`\n✗ ${(err as Error).message}`);
  if (failure.stack) console.error(failure.stack);
  process.exit(1);
});
|
||||||
469
tests/multi-agent/network_proving.ts
Normal file
469
tests/multi-agent/network_proving.ts
Normal file
@ -0,0 +1,469 @@
|
|||||||
|
// Network proving: continuous build → verify → repeat with hot-swap profile.
|
||||||
|
//
|
||||||
|
// J's framing: "have them guide each other, when the test is complete we have
|
||||||
|
// a successful playbook, then spin up another agent that tests the viability
|
||||||
|
// of our network with the playbook and the hot-swap profile. Keep spinning up
|
||||||
|
// agents and testing — pass theory, real-world execution, not isolated unit
|
||||||
|
// tests."
|
||||||
|
//
|
||||||
|
// Each round = TWO phases:
|
||||||
|
//
|
||||||
|
// 1. BUILD phase. Two agents (mistral executor + qwen2.5 reviewer) work
|
||||||
|
// on a real staffing fill task. They guide each other via the critique
|
||||||
|
// loop. On consensus → seal a playbook with CANONICAL short seed text
|
||||||
|
// (the Pass 1 lesson — verbose seeds silently kill boost). Real Ollama,
|
||||||
|
// real workers_500k, real /vectors/hybrid path.
|
||||||
|
//
|
||||||
|
// 2. VERIFY phase. A FRESH qwen2.5 agent spins up, activates the
|
||||||
|
// staffing-recruiter profile (Phase 17 hot-swap), runs a probe query
|
||||||
|
// against the same network, and judges from the live response whether
|
||||||
|
// prior rounds' playbooks actually surface relevant workers higher.
|
||||||
|
// The verifier writes a verdict: did the network learn?
|
||||||
|
//
|
||||||
|
// Three rounds, progressively harder:
|
||||||
|
// R0: Welder x2 in Toledo, OH — baseline
|
||||||
|
// R1: Welder x2 in Cleveland, OH — same role, different city
|
||||||
|
// → tests geo discrimination
|
||||||
|
// (Toledo workers MUST NOT
|
||||||
|
// bleed into Cleveland boost)
|
||||||
|
// R2: Welder x3 in Toledo, OH — re-fill same city, bigger
|
||||||
|
// count → tests compounding
|
||||||
|
// (R0's endorsements should
|
||||||
|
// still rank up here)
|
||||||
|
//
|
||||||
|
// Run: bun run tests/multi-agent/network_proving.ts
|
||||||
|
//
|
||||||
|
// Fail-fast: any HTTP error or model crash bubbles to top-level, exits 1.
|
||||||
|
|
||||||
|
import {
|
||||||
|
type LogEntry,
|
||||||
|
type TaskSpec,
|
||||||
|
type Action,
|
||||||
|
type Fill,
|
||||||
|
GATEWAY,
|
||||||
|
generate,
|
||||||
|
parseAction,
|
||||||
|
executorPrompt,
|
||||||
|
reviewerPrompt,
|
||||||
|
sqlQuery,
|
||||||
|
callTool,
|
||||||
|
} from "./agent.ts";
|
||||||
|
|
||||||
|
// Models for the build pair and for the fresh verifier agent.
const EXECUTOR_MODEL = "mistral:latest";
const REVIEWER_MODEL = "qwen2.5:latest";
const VERIFIER_MODEL = "qwen2.5:latest";

// Profile activated at the start of every VERIFY phase, and the index the probe queries.
const PROFILE_ID = "staffing-recruiter";
const INDEX_NAME = "workers_500k_v1";

// Build-loop limits: total turns, consecutive tool errors, consecutive drift verdicts.
const MAX_TURNS = 12;
const MAX_TOOL_ERRORS = 3;
const MAX_DRIFTS = 3;

// The three rounds. All are Welder fills in OH; only the id, head-count and
// city vary, so the deck is expanded from a compact [id, count, city] table.
const TASK_DECK: TaskSpec[] = (
  [
    ["R0", 2, "Toledo"],
    ["R1", 2, "Cleveland"],
    ["R2", 3, "Toledo"],
  ] as Array<[string, number, string]>
).map(([id, count, city]) => ({
  id,
  operation: `fill: Welder x${count} in ${city}, OH`,
  target_role: "Welder",
  target_count: count,
  target_city: city,
  target_state: "OH",
  approach_hint: "hybrid_search workers_500k_v1 with sql_filter role+state+city, then sql verify",
}));
|
||||||
|
|
||||||
|
/** Outcome of one BUILD phase (executor + reviewer loop, then playbook seed). */
interface BuildResult {
  /** True when the pair reached consensus; false when the loop threw. */
  ok: boolean;
  task: TaskSpec;
  /** Fills agreed on at consensus; empty when `ok` is false. */
  fills: Fill[];
  /** Number of loop turns that were started. */
  turns: number;
  duration_secs: number;
  /** Set only when the seed request succeeded. */
  playbook_id?: string;
  /** `entries_after` as reported by the seed response. */
  entries_after_seed?: number;
  /** Message of the error that ended the build, when `ok` is false. */
  error?: string;
}

/** Outcome of one VERIFY phase (profile activation, probe query, verifier verdict). */
interface VerifyResult {
  profile_activated: boolean;
  warmed_indexes: number;
  /** Sum of playbook_boost across top-K. */
  probe_boost_total: number;
  /** How many hits had boost > 0. */
  probe_boosted_hits: number;
  /** playbook_ids cited. */
  probe_top_citations: string[];
  /** When the prior playbook is in a different city, boost should NOT bleed. */
  geo_discrimination_ok: boolean;
  /** qwen2.5's natural-language judgment. */
  verdict: string;
  /** 0-100 self-rated. */
  confidence: number;
  duration_secs: number;
}

/** One row of the run ledger: a round's task, both phase results and its score. */
interface RoundLedger {
  round: number;
  task: TaskSpec;
  build: BuildResult;
  verify: VerifyResult;
  /** Out of 10 per round. */
  score: number;
  notes: string[];
}
|
||||||
|
|
||||||
|
// ─────────────────────── BUILD phase (two-agent loop) ───────────────────────
|
||||||
|
|
||||||
|
/**
 * Run one tool call requested by the executor.
 *
 *  - "hybrid_search" → POST `${GATEWAY}/vectors/hybrid` against workers_500k
 *    with playbook memory enabled; requires sql_filter, question, index_name.
 *  - "sql"           → forwarded to sqlQuery; the query must be a string that
 *    begins with SELECT.
 *  - anything else   → forwarded unchanged to callTool.
 *
 * @param name tool name chosen by the executor
 * @param args tool arguments as produced by the model (treated as untrusted)
 * @returns whatever the selected surface returns (parsed JSON for hybrid_search)
 * @throws Error on missing/invalid arguments or a non-2xx hybrid response;
 *         the caller records these as tool errors.
 */
async function executeToolCall(name: string, args: Record<string, any>): Promise<any> {
  // Models occasionally omit `args` entirely; validate against an empty
  // object so the caller gets the descriptive error below, not a destructuring crash.
  const safeArgs: Record<string, any> = args ?? {};

  if (name === "hybrid_search") {
    const { sql_filter, question, index_name, k } = safeArgs;
    if (!sql_filter || !question || !index_name) {
      throw new Error(`hybrid_search needs sql_filter+question+index_name, got keys=${Object.keys(safeArgs).join(",")}`);
    }
    const r = await fetch(`${GATEWAY}/vectors/hybrid`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        sql_filter, question, index_name,
        filter_dataset: "workers_500k", id_column: "worker_id",
        top_k: k ?? 10, generate: false, use_playbook_memory: true,
      }),
    });
    if (!r.ok) throw new Error(`hybrid → ${r.status}: ${await r.text()}`);
    return r.json();
  }

  if (name === "sql") {
    const { query } = safeArgs;
    // Reject non-strings explicitly: RegExp.test would coerce e.g. an array
    // to a string, letting a non-string value through to sqlQuery.
    if (!query || typeof query !== "string") {
      throw new Error(`sql needs query (string), got ${typeof query}`);
    }
    if (!/^\s*SELECT/i.test(query)) throw new Error("sql allows SELECT only");
    return sqlQuery(query);
  }

  // Any other name is assumed to be a registered tool.
  return callTool(name, args);
}
|
||||||
|
|
||||||
|
/**
 * Cap the size of a tool result before it is appended to the turn log.
 *
 * A result with a `rows` array keeps its first 20 rows; otherwise a result
 * with a `sources` array keeps its first 12 sources. Only the first matching
 * field is capped, and the input object is never mutated. Anything else
 * (including null/undefined and primitives) is returned as-is.
 */
function trim(r: any) {
  if (!r) return r;
  const caps: Array<[string, number]> = [["rows", 20], ["sources", 12]];
  for (const [field, cap] of caps) {
    if (Array.isArray(r[field])) {
      return { ...r, [field]: r[field].slice(0, cap) };
    }
  }
  return r;
}
|
||||||
|
|
||||||
|
/**
 * Render one log entry as a single console line:
 * `[<prefix> tNN <role> <kind>] <kind-specific summary>`.
 *
 * The entry content comes straight from model output, so optional fields may
 * be missing or of the wrong type. Every access is coerced defensively: this
 * formatter runs inside the build loop's `append`, and a TypeError here would
 * otherwise abort the build over a cosmetic problem.
 *
 * @param prefix round label shown at the start of the line
 * @param e      log entry (without its timestamp)
 */
function fmtTurn(prefix: string, e: Omit<LogEntry, "at">): string {
  const c: any = e.content ?? {};
  const head = `[${prefix} t${e.turn.toString().padStart(2, "0")} ${e.role.padEnd(8)} ${e.kind.padEnd(14)}]`;

  // Coercion helpers: any value → clipped string, any value → array.
  const clip = (v: unknown, n: number): string => String(v ?? "").slice(0, n);
  const asList = (v: unknown): any[] => (Array.isArray(v) ? v : []);

  switch (e.kind as string) {
    case "tool_call":
      return `${head} ${c.tool}(${JSON.stringify(c.args ?? {}).slice(0, 70)})`;
    case "tool_result":
      if (c.error) return `${head} error: ${c.error}`;
      if (Array.isArray(c.sources)) return `${head} hybrid sql=${c.sql_matches} reranked=${c.vector_reranked}`;
      if (Array.isArray(c.rows)) return `${head} sql ${c.rows.length} rows`;
      return `${head} ${JSON.stringify(c).slice(0, 70)}`;
    case "critique":
      return `${head} verdict=${c.verdict} ${clip(c.notes, 50)}`;
    case "propose_done": {
      const fills = asList(c.fills);
      return `${head} ${fills.length} fills: ${fills.map(f => f?.name).join(", ")}`;
    }
    case "consensus_done":
      return `${head} ✓`;
    case "plan":
      return `${head} ${asList(c.steps).length} steps`;
    default:
      return `${head} ${JSON.stringify(c).slice(0, 60)}`;
  }
}
|
||||||
|
|
||||||
|
/**
 * Seed playbook memory with the sealed fills of one round.
 *
 * The seed text is deliberately CANONICAL and short (Pass 1 lesson: verbose
 * seeds silently kill boost). The executor's free-form rationale is kept out
 * of the request entirely and is not persisted by this helper.
 *
 * Best-effort: a failed or errored seed is logged with console.warn and
 * reported as "no playbook id" rather than thrown.
 *
 * @returns the id and post-seed entry count from the seed response, or an
 *          empty object when seeding did not succeed
 */
async function seedPlaybook(
  task: TaskSpec,
  fills: Fill[],
  prefix: string,
): Promise<{ playbook_id?: string; entries_after_seed?: number }> {
  const canonicalApproach = `${task.target_role.toLowerCase()} fill via hybrid search`;
  const canonicalContext = `${task.target_role} fill in ${task.target_city}, ${task.target_state}`;
  try {
    const sr = await fetch(`${GATEWAY}/vectors/playbook_memory/seed`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        operation: task.operation,
        approach: canonicalApproach,
        context: canonicalContext,
        endorsed_names: fills.map(f => f.name),
        append: true,
      }),
    });
    if (!sr.ok) {
      console.warn(`[${prefix}] ↳ seed failed: ${sr.status} ${await sr.text()}`);
      return {};
    }
    const j = await sr.json() as any;
    const playbook_id: string | undefined = j.playbook_id;
    const entries_after_seed: number | undefined = j.entries_after;
    console.log(`[${prefix}] ↳ seeded id=${playbook_id} entries=${entries_after_seed}`);
    return { playbook_id, entries_after_seed };
  } catch (e) {
    console.warn(`[${prefix}] ↳ seed errored: ${(e as Error).message}`);
    return {};
  }
}

/**
 * BUILD phase: executor and reviewer alternate turns on `task` until they
 * reach consensus, then the agreed fills are seeded into playbook memory.
 *
 * Per turn: the executor produces one action (tool calls are executed and
 * their trimmed result, or error, is logged), then the reviewer critiques.
 * The loop ends when the executor's `propose_done` is met by the reviewer's
 * `approve_done`.
 *
 * Never throws. Any failure ends the build and is reported as `ok: false`
 * with `error` set: unparseable model output, MAX_TOOL_ERRORS consecutive
 * tool errors, MAX_DRIFTS consecutive drift verdicts, a non-critique reviewer
 * action, a consensus whose fill count differs from `task.target_count`, or
 * no consensus within MAX_TURNS.
 *
 * @param task   the fill to work on
 * @param prefix label used for every console line of this round
 */
async function buildPhase(task: TaskSpec, prefix: string): Promise<BuildResult> {
  const t0 = Date.now();
  const elapsedSecs = () => Math.round((Date.now() - t0) / 1000);
  const log: LogEntry[] = [];
  let turn = 0;
  let sealedFills: Fill[] | null = null;
  let toolErrors = 0, drifts = 0;

  // Timestamp, store and echo one entry. The shared `log` is what both
  // prompts are built from on the next turn.
  const append = (e: Omit<LogEntry, "at">): LogEntry => {
    const full: LogEntry = { ...e, at: new Date().toISOString() };
    log.push(full);
    console.log(fmtTurn(prefix, e));
    return full;
  };

  try {
    while (turn < MAX_TURNS && !sealedFills) {
      turn += 1;

      // --- executor turn ---
      const execRaw = await generate(EXECUTOR_MODEL, executorPrompt(task, log), { temperature: 0.2, max_tokens: 600 });
      const execAction = parseAction(execRaw, "executor");
      append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: execAction.kind as any, content: execAction });

      if (execAction.kind === "tool_call") {
        try {
          const r = await executeToolCall(execAction.tool, execAction.args);
          append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result", content: trim(r) });
          toolErrors = 0; // only *consecutive* tool errors abort the build
        } catch (e) {
          // Feed the error back through the log so the executor can read it
          // on its next turn and correct the call.
          append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result",
            content: { error: (e as Error).message, tool: execAction.tool, args: execAction.args } });
          toolErrors += 1;
          if (toolErrors >= MAX_TOOL_ERRORS) throw new Error(`${MAX_TOOL_ERRORS} consecutive tool errors`);
        }
      }

      // --- reviewer turn ---
      const revRaw = await generate(REVIEWER_MODEL, reviewerPrompt(task, log), { temperature: 0.1, max_tokens: 400 });
      const revAction = parseAction(revRaw, "reviewer");
      append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "critique", content: revAction });

      if (revAction.kind !== "critique") throw new Error(`reviewer non-critique`);
      if (revAction.verdict === "drift") {
        drifts += 1;
        if (drifts >= MAX_DRIFTS) throw new Error(`${MAX_DRIFTS} consecutive drifts`);
      } else {
        drifts = 0;
      }

      // --- consensus check ---
      if (execAction.kind === "propose_done" && revAction.verdict === "approve_done") {
        if (execAction.fills.length !== task.target_count) {
          throw new Error(`fills=${execAction.fills.length} target=${task.target_count}`);
        }
        append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "consensus_done", content: { fills: execAction.fills } });
        sealedFills = execAction.fills;
      }
    }

    if (!sealedFills) throw new Error(`no consensus after ${MAX_TURNS} turns`);

    // Phase 19 seed. A seed failure does not fail the build; it only leaves
    // playbook_id / entries_after_seed undefined in the result.
    const seeded = await seedPlaybook(task, sealedFills, prefix);

    return {
      ok: true, task, fills: sealedFills, turns: turn,
      duration_secs: elapsedSecs(),
      playbook_id: seeded.playbook_id,
      entries_after_seed: seeded.entries_after_seed,
    };
  } catch (e) {
    return {
      ok: false, task, fills: [], turns: turn,
      duration_secs: elapsedSecs(),
      error: (e as Error).message,
    };
  }
}
|
||||||
|
|
||||||
|
// ─────────────────────── VERIFY phase (fresh single agent) ───────────────────────
|
||||||
|
|
||||||
|
/**
 * POST the activate endpoint for PROFILE_ID and report how it went.
 *
 * @returns `ok`     — whether the response status was 2xx;
 *          `warmed` — length of `warmed_indexes` in the response body
 *                     (0 when absent or when activation failed);
 *          `ms`     — time until the response headers arrived (measured
 *                     before the body is read).
 * A non-2xx response is logged with console.warn, not thrown.
 */
async function activateProfile(): Promise<{ ok: boolean; warmed: number; ms: number }> {
  const startedAt = Date.now();
  const resp = await fetch(`${GATEWAY}/vectors/profile/${PROFILE_ID}/activate`, { method: "POST" });
  const ms = Date.now() - startedAt;

  if (resp.ok) {
    const payload = await resp.json() as any;
    const warmedIndexes = payload.warmed_indexes ?? [];
    return { ok: true, warmed: warmedIndexes.length, ms };
  }

  console.warn(`profile activation failed: ${resp.status} ${await resp.text()}`);
  return { ok: false, warmed: 0, ms };
}
|
||||||
|
|
||||||
|
/**
 * Run the VERIFY-phase probe: one hybrid query for `task` (role + state +
 * city filter, top_k 10) with playbook memory enabled, then summarise how
 * much playbook boost showed up in the returned sources.
 *
 * @returns sources     — raw `sources` array from the response ([] if absent)
 *          boostedHits — number of sources with playbook_boost > 0
 *          totalBoost  — sum of playbook_boost over all sources
 *          cites       — up to 5 distinct playbook citations
 *          topNames    — display name of the first 5 sources: the text before
 *                        the first "—" in chunk_text, or doc_id when that is empty
 * @throws Error when the hybrid endpoint answers with a non-2xx status
 */
async function probeWithBoost(task: TaskSpec) {
  // Quote a value as a SQL string literal. All three filter values go
  // through the same escaping (previously `state` was interpolated raw).
  const sqlLiteral = (v: string) => `'${String(v).replace(/'/g, "''")}'`;
  const sql_filter = `role = ${sqlLiteral(task.target_role)} `
    + `AND state = ${sqlLiteral(task.target_state)} `
    + `AND city = ${sqlLiteral(task.target_city)}`;

  const r = await fetch(`${GATEWAY}/vectors/hybrid`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      index_name: INDEX_NAME, filter_dataset: "workers_500k", id_column: "worker_id",
      sql_filter, question: `${task.target_role} in ${task.target_city}, ${task.target_state}`,
      top_k: 10, generate: false, use_playbook_memory: true, playbook_memory_k: 15,
    }),
  });
  if (!r.ok) throw new Error(`probe → ${r.status}: ${await r.text()}`);

  const j = (await r.json()) as any;
  const sources: any[] = j.sources ?? [];
  const boostedHits = sources.filter(s => (s.playbook_boost ?? 0) > 0).length;
  const totalBoost = sources.reduce((s, x) => s + (x.playbook_boost ?? 0), 0);
  const cites = Array.from(new Set(sources.flatMap(s => s.playbook_citations ?? []))).slice(0, 5);
  const topNames = sources.slice(0, 5).map(s => {
    // split() always yields at least one (possibly empty) string, so the
    // doc_id fallback has to trigger on an empty name, not on undefined.
    const name = (String(s.chunk_text ?? "").split("—")[0] ?? "").trim();
    return name || s.doc_id;
  });
  return { sources, boostedHits, totalBoost, cites, topNames };
}
|
||||||
|
|
||||||
|
/**
 * Build the prompt for the fresh verifier agent. The verifier shares no log
 * with the build pair: all it sees is the current task, the playbooks sealed
 * in earlier rounds, and the summary of the live probe.
 *
 * @param task           the round's task
 * @param priorPlaybooks operation + endorsed names of each earlier sealed round
 * @param probe          probe summary (boosted hits, total boost, citations, top names)
 * @returns prompt text asking for a single JSON object
 *          `{"learned", "verdict", "confidence"}`
 */
function verifierPrompt(task: TaskSpec, priorPlaybooks: Array<{op: string; fills: string[]}>,
  probe: { boostedHits: number; totalBoost: number; cites: string[]; topNames: string[] }
): string {
  const priorBlock = priorPlaybooks.length > 0
    ? priorPlaybooks
        .map((p, i) => ` ${i + 1}. ${p.op} → endorsed [${p.fills.join(", ")}]`)
        .join("\n")
    : "(no prior playbooks — this is the first round)";

  // One array element per prompt line; "" is a blank line.
  const lines: string[] = [
    "You are the VERIFIER agent. A fresh round just sealed a playbook on a real staffing",
    "substrate. Your job: judge whether the system learned from prior rounds.",
    "",
    "CURRENT ROUND:",
    `task: ${task.operation}`,
    `in city: ${task.target_city}, ${task.target_state}`,
    "",
    "PRIOR PLAYBOOKS (in playbook_memory):",
    priorBlock,
    "",
    "I activated the staffing-recruiter profile and ran a hybrid query for this exact task with",
    "use_playbook_memory=true. Live result from the substrate:",
    `- top-5 surfaced workers: ${probe.topNames.join(", ")}`,
    `- hits with non-zero playbook_boost: ${probe.boostedHits} / 10`,
    `- total boost across top-10: ${probe.totalBoost.toFixed(3)}`,
    `- playbook citations: [${probe.cites.join(", ")}]`,
    "",
    "JUDGE:",
    "1. If a prior playbook covered this same city + role, the boost should fire on the workers",
    "it endorsed (boostedHits > 0, citations non-empty).",
    "2. If no prior playbook covers this combo, boost should be ~0 — that means the system is",
    "correctly NOT bleeding endorsements across geos.",
    "3. Anything in between (e.g. some boost but for the wrong reason) is a partial pass.",
    "",
    "Respond with ONE JSON object only:",
    `{"learned": true|false, "verdict": "<one sentence>", "confidence": 0-100}`,
    "",
    `learned=true means the network behaved as expected for this round (whether that's "boost fired`,
    `because it should" or "boost stayed zero because it should"). learned=false means the system`,
    "either failed to learn from a relevant prior playbook OR bled an irrelevant one. confidence is",
    "how sure you are.",
  ];
  return lines.join("\n");
}
|
||||||
|
|
||||||
|
/**
 * VERIFY phase for one round: activate the profile, run the boost probe,
 * derive the mechanical geo-discrimination check from the ledger of EARLIER
 * rounds, and ask a fresh verifier model for a verdict.
 *
 * Geo check (only successfully built prior rounds with the same role count):
 *   - a prior round covered this same city+state → boost is expected (> 0 hits);
 *   - otherwise, a prior round covered another geo → boost must be absent;
 *   - no relevant prior round → nothing expected, check passes.
 *
 * A verifier failure (model error or unparseable reply) does not throw; it is
 * reported through `verdict` with `confidence` 0.
 *
 * NOTE(review): main() runs buildPhase (which seeds this round's own playbook)
 * before calling this function, and `ledger` does not contain the current
 * round. If the freshly seeded playbook already boosts the probe, the
 * "must NOT bleed" branch cannot tell self-boost from cross-geo bleed —
 * confirm against the seed/boost semantics of the backend.
 *
 * NOTE(review): round scoring reads only `confidence`, so a verifier that is
 * highly confident the network did NOT learn still earns the confidence
 * points. The `learned` flag is surfaced in the verdict text below so it is at
 * least visible; carrying it into the score needs a VerifyResult field.
 *
 * @param task   the current round's task
 * @param ledger completed earlier rounds
 */
async function verifyPhase(task: TaskSpec, ledger: RoundLedger[]): Promise<VerifyResult> {
  const t0 = Date.now();
  const act = await activateProfile();
  const probe = await probeWithBoost(task);

  // Decide what counts as geo-correct based on prior playbooks.
  const sealedPriors = ledger.filter(r => r.build.ok);
  const sameRolePriors = sealedPriors.filter(r => r.task.target_role === task.target_role);
  const isThisGeo = (r: RoundLedger) =>
    r.task.target_city === task.target_city && r.task.target_state === task.target_state;
  const priorMatchesThisGeo = sameRolePriors.some(isThisGeo);
  const priorOtherGeo = sameRolePriors.some(r => !isThisGeo(r));

  let geo_discrimination_ok: boolean;
  if (priorMatchesThisGeo) {
    geo_discrimination_ok = probe.boostedHits > 0;   // expected lift
  } else if (priorOtherGeo) {
    geo_discrimination_ok = probe.boostedHits === 0; // must NOT bleed
  } else {
    geo_discrimination_ok = true;                    // no signal expected either way
  }

  // Spin up the fresh verifier agent.
  const priorPlaybooks = sealedPriors.map(r => ({
    op: r.task.operation, fills: r.build.fills.map(f => f.name),
  }));

  let verdict = "verifier failed to respond";
  let confidence = 0;
  try {
    const raw = await generate(VERIFIER_MODEL, verifierPrompt(task, priorPlaybooks, probe), {
      temperature: 0.1, max_tokens: 250,
    });
    // Models often wrap the JSON in prose; take the outermost {...} span.
    const start = raw.indexOf("{"), end = raw.lastIndexOf("}");
    if (start >= 0 && end > start) {
      const j = JSON.parse(raw.slice(start, end + 1));
      // Only accept a real, non-empty string; anything else keeps the default.
      if (typeof j.verdict === "string" && j.verdict.trim() !== "") verdict = j.verdict;
      // Keep confidence inside the documented 0-100 range; NaN → 0.
      const rated = Number(j.confidence);
      confidence = Number.isFinite(rated) ? Math.min(100, Math.max(0, rated)) : 0;
      // Make an explicit negative judgment visible wherever the verdict is printed.
      if (j.learned === false) verdict = `[learned=false] ${verdict}`;
    }
  } catch (e) {
    verdict = `verifier parse error: ${(e as Error).message}`;
  }

  return {
    profile_activated: act.ok,
    warmed_indexes: act.warmed,
    probe_boost_total: probe.totalBoost,
    probe_boosted_hits: probe.boostedHits,
    probe_top_citations: probe.cites,
    geo_discrimination_ok,
    verdict, confidence,
    duration_secs: Math.round((Date.now() - t0) / 1000),
  };
}
|
||||||
|
|
||||||
|
// ─────────────────────── round scoring ───────────────────────
|
||||||
|
|
||||||
|
/**
 * Score one round out of 10 and explain the score.
 *
 * Points: build sealed 3 · playbook seeded 1 · profile activated 1 ·
 * geo discrimination correct 3 · verifier confidence ≥ 60 → 2.
 * Build, geo and verifier always contribute a note (✓, ✗ or ◑); the seed and
 * profile checks only add a note when they pass.
 *
 * @returns the total and the notes in the order the checks are listed above
 */
function scoreRound(r: RoundLedger): { score: number; notes: string[] } {
  const { build, verify } = r;
  const notes: string[] = [];
  let score = 0;

  // Record one check: add its points (0 for a miss) and its note.
  const record = (points: number, note: string): void => {
    score += points;
    notes.push(note);
  };

  if (build.ok) {
    record(3, `✓ build sealed (${build.fills.map(f => f.name).join(", ")})`);
  } else {
    record(0, `✗ build failed: ${build.error}`);
  }

  if (build.playbook_id) {
    record(1, `✓ seeded id=${build.playbook_id}`);
  }

  if (verify.profile_activated) {
    record(1, `✓ profile activated (warmed=${verify.warmed_indexes})`);
  }

  if (verify.geo_discrimination_ok) {
    record(3, `✓ geo discrimination correct (boostedHits=${verify.probe_boosted_hits})`);
  } else {
    record(0, `✗ geo discrimination failed (boostedHits=${verify.probe_boosted_hits})`);
  }

  if (verify.confidence >= 60) {
    record(2, `✓ verifier confident (${verify.confidence}%): ${verify.verdict}`);
  } else {
    record(0, `◑ verifier confidence ${verify.confidence}%: ${verify.verdict}`);
  }

  return { score, notes };
}
|
||||||
|
|
||||||
|
// ─────────────────────── main loop ───────────────────────
|
||||||
|
|
||||||
|
/**
 * Run every task in TASK_DECK as one round (BUILD, then VERIFY, then score),
 * print a per-round and an overall summary, and apply the final gate.
 *
 * Gate: a round passes when its build succeeded AND its score is at least 6;
 * at least two thirds of the rounds (rounded up) must pass, otherwise an
 * Error is thrown. On success the process exits with code 0.
 */
async function main() {
  console.log(`▶ Network proving — ${TASK_DECK.length} rounds, profile=${PROFILE_ID}`);
  console.log(`▶ build pair: ${EXECUTOR_MODEL} + ${REVIEWER_MODEL}; verifier: ${VERIFIER_MODEL}\n`);

  const ledger: RoundLedger[] = [];

  for (const [i, task] of TASK_DECK.entries()) {
    console.log(`\n══════════ Round ${i} — ${task.operation} ══════════`);

    console.log(`\n[${task.id}] BUILD phase (two agents collaborating)`);
    const build = await buildPhase(task, task.id);

    // The ledger passed to verify holds only the rounds before this one.
    console.log(`\n[${task.id}] VERIFY phase (fresh agent + hot-swap profile)`);
    const verify = await verifyPhase(task, ledger);
    const activation = verify.profile_activated ? "ok" : "fail";
    console.log(` profile=${activation} warmed=${verify.warmed_indexes} boosted=${verify.probe_boosted_hits}/10 totalBoost=${verify.probe_boost_total.toFixed(3)} cites=${verify.probe_top_citations.length} confidence=${verify.confidence}%`);
    console.log(` verdict: ${verify.verdict}`);

    // Score against a placeholder entry, then fill in the result.
    const round: RoundLedger = { round: i, task, build, verify, score: 0, notes: [] };
    const scored = scoreRound(round);
    round.score = scored.score;
    round.notes = scored.notes;
    ledger.push(round);

    console.log(`\n Round ${i} score: ${round.score}/10`);
    round.notes.forEach(note => console.log(` ${note}`));
  }

  console.log(`\n══════════ Network viability summary ══════════`);
  const total = ledger.reduce((sum, r) => sum + r.score, 0);
  const max = ledger.length * 10;
  const avg = total / ledger.length;
  ledger.forEach(r => {
    console.log(` R${r.round} ${r.task.target_city.padEnd(10)} ${r.task.target_role.padEnd(20)} ${r.score}/10`);
  });
  console.log(`\n TOTAL: ${total}/${max} AVG: ${avg.toFixed(1)}/10`);

  // Hard gate: at least 2/3 of the rounds must have a successful build
  // AND a round score of 6 or more.
  const needed = Math.ceil(ledger.length * 2 / 3);
  const passed = ledger.filter(r => r.build.ok && r.score >= 6).length;
  if (passed < needed) {
    throw new Error(`network proving gate failed — only ${passed}/${ledger.length} rounds passed (need ≥${needed})`);
  }
  console.log(`\n✓ Network proven over ${passed}/${ledger.length} rounds`);
  process.exit(0);
}

// Anything thrown out of main() — including the gate failure — is printed
// (message, then stack when present) and becomes exit code 1.
main().catch(e => {
  const err = e as any;
  console.error(`\n✗ ${(e as Error).message}`);
  if (err.stack) console.error(err.stack);
  process.exit(1);
});
|
||||||
302
tests/multi-agent/orchestrator.ts
Normal file
302
tests/multi-agent/orchestrator.ts
Normal file
@ -0,0 +1,302 @@
|
|||||||
|
// Two-agent orchestrator. Both agents run as concurrent async loops
|
||||||
|
// coordinated through a shared in-memory log; one turn of executor then
|
||||||
|
// one turn of reviewer, interleaved until consensus_done, drift-cycle
|
||||||
|
// blown, or hard turn cap. On success writes a playbook JSON; on failure
|
||||||
|
// exits non-zero with the full log for inspection.
|
||||||
|
//
|
||||||
|
// Fail-fast: every caught error is appended to the log AND rethrown, so
|
||||||
|
// the orchestrator top-level catches, dumps, and exits with code 1. The
|
||||||
|
// test harness reads the exit code to decide if the substrate is healthy.
|
||||||
|
|
||||||
|
import {
|
||||||
|
type LogEntry,
|
||||||
|
type TaskSpec,
|
||||||
|
type Action,
|
||||||
|
type Fill,
|
||||||
|
callTool,
|
||||||
|
hybridSearch,
|
||||||
|
sqlQuery,
|
||||||
|
generate,
|
||||||
|
parseAction,
|
||||||
|
executorPrompt,
|
||||||
|
reviewerPrompt,
|
||||||
|
GATEWAY,
|
||||||
|
} from "./agent.ts";
|
||||||
|
import { mkdir, writeFile } from "node:fs/promises";
|
||||||
|
import { join } from "node:path";
|
||||||
|
|
||||||
|
const EXECUTOR_MODEL = "mistral:latest";
const REVIEWER_MODEL = "qwen2.5:latest";

// Cap on executor turns; the reviewer gets one turn per executor turn.
const MAX_TURNS = 12;
// Drift cycle blown → give up.
const MAX_CONSECUTIVE_DRIFTS = 3;

// Default task, used when no argv[2] is given (see `parseTaskFromArg` for
// the override formats). It follows the real-world staffing pattern recorded
// in successful_playbooks, but per the original note it is not itself an
// entry in that list — it is a fresh fill.
// Toledo, OH has 342 welders in workers_500k so supply is ample — the test
// is about collaboration and drift correction, not needle-in-haystack.
const DEFAULT_TASK: TaskSpec = {
  // Unique per process start.
  id: `task-${Date.now()}`,
  operation: "fill: Welder x2 in Toledo, OH",
  target_role: "Welder",
  target_count: 2,
  target_city: "Toledo",
  target_state: "OH",
  approach_hint: "hybrid search against workers_500k_v1, narrow by role+city+state+availability, rank semantically",
};
|
||||||
|
|
||||||
|
/**
 * Build the task for this run from `process.argv[2]`.
 *
 *  - no argument          → DEFAULT_TASK
 *  - argument ends ".json" → the file is read and parsed as the TaskSpec
 *                            (its contents are not validated here)
 *  - otherwise            → whitespace-separated `key:value` tokens, e.g.
 *                            "role:Welder count:2 city:Columbus state:OH".
 *                            `role`, `count`, `city` and `state` are required;
 *                            `hint` is optional. Values cannot contain
 *                            whitespace — use a JSON file for anything richer.
 *
 * @throws Error when a required key is missing or `count` is not a positive
 *         integer. Failing here is deliberate: the alternative is a task like
 *         "fill: undefined xundefined in undefined, undefined" that only
 *         fails much later and much less legibly.
 */
function parseTaskFromArg(): TaskSpec {
  const arg = process.argv[2];
  if (!arg) return DEFAULT_TASK;

  if (arg.endsWith(".json")) {
    return JSON.parse(require("node:fs").readFileSync(arg, "utf-8"));
  }

  // key:value tokens; everything after the first ":" belongs to the value.
  const kv: Record<string, string> = {};
  for (const token of arg.trim().split(/\s+/)) {
    const [k, ...v] = token.split(":");
    kv[k] = v.join(":");
  }

  const missing = ["role", "count", "city", "state"].filter(key => !kv[key]);
  if (missing.length > 0) {
    throw new Error(
      `task argument is missing ${missing.map(key => `${key}:<value>`).join(" ")} — `
      + `expected e.g. "role:Welder count:2 city:Columbus state:OH", got "${arg}"`,
    );
  }

  const count = Number(kv.count);
  if (!Number.isInteger(count) || count <= 0) {
    throw new Error(`task argument count must be a positive integer, got "${kv.count}"`);
  }

  return {
    id: `task-${Date.now()}`,
    operation: `fill: ${kv.role} x${kv.count} in ${kv.city}, ${kv.state}`,
    target_role: kv.role,
    target_count: count,
    target_city: kv.city,
    target_state: kv.state,
    approach_hint: kv.hint ?? "hybrid search",
  };
}
|
||||||
|
|
||||||
|
/**
 * Pretty one-line rendering of a log entry, so a human watching stdout can
 * follow the run without pulling the JSONL file.
 *
 * Defensive by design: models sometimes omit optional fields (rationale,
 * notes), so every access is guarded.
 *
 * Line shape: `[tNN <role> <kind>] <kind-specific summary>`.
 */
function fmt(e: LogEntry): string {
  const c: any = e.content ?? {};
  const clip = (value: any, max: number) => String(value ?? "").slice(0, max);

  const turnLabel = e.turn.toString().padStart(2, "0");
  const tag = `[t${turnLabel} ${e.role.padEnd(8)} ${e.kind.padEnd(14)}]`;

  let summary: string;
  switch (e.kind as string) {
    case "tool_call": {
      const argsPreview = JSON.stringify(c.args ?? {}).slice(0, 80);
      summary = `${c.tool}(${argsPreview}) — ${clip(c.rationale, 60)}`;
      break;
    }
    case "tool_result": {
      // Prefer a row/source count; fall back to a clipped JSON dump.
      const count = c?.rows?.length ?? c?.sources?.length ?? undefined;
      summary = count !== undefined ? `rows=${count}` : JSON.stringify(c).slice(0, 80);
      break;
    }
    case "critique":
      summary = `verdict=${c.verdict} — ${clip(c.notes, 80)}`;
      break;
    case "propose_done": {
      const names = (c.fills ?? []).map((f: Fill) => f.name).join(", ");
      summary = `${c.fills?.length ?? 0} fills: ${names}`;
      break;
    }
    case "consensus_done":
      summary = "✓";
      break;
    case "plan": {
      const firstSteps = (c.steps ?? []).slice(0, 2).join(" / ");
      summary = `${c.steps?.length ?? 0} steps: ${firstSteps}`;
      break;
    }
    case "error":
      summary = `${c.message ?? c}`;
      break;
    default:
      summary = JSON.stringify(c).slice(0, 100);
  }
  return `${tag} ${summary}`;
}
|
||||||
|
|
||||||
|
/**
 * Execute one tool call. The tool catalog in the prompt lists both the
 * registered Phase 12 tools AND a pseudo-tool "hybrid_search" for the
 * /vectors/hybrid endpoint — they are unified here so the executor doesn't
 * need to know which surface a capability lives on.
 *
 *  - "hybrid_search" → POST `${GATEWAY}/vectors/hybrid`; requires sql_filter,
 *    question and index_name, optional `k` (default 10).
 *  - "sql"           → sqlQuery; the query must be a string starting with SELECT.
 *  - anything else   → Phase 12 registry via callTool.
 *
 * @throws Error on invalid arguments or a non-2xx hybrid response. The caller
 *         logs thrown errors as a tool_result carrying an `error` field, so
 *         throwing is how failures reach the executor's next prompt.
 */
async function executeToolCall(name: string, args: Record<string, any>): Promise<any> {
  if (name === "hybrid_search") {
    const { sql_filter, question, index_name, k } = args;
    if (!sql_filter || !question || !index_name) {
      throw new Error(`hybrid_search needs sql_filter + question + index_name, got ${JSON.stringify(args)}`);
    }
    // Pass through to /vectors/hybrid. id_column defaults to worker_id
    // server-side, which is what workers_500k uses.
    const body: any = { sql_filter, question, index_name, top_k: k ?? 10, generate: false };
    // Use the shared GATEWAY base (as the other callers of this endpoint do)
    // instead of a hard-coded host, so the harness follows its configuration.
    const r = await fetch(`${GATEWAY}/vectors/hybrid`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(body),
    });
    // Without this check an HTTP error body would be parsed and handed to the
    // executor as if it were a successful search result.
    if (!r.ok) throw new Error(`hybrid → ${r.status}: ${await r.text()}`);
    return r.json();
  }

  if (name === "sql") {
    const { query } = args;
    if (!query || typeof query !== "string") throw new Error(`sql needs query (string), got ${JSON.stringify(args)}`);
    if (!/^\s*SELECT/i.test(query)) throw new Error(`sql tool allows SELECT only: ${query}`);
    return sqlQuery(query);
  }

  // Fall through to Phase 12 registry for any other named tool.
  return callTool(name, args);
}
|
||||||
|
|
||||||
|
/**
 * Drive one executor/reviewer conversation for the task given on the command
 * line until both agents agree it is done, then persist the playbook.
 *
 * Each turn the executor emits an action (tool calls are executed and their
 * result, or error, is appended to the shared log), then the reviewer
 * critiques the log. The run seals when the executor proposes done and the
 * reviewer answers `approve_done` on the same turn.
 *
 * On success: writes `<task.id>.json`, seeds playbook_memory (best effort,
 * failures only warn) and exits 0. On any failure: dumps the log to
 * `<task.id>-FAILED.json` and exits 1.
 */
async function main() {
  const task = parseTaskFromArg();
  const playbookDir = "./tests/multi-agent/playbooks";
  const log: LogEntry[] = [];
  let turn = 0;
  let consecutiveDrifts = 0;
  // Tool failures get their own streak. Sharing the drift counter meant any
  // non-drift reviewer verdict reset it, so an executor that could never form
  // a valid call was not aborted as long as the reviewer kept saying
  // "continue".
  let consecutiveToolErrors = 0;
  let sealed: { fills: Fill[]; approach: string } | null = null;

  // Timestamp, record and echo one log entry.
  const append = (e: Omit<LogEntry, "at">): LogEntry => {
    const full: LogEntry = { ...e, at: new Date().toISOString() };
    log.push(full);
    console.log(fmt(full));
    return full;
  };

  console.log(`▶ task: ${task.operation}`);
  console.log(`▶ executor=${EXECUTOR_MODEL} reviewer=${REVIEWER_MODEL}`);
  console.log();

  try {
    while (turn < MAX_TURNS && !sealed) {
      turn += 1;

      // --- EXECUTOR TURN ---
      const execRaw = await generate(EXECUTOR_MODEL, executorPrompt(task, log), {
        temperature: 0.2,
        max_tokens: 600,
      });
      let execAction: Action;
      try {
        execAction = parseAction(execRaw, "executor");
      } catch (e) {
        // Keep the head of the raw output so the failure dump shows what the
        // model actually said.
        append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "error",
          content: { message: (e as Error).message, raw: execRaw.slice(0, 400) } });
        throw e;
      }

      append({ turn, role: "executor", model: EXECUTOR_MODEL,
        kind: execAction.kind as any, content: execAction });

      // If tool_call, execute and feed the result back into the log. Tool
      // validation / server errors come back as a tool_result with an
      // `error` field, so the executor reads its own error on the next turn
      // and can self-correct instead of crashing the run.
      if (execAction.kind === "tool_call") {
        try {
          const result = await executeToolCall(execAction.tool, execAction.args);
          append({ turn, role: "executor", model: EXECUTOR_MODEL,
            kind: "tool_result", content: trimResult(result) });
          consecutiveToolErrors = 0;
        } catch (e) {
          append({ turn, role: "executor", model: EXECUTOR_MODEL,
            kind: "tool_result",
            content: { error: (e as Error).message, tool: execAction.tool, args: execAction.args } });
          consecutiveToolErrors += 1;
          if (consecutiveToolErrors >= MAX_CONSECUTIVE_DRIFTS) {
            throw new Error(`aborting — ${MAX_CONSECUTIVE_DRIFTS} consecutive tool errors, executor can't self-correct`);
          }
        }
      }

      // --- REVIEWER TURN ---
      const revRaw = await generate(REVIEWER_MODEL, reviewerPrompt(task, log), {
        temperature: 0.1,
        max_tokens: 400,
      });
      let revAction: Action;
      try {
        revAction = parseAction(revRaw, "reviewer");
      } catch (e) {
        append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "error",
          content: { message: (e as Error).message, raw: revRaw.slice(0, 400) } });
        throw e;
      }
      append({ turn, role: "reviewer", model: REVIEWER_MODEL,
        kind: "critique", content: revAction });

      if (revAction.kind !== "critique") throw new Error(`reviewer emitted non-critique: ${revAction.kind}`);

      if (revAction.verdict === "drift") {
        consecutiveDrifts += 1;
        if (consecutiveDrifts >= MAX_CONSECUTIVE_DRIFTS) {
          throw new Error(`aborting — ${MAX_CONSECUTIVE_DRIFTS} consecutive drift flags, executor can't self-correct`);
        }
      } else {
        consecutiveDrifts = 0;
      }

      // Consensus: executor proposed done AND reviewer approved.
      if (execAction.kind === "propose_done" && revAction.verdict === "approve_done") {
        if (execAction.fills.length !== task.target_count) {
          throw new Error(`consensus malformed — ${execAction.fills.length} fills vs target ${task.target_count}`);
        }
        append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "consensus_done",
          content: { fills: execAction.fills } });
        sealed = { fills: execAction.fills, approach: execAction.rationale };
      }
    }

    if (!sealed) throw new Error(`no consensus after ${MAX_TURNS} turns — task incomplete`);

    // Write playbook entry matching the successful_playbooks schema.
    const playbook = {
      timestamp: new Date().toISOString(),
      operation: task.operation,
      approach: sealed.approach,
      result: `${sealed.fills.length}/${task.target_count} filled → ${sealed.fills.map(f => f.name).join(", ")}`,
      context: `executor=${EXECUTOR_MODEL} reviewer=${REVIEWER_MODEL} turns=${turn}`,
      task,
      fills: sealed.fills,
      log,
    };
    await mkdir(playbookDir, { recursive: true });
    const path = join(playbookDir, `${task.id}.json`);
    await writeFile(path, JSON.stringify(playbook, null, 2));
    console.log(`\n✓ playbook written: ${path}`);
    console.log(` ${playbook.result}`);

    // Phase 19.5: write-through to playbook_memory. The sealed fills are
    // the endorsement; next semantically-similar query will surface them
    // higher. /seed bypasses the successful_playbooks ingest round-trip;
    // when that ingest path ships, this block should switch to append
    // + rebuild instead.
    try {
      // Seed context is what the embedding model sees alongside the
      // operation, so it carries task-semantic content (role, city,
      // scenario) rather than orchestrator bookkeeping. The bookkeeping
      // lives in the full playbook JSON written above.
      const seedContext = task.approach_hint
        ?? `${task.target_role} fill in ${task.target_city}, ${task.target_state}`;
      const seedRes = await fetch(`${GATEWAY}/vectors/playbook_memory/seed`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          operation: task.operation,
          approach: sealed.approach || "multi-agent → hybrid search",
          context: seedContext,
          endorsed_names: sealed.fills.map(f => f.name),
          append: true,
        }),
      });
      if (seedRes.ok) {
        const j = await seedRes.json() as any;
        console.log(` ↳ playbook_memory seeded: id=${j.playbook_id} entries=${j.entries_after}`);
      } else {
        console.warn(` ↳ playbook_memory seed failed: ${seedRes.status} ${await seedRes.text()}`);
      }
    } catch (e) {
      // Seeding is best effort: the playbook file is already on disk.
      console.warn(` ↳ playbook_memory seed errored: ${(e as Error).message}`);
    }

    process.exit(0);
  } catch (e) {
    console.error(`\n✗ ${(e as Error).message}`);
    // Still persist the log for inspection.
    await mkdir(playbookDir, { recursive: true });
    const path = join(playbookDir, `${task.id}-FAILED.json`);
    await writeFile(path, JSON.stringify({ task, error: (e as Error).message, log }, null, 2));
    console.error(` log dumped: ${path}`);
    process.exit(1);
  }
}
|
||||||
|
|
||||||
|
/**
 * Cap a tool result's `rows` array at 20 entries before it is logged.
 *
 * Results without a `rows` array are returned untouched. When rows were
 * dropped, `_trimmed` says how many; otherwise `_trimmed` is present but
 * undefined.
 */
function trimResult(r: any): any {
  const hasRows = Boolean(r) && Array.isArray(r.rows);
  if (!hasRows) return r;

  const overflow = r.rows.length - 20;
  return {
    ...r,
    rows: r.rows.slice(0, 20),
    _trimmed: overflow > 0 ? `${overflow} more rows` : undefined,
  };
}
|
||||||
|
|
||||||
|
// Entry point. main() handles its own failures, but the task parsing and the
// failure-path file writes sit outside or inside its catch and can still
// reject; surface that as a non-zero exit instead of an unhandled rejection.
main().catch((e) => {
  console.error(`\n✗ ${(e as Error).message}`);
  process.exit(1);
});
|
||||||
400
tests/multi-agent/run_e2e_rated.ts
Normal file
400
tests/multi-agent/run_e2e_rated.ts
Normal file
@ -0,0 +1,400 @@
|
|||||||
|
// Two-agent x two-tasks parallel real-world test with per-playbook rating.
|
||||||
|
//
|
||||||
|
// Spawns two independent (executor, reviewer) pairs concurrently, each
|
||||||
|
// driving a different staffing fill against the live substrate. After
|
||||||
|
// each pair seals a playbook, the script verifies the fill against workers_500k,
|
||||||
|
// confirms the seed reached playbook_memory, and re-runs the same query
|
||||||
|
// with use_playbook_memory=true to prove the boost fires.
|
||||||
|
//
|
||||||
|
// Errors fail fast — any HTTP error, parse error, or rating failure is
|
||||||
|
// rethrown so bun exits non-zero. Run with:
|
||||||
|
//
|
||||||
|
// bun run tests/multi-agent/run_e2e_rated.ts
|
||||||
|
//
|
||||||
|
// VRAM note: both pairs call the same two Ollama models (mistral +
|
||||||
|
// qwen2.5). Ollama queues at the model level, so "parallel" is concurrent
|
||||||
|
// orchestration, not concurrent inference — the loops interleave on the
|
||||||
|
// shared models. That's intentional: it stresses the same realistic
|
||||||
|
// path two staffing coordinators would hit if they both opened the app
|
||||||
|
// at 8am.
|
||||||
|
|
||||||
|
import {
|
||||||
|
type LogEntry,
|
||||||
|
type TaskSpec,
|
||||||
|
type Action,
|
||||||
|
type Fill,
|
||||||
|
GATEWAY,
|
||||||
|
generate,
|
||||||
|
parseAction,
|
||||||
|
executorPrompt,
|
||||||
|
reviewerPrompt,
|
||||||
|
sqlQuery,
|
||||||
|
callTool,
|
||||||
|
} from "./agent.ts";
|
||||||
|
|
||||||
|
// Model tags for the two agent roles.
const EXECUTOR_MODEL = "mistral:latest";
const REVIEWER_MODEL = "qwen2.5:latest";

// Upper bound on executor/reviewer turns per task.
const MAX_TURNS = 12;

// Abort threshold for a streak of reviewer "drift" verdicts; the same limit
// is applied to a streak of failed tool calls.
const MAX_CONSECUTIVE_DRIFTS = 3;

// Vector index queried by the boost verification step.
const INDEX_NAME = "workers_500k_v1";
|
||||||
|
|
||||||
|
/** Outcome of one executor/reviewer run, whether it sealed or failed. */
interface RunResult {
  /** The task the pair was asked to fill. */
  task: TaskSpec;
  /** True when consensus was reached; false when the run threw. */
  ok: boolean;
  /** Number of turns started before the run ended. */
  turns: number;
  /** Wall-clock duration, rounded to whole seconds. */
  duration_secs: number;
  /** Consensus fills; empty on failure. */
  fills: Fill[];
  /** Every log entry appended during the run. */
  log: LogEntry[];
  /** Approach recorded for the sealed playbook; empty string on failure. */
  approach: string;
  /** Message of the error that ended a failed run. */
  error?: string;
}
|
||||||
|
|
||||||
|
// ────────────────────────── orchestrator (function form) ──────────────────────────
|
||||||
|
|
||||||
|
/**
 * Run one executor/reviewer conversation for `task` and report the outcome
 * as a value instead of exiting the process, so several runs can be awaited
 * side by side.
 *
 * @param task   The fill to drive.
 * @param prefix Label prepended to every console line for this run.
 * @returns A RunResult with `ok: true` and the consensus fills, or
 *          `ok: false` with the error message. This function never rejects.
 */
async function runOrchestrator(task: TaskSpec, prefix: string): Promise<RunResult> {
  const start = Date.now();
  const elapsedSecs = () => Math.round((Date.now() - start) / 1000);
  const log: LogEntry[] = [];
  let turn = 0;
  let consecutiveDrifts = 0;
  // Track tool errors separately from drift verdicts. Reviewer saying
  // "continue" or "approve_done" should NOT reset a streak of malformed
  // tool calls: that is a different failure mode (model can't form the
  // call) than "executor is on the wrong path" (model is off-topic).
  let consecutiveToolErrors = 0;
  let sealed: { fills: Fill[]; approach: string } | null = null;

  // Timestamp, record and echo one log entry.
  const append = (e: Omit<LogEntry, "at">): LogEntry => {
    const full: LogEntry = { ...e, at: new Date().toISOString() };
    log.push(full);
    console.log(`[${prefix}] [t${e.turn.toString().padStart(2, "0")} ${e.role.padEnd(8)} ${e.kind.padEnd(14)}] ${shortContent(e)}`);
    return full;
  };

  // Parse a model reply; on failure record the head of the raw text as an
  // "error" entry before rethrowing, so the returned log shows what the
  // model actually emitted.
  const parseLogged = (raw: string, role: "executor" | "reviewer", model: string): Action => {
    try {
      return parseAction(raw, role);
    } catch (e) {
      append({ turn, role, model, kind: "error",
        content: { message: (e as Error).message, raw: String(raw).slice(0, 400) } });
      throw e;
    }
  };

  try {
    while (turn < MAX_TURNS && !sealed) {
      turn += 1;

      // Executor
      const execRaw = await generate(EXECUTOR_MODEL, executorPrompt(task, log), { temperature: 0.2, max_tokens: 600 });
      const execAction = parseLogged(execRaw, "executor", EXECUTOR_MODEL);
      append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: execAction.kind as any, content: execAction });

      if (execAction.kind === "tool_call") {
        try {
          const result = await executeToolCall(execAction.tool, execAction.args);
          append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result", content: trimResult(result) });
          consecutiveToolErrors = 0;
        } catch (e) {
          // Feed the error back as a tool_result so the executor can read it
          // on its next turn.
          append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result",
            content: { error: (e as Error).message, tool: execAction.tool, args: execAction.args } });
          consecutiveToolErrors += 1;
          if (consecutiveToolErrors >= MAX_CONSECUTIVE_DRIFTS) {
            throw new Error(`${MAX_CONSECUTIVE_DRIFTS} consecutive tool errors — executor can't form a valid call`);
          }
        }
      }

      // Reviewer
      const revRaw = await generate(REVIEWER_MODEL, reviewerPrompt(task, log), { temperature: 0.1, max_tokens: 400 });
      const revAction = parseLogged(revRaw, "reviewer", REVIEWER_MODEL);
      append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "critique", content: revAction });

      if (revAction.kind !== "critique") throw new Error(`reviewer non-critique: ${revAction.kind}`);
      if (revAction.verdict === "drift") {
        consecutiveDrifts += 1;
        if (consecutiveDrifts >= MAX_CONSECUTIVE_DRIFTS) throw new Error(`${MAX_CONSECUTIVE_DRIFTS} consecutive drifts`);
      } else {
        consecutiveDrifts = 0;
      }

      // Consensus: executor proposed done AND reviewer approved.
      if (execAction.kind === "propose_done" && revAction.verdict === "approve_done") {
        if (execAction.fills.length !== task.target_count) {
          throw new Error(`fills=${execAction.fills.length} target=${task.target_count}`);
        }
        append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "consensus_done", content: { fills: execAction.fills } });
        sealed = { fills: execAction.fills, approach: (execAction as any).rationale ?? "multi-agent → hybrid" };
      }
    }

    if (!sealed) throw new Error(`no consensus after ${MAX_TURNS} turns`);

    // Phase 19 write-through: seed playbook_memory so the next semantically
    // similar query benefits from this fill. Names are the consensus fills'
    // display names. Seeding is best effort: failures warn but do not fail
    // the run.
    try {
      const seedRes = await fetch(`${GATEWAY}/vectors/playbook_memory/seed`, {
        method: "POST", headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          operation: task.operation,
          approach: sealed.approach || "multi-agent → hybrid search",
          context: task.approach_hint ?? `${task.target_role} fill in ${task.target_city}, ${task.target_state}`,
          endorsed_names: sealed.fills.map(f => f.name),
          append: true,
        }),
      });
      if (!seedRes.ok) {
        console.warn(`[${prefix}] seed warning: ${seedRes.status} ${await seedRes.text()}`);
      } else {
        const j = await seedRes.json() as any;
        console.log(`[${prefix}] ↳ seeded playbook_memory: id=${j.playbook_id} entries=${j.entries_after}`);
      }
    } catch (e) {
      console.warn(`[${prefix}] seed errored: ${(e as Error).message}`);
    }

    return {
      task, ok: true, turns: turn, fills: sealed.fills, approach: sealed.approach,
      duration_secs: elapsedSecs(), log,
    };
  } catch (e) {
    return {
      task, ok: false, turns: turn, fills: [], approach: "",
      duration_secs: elapsedSecs(), log,
      error: (e as Error).message,
    };
  }
}
|
||||||
|
|
||||||
|
/**
 * Dispatch one executor tool call.
 *
 * - `hybrid_search` posts to the gateway's /vectors/hybrid endpoint with
 *   playbook memory enabled and generation off.
 * - `sql` runs a query through `sqlQuery` after checking it starts with
 *   SELECT.
 * - Any other name is handed to `callTool`.
 *
 * Throws when required arguments are missing or the gateway answers with a
 * non-OK status.
 */
async function executeToolCall(name: string, args: Record<string, any>): Promise<any> {
  switch (name) {
    case "hybrid_search": {
      const { sql_filter, question, index_name, k } = args;
      const complete = sql_filter && question && index_name;
      if (!complete) throw new Error(`hybrid_search needs sql_filter+question+index_name`);

      const payload = {
        sql_filter,
        question,
        index_name,
        top_k: k ?? 10,
        generate: false,
        use_playbook_memory: true,
      };
      const resp = await fetch(`${GATEWAY}/vectors/hybrid`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(payload),
      });
      if (!resp.ok) throw new Error(`hybrid_search → ${resp.status}: ${await resp.text()}`);
      return resp.json();
    }

    case "sql": {
      const usable = Boolean(args.query) && typeof args.query === "string";
      if (!usable) throw new Error("sql needs query");
      const startsWithSelect = /^\s*SELECT/i.test(args.query);
      if (!startsWithSelect) throw new Error("sql allows SELECT only");
      return sqlQuery(args.query);
    }

    default:
      return callTool(name, args);
  }
}
|
||||||
|
|
||||||
|
/**
 * Shrink a tool result before logging: `rows` is capped at 20 entries, or
 * failing that `sources` is capped at 12. Only the first matching field is
 * trimmed; anything else is returned as-is.
 */
function trimResult(r: any): any {
  const caps: Array<[string, number]> = [
    ["rows", 20],
    ["sources", 12],
  ];
  for (const [field, cap] of caps) {
    if (r && Array.isArray(r[field])) {
      return { ...r, [field]: r[field].slice(0, cap) };
    }
  }
  return r;
}
|
||||||
|
|
||||||
|
/**
 * Render a log entry's content as a short one-line summary for the console,
 * with one format per entry kind. Kinds without a dedicated format fall back
 * to the first 80 characters of the JSON-encoded content.
 */
function shortContent(e: Omit<LogEntry, "at">): string {
  const body: any = e.content ?? {};
  const jsonHead = () => JSON.stringify(body).slice(0, 80);

  switch (e.kind as string) {
    case "tool_call": {
      const argText = JSON.stringify(body.args ?? {}).slice(0, 70);
      return `${body.tool}(${argText})`;
    }
    case "tool_result": {
      if (body.error) return `error: ${body.error}`;
      if (Array.isArray(body.sources)) return `hybrid sql=${body.sql_matches} reranked=${body.vector_reranked}`;
      if (Array.isArray(body.rows)) return `sql ${body.rows.length} rows`;
      return jsonHead();
    }
    case "critique":
      return `verdict=${body.verdict} ${(body.notes ?? "").slice(0, 60)}`;
    case "propose_done": {
      const fills = body.fills ?? [];
      const names = fills.map((f: Fill) => f.name).join(", ");
      return `${fills.length} fills: ${names}`;
    }
    case "consensus_done":
      return "✓";
    case "plan":
      return `${(body.steps ?? []).length} steps`;
    default:
      return jsonHead();
  }
}
|
||||||
|
|
||||||
|
// ────────────────────────── playbook rating ──────────────────────────
|
||||||
|
|
||||||
|
/** Per-playbook score card; the five components add up to `total` out of 10. */
interface Rating {
  /** 0-2: fills actually in target city/state. */
  geo: number;
  /** 0-2: fills' worker_ids exist in workers_500k. */
  authenticity: number;
  /** 0-2: playbook_memory entry count grew correctly. */
  persistence: number;
  /** 0-3: follow-up query shows non-zero boost. */
  boost_firing: number;
  /** 0-1: completed under 4 min. */
  speed: number;
  /** Sum of the components, out of 10. */
  total: number;
  /** Human-readable remarks collected while scoring. */
  notes: string[];
}
|
||||||
|
|
||||||
|
/** The fields this script reads from the playbook_memory stats endpoint. */
interface MemoryStats {
  entries: number;
  total_names_endorsed: number;
}
|
||||||
|
|
||||||
|
/**
 * Read the current playbook_memory counters from the gateway.
 * Throws with the HTTP status when the endpoint does not answer OK.
 */
async function fetchMemoryStats(): Promise<MemoryStats> {
  const resp = await fetch(`${GATEWAY}/vectors/playbook_memory/stats`);
  if (resp.ok) {
    return resp.json() as Promise<MemoryStats>;
  }
  throw new Error(`stats → ${resp.status}`);
}
|
||||||
|
|
||||||
|
/**
 * Resolve a fill's candidate_id to a workers_500k row.
 *
 * Accepts "W500K-7995" (vector doc_id with prefix) and "7995" (raw
 * worker_id); a bare number is tolerated as well because the value comes
 * from model output. The id is taken from the trailing run of digits, so the
 * "500" inside the prefix can never leak into the worker_id when the prefix
 * is spelled slightly differently (e.g. "W500K_7995").
 *
 * @returns The matching row, or null when the id is unusable or no row exists.
 */
async function lookupWorker(candidate_id: string): Promise<{ worker_id: number; name: string; city: string; state: string; role: string } | null> {
  const match = /(\d+)\s*$/.exec(String(candidate_id ?? ""));
  if (!match) return null;

  const num = Number.parseInt(match[1], 10);
  // Reject ids too large to be represented exactly; they would otherwise be
  // interpolated into the SQL in a rounded or exponent form.
  if (!Number.isSafeInteger(num)) return null;

  const r = await sqlQuery(`SELECT worker_id, name, city, state, role FROM workers_500k WHERE worker_id = ${num} LIMIT 1`);
  return (r.rows && r.rows[0]) ?? null;
}
|
||||||
|
|
||||||
|
/**
 * Re-run a hybrid query that mirrors the contract, to check whether
 * playbook memory lifts results for a search like the one just sealed.
 *
 * @returns How many of the returned sources carry a positive
 *          `playbook_boost`, up to five of their citations, and the largest
 *          boost seen.
 * @throws When the hybrid endpoint answers with a non-OK status.
 */
async function verifyBoostFires(task: TaskSpec): Promise<{ boostedHits: number; sampleCitations: string[]; topBoost: number }> {
  // Quote a value as a SQL string literal. Applied to every interpolated
  // field, state included, so no task value can break out of the filter.
  const quote = (value: string) => `'${String(value).replace(/'/g, "''")}'`;

  // Mirror the contract's actual geo. If the verify SQL doesn't restrict to
  // the same city as the sealed fill, the candidate pool may not include the
  // seeded workers and the boost has nothing to lift.
  const sql_filter = `role = ${quote(task.target_role)} `
    + `AND state = ${quote(task.target_state)} `
    + `AND city = ${quote(task.target_city)}`;

  const r = await fetch(`${GATEWAY}/vectors/hybrid`, {
    method: "POST", headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      index_name: INDEX_NAME, filter_dataset: "workers_500k", id_column: "worker_id",
      sql_filter, question: `${task.target_role} in ${task.target_city}, ${task.target_state}`,
      top_k: 10, generate: false, use_playbook_memory: true, playbook_memory_k: 15,
    }),
  });
  if (!r.ok) throw new Error(`verify hybrid → ${r.status}: ${await r.text()}`);

  const j = (await r.json()) as any;
  const sources: any[] = j.sources ?? [];
  const boosted = sources.filter(s => (s.playbook_boost ?? 0) > 0);
  const cites = boosted.flatMap(s => s.playbook_citations ?? []).slice(0, 5);
  const top = sources.reduce((m, s) => Math.max(m, s.playbook_boost ?? 0), 0);
  return { boostedHits: boosted.length, sampleCitations: cites, topBoost: top };
}
|
||||||
|
|
||||||
|
/**
 * Score one successful run out of 10.
 *
 * Components: authenticity (fills resolve to workers_500k rows, max 2), geo
 * (those rows are in the target city/state, max 2), persistence
 * (playbook_memory grew by exactly one entry = 2, by more = 1), boost firing
 * (a follow-up hybrid query shows boosted hits, max 3) and speed (run took
 * at most 240s, 1).
 *
 * @param result      The run to score; only meaningful when `result.ok`.
 * @param statsBefore playbook_memory counters attributed to before this run.
 * @param statsAfter  playbook_memory counters attributed to after this run.
 * @returns The component scores, their sum, and explanatory notes.
 */
async function ratePlaybook(
  result: RunResult,
  statsBefore: MemoryStats,
  statsAfter: MemoryStats,
): Promise<Rating> {
  const notes: string[] = [];
  let geo = 0, authenticity = 0, persistence = 0, boost_firing = 0, speed = 0;
  const { task } = result;

  // Case- and whitespace-insensitive comparison that tolerates missing
  // values in the looked-up row.
  const sameText = (a: unknown, b: unknown) =>
    String(a ?? "").trim().toLowerCase() === String(b ?? "").trim().toLowerCase();

  // 1. Geo + authenticity per fill
  for (const f of result.fills) {
    let w: Awaited<ReturnType<typeof lookupWorker>>;
    try {
      w = await lookupWorker(f.candidate_id);
    } catch (e) {
      // A failed lookup is not evidence that the worker is missing; say so
      // instead of reporting it as "not in workers_500k".
      notes.push(`✗ lookup for candidate_id ${f.candidate_id} failed: ${(e as Error).message}`);
      continue;
    }
    if (!w) { notes.push(`✗ candidate_id ${f.candidate_id} not in workers_500k`); continue; }
    authenticity += 1;
    if (sameText(w.city, task.target_city) && sameText(w.state, task.target_state)) {
      geo += 1;
    } else {
      notes.push(`◑ ${w.name} (id=${w.worker_id}) is in ${w.city}, ${w.state}, not ${task.target_city}, ${task.target_state}`);
    }
  }
  geo = Math.min(geo, 2);
  authenticity = Math.min(authenticity, 2);

  // 2. Persistence
  const grew = statsAfter.entries - statsBefore.entries;
  if (grew === 1) { persistence = 2; notes.push(`✓ playbook_memory grew by exactly 1`); }
  else if (grew > 1) { persistence = 1; notes.push(`◑ playbook_memory grew by ${grew} (expected 1)`); }
  else { notes.push(`✗ playbook_memory did not grow (before=${statsBefore.entries} after=${statsAfter.entries})`); }

  // 3. Boost firing — re-run the same query and see if it lifts anything
  const v = await verifyBoostFires(task).catch(e => { notes.push(`✗ verify hybrid failed: ${(e as Error).message}`); return null; });
  if (v) {
    if (v.boostedHits >= 2) boost_firing = 3;
    else if (v.boostedHits === 1) boost_firing = 2;
    else if (v.topBoost > 0) boost_firing = 1;
    else boost_firing = 0;
    notes.push(`boost re-query: ${v.boostedHits}/10 hits boosted, top=+${v.topBoost.toFixed(3)}, citations=${v.sampleCitations.slice(0, 3).join(",")}`);
  }

  // 4. Speed
  if (result.duration_secs <= 240) speed = 1;
  else notes.push(`◑ slow: ${result.duration_secs}s (>240)`);

  const total = geo + authenticity + persistence + boost_firing + speed;
  return { geo, authenticity, persistence, boost_firing, speed, total, notes };
}
|
||||||
|
|
||||||
|
/** Format a rating as a single line: each component with its maximum, then the total. */
function fmtRating(r: Rating): string {
  const components = [
    `geo=${r.geo}/2`,
    `auth=${r.authenticity}/2`,
    `persist=${r.persistence}/2`,
    `boost=${r.boost_firing}/3`,
    `speed=${r.speed}/1`,
  ];
  return `${components.join(" ")} → ${r.total}/10`;
}
|
||||||
|
|
||||||
|
// ────────────────────────── main ──────────────────────────
|
||||||
|
|
||||||
|
/**
 * Run two staffing fills concurrently, rate each sealed playbook, and gate
 * the process exit code on the outcome.
 *
 * Throws (so the caller exits non-zero) when both orchestrators fail, when
 * any rated playbook scores below 5/10, or when either orchestrator failed.
 * Exits 0 only when both runs sealed and passed the rating gate.
 */
async function main() {
  const approachHint = "hybrid_search against workers_500k_v1 with sql_filter on role+city+state, then sql verify";
  const taskA: TaskSpec = {
    id: `e2e-A-${Date.now()}`,
    operation: "fill: Welder x2 in Toledo, OH",
    target_role: "Welder", target_count: 2, target_city: "Toledo", target_state: "OH",
    approach_hint: approachHint,
  };
  const taskB: TaskSpec = {
    id: `e2e-B-${Date.now()}`,
    operation: "fill: Forklift Operator x2 in Nashville, TN",
    target_role: "Forklift Operator", target_count: 2, target_city: "Nashville", target_state: "TN",
    approach_hint: approachHint,
  };

  console.log(`▶ parallel real-world test`);
  console.log(` A: ${taskA.operation}`);
  console.log(` B: ${taskB.operation}`);
  console.log(` models: executor=${EXECUTOR_MODEL} reviewer=${REVIEWER_MODEL}\n`);

  const statsBefore = await fetchMemoryStats();
  console.log(`▶ playbook_memory before: ${statsBefore.entries} entries, ${statsBefore.total_names_endorsed} endorsed names\n`);

  // Run both pairs in parallel. Each is its own (executor, reviewer)
  // conversation; they do NOT see each other's logs.
  const [resA, resB] = await Promise.all([
    runOrchestrator(taskA, "A"),
    runOrchestrator(taskB, "B"),
  ]);

  console.log(`\n▶ both orchestrators returned`);
  console.log(` A: ok=${resA.ok} turns=${resA.turns} ${resA.duration_secs}s ${resA.error ?? ""}`);
  console.log(` B: ok=${resB.ok} turns=${resB.turns} ${resB.duration_secs}s ${resB.error ?? ""}`);

  if (!resA.ok && !resB.ok) {
    throw new Error(`both orchestrators failed — substrate or models in bad state`);
  }

  const statsMid = await fetchMemoryStats();
  console.log(`\n▶ playbook_memory after both runs: ${statsMid.entries} entries (+${statsMid.entries - statsBefore.entries})\n`);

  // Persistence is scored from the measured growth. Each successful run
  // should account for exactly one new entry; the stats cannot say which run
  // a missing or extra entry belongs to, so every successful run is judged
  // on (measured growth − entries expected from the other successful runs).
  // With one success this is the raw measurement; with two, both score full
  // marks only when memory really grew by two.
  const runs: Array<{ id: string; res: RunResult }> = [
    { id: "A", res: resA },
    { id: "B", res: resB },
  ];
  const succeeded = runs.filter(r => r.res.ok).length;
  const attributedAfter: MemoryStats = {
    ...statsMid,
    entries: statsMid.entries - (succeeded - 1),
  };

  const ratings: Array<{ id: string; ok: boolean; rating?: Rating; error?: string }> = [];
  for (const { id, res } of runs) {
    if (!res.ok) {
      ratings.push({ id, ok: false, error: res.error });
      continue;
    }
    const rating = await ratePlaybook(res, statsBefore, attributedAfter);
    ratings.push({ id, ok: true, rating });
  }

  console.log(`\n▶ Per-playbook ratings:\n`);
  for (const r of ratings) {
    if (!r.ok) {
      console.log(` ${r.id}: FAILED — ${r.error}`);
      continue;
    }
    console.log(` ${r.id}: ${fmtRating(r.rating!)}`);
    for (const n of r.rating!.notes) console.log(` ${n}`);
  }

  const totals = ratings.filter(r => r.rating).map(r => r.rating!.total);
  if (totals.length === 0) {
    throw new Error(`no playbooks rated — both orchestrators failed`);
  }
  const min = Math.min(...totals);
  const avg = totals.reduce((s, t) => s + t, 0) / totals.length;
  console.log(`\n▶ Summary: avg=${avg.toFixed(1)}/10 min=${min}/10`);

  // Hard gate: any rating below 5 means the loop is broken end-to-end.
  if (min < 5) throw new Error(`rating gate failed — min ${min}/10 (need ≥5)`);

  // A run that never sealed is a failure of the end-to-end test even when
  // the other run rated well; report it after the ratings have been printed.
  const failedIds = ratings.filter(r => !r.ok).map(r => r.id);
  if (failedIds.length > 0) {
    throw new Error(`orchestrator ${failedIds.join(", ")} failed — see per-playbook ratings above`);
  }

  console.log(`\n✓ end-to-end real-world test passed`);
  process.exit(0);
}
|
||||||
|
|
||||||
|
// Entry point: print the failure (and its stack when there is one), then
// exit non-zero.
main().catch((failure) => {
  const { message } = failure as Error;
  console.error(`\n✗ ${message}`);

  const stack = (failure as any).stack;
  if (stack) console.error(stack);

  process.exit(1);
});
|
||||||
822
tests/multi-agent/scenario.ts
Normal file
822
tests/multi-agent/scenario.ts
Normal file
@ -0,0 +1,822 @@
|
|||||||
|
// A day in the life — the real-world scenario test.
|
||||||
|
//
|
||||||
|
// Runs six events against the live substrate: baseline_fill, recurring,
|
||||||
|
// expansion, emergency, misplacement, retrospective. Each event
|
||||||
|
// exercises a different pressure pattern; each one produces actionable
|
||||||
|
// artifacts (SMS drafts, client emails, dispatch log) alongside the
|
||||||
|
// ranking output; the run as a whole is self-audited at EOD against six
|
||||||
|
// gap categories (supply, embedding, fairness, drift, tool, write-through).
|
||||||
|
//
|
||||||
|
// Design notes:
|
||||||
|
// - Compressed clock. The "08:00" in an event spec is a label for the
|
||||||
|
// output, not a wall-clock gate. The full scenario runs in minutes.
|
||||||
|
// - One script, shared state. Each event mutates the same roster +
|
||||||
|
// gap_signals + artifacts in-memory, then persists at EOD.
|
||||||
|
// - Fail-soft per event. A drift-abort or tool error on one event
|
||||||
|
// records a gap_signal and moves on; we explicitly want to see which
|
||||||
|
// events the substrate can't handle, not abort the whole run.
|
||||||
|
// - Every fill event routes through the same executor/reviewer loop as
|
||||||
|
// the single-task orchestrator — just driven in sequence rather than
|
||||||
|
// standalone, with event-specific extra constraints in the prompt.
|
||||||
|
|
||||||
|
import {
|
||||||
|
type LogEntry,
|
||||||
|
type TaskSpec,
|
||||||
|
type Action,
|
||||||
|
type Fill,
|
||||||
|
callTool,
|
||||||
|
hybridSearch,
|
||||||
|
sqlQuery,
|
||||||
|
generate,
|
||||||
|
parseAction,
|
||||||
|
executorPrompt,
|
||||||
|
reviewerPrompt,
|
||||||
|
GATEWAY,
|
||||||
|
} from "./agent.ts";
|
||||||
|
import { mkdir, writeFile, appendFile } from "node:fs/promises";
|
||||||
|
import { join } from "node:path";
|
||||||
|
|
||||||
|
// Model tags for the two agent roles.
const EXECUTOR_MODEL = "mistral:latest";
const REVIEWER_MODEL = "qwen2.5:latest";
// Artifact generation; short outputs.
const DRAFT_MODEL = "qwen2.5:latest";

// Loop limits for the executor/reviewer conversation.
const MAX_TURNS = 14;
const MAX_CONSECUTIVE_DRIFTS = 3;

// Names of the worker vector index and its backing dataset.
const WORKERS_INDEX = "workers_500k_v1";
const WORKERS_DATASET = "workers_500k";
|
||||||
|
|
||||||
|
// =================== Event + scenario types ===================
|
||||||
|
|
||||||
|
/** The kinds of fill event a scenario can contain. */
type EventKind =
  | "baseline_fill"
  | "recurring"
  | "expansion"
  | "emergency"
  | "misplacement";
|
||||||
|
|
||||||
|
/** One staffing request within a scenario. */
interface FillEvent {
  kind: EventKind;
  /** Display label like "08:00". */
  at: string;
  role: string;
  count: number;
  city: string;
  state: string;
  /** "08:00 AM" for SMS/email drafts. */
  shift_start?: string;
  /** Extra context the agents should know. */
  scenario_note?: string;
  /** Emergency events carry this; shown to the reviewer. */
  deadline?: string;
  /** Misplacement: the lost worker. */
  exclude_worker_ids?: string[];
  /** Misplacement back-reference for reporting. */
  replaces_event?: string;
}
|
||||||
|
|
||||||
|
/** A full scenario: one client, one date, and its list of events. */
interface ScenarioSpec {
  client: string;
  date: string;
  events: FillEvent[];
}
|
||||||
|
|
||||||
|
/** Outcome of running a single event. */
interface EventResult {
  event: FillEvent;
  ok: boolean;
  fills: Fill[];
  turns: number;
  duration_secs: number;
  error?: string;
  /** Pulled into the cross-event gap report. */
  gap_signals: string[];
  sources_first_score?: number;
  sources_last_score?: number;
  /** sql_matches from the first hybrid_search. */
  pool_size?: number;
  playbook_citations?: string[];
}
|
||||||
|
|
||||||
|
/** A single booking on the day's roster. */
interface RosterEntry {
  worker_id: string;
  name: string;
  /** `at` label of the event this worker was booked for. */
  booked_for: string;
  role: string;
  city: string;
  state: string;
  /** Booking state; only "confirmed" entries count toward exclusions. */
  status: "confirmed" | "no_show" | "rebooked_elsewhere";
}
|
||||||
|
|
||||||
|
/** Mutable state shared by every event in one scenario run. */
interface ScenarioContext {
  /** The scenario being executed. */
  spec: ScenarioSpec;
  /** Directory receiving sms.md, emails.md, dispatch.jsonl and the report. */
  out_dir: string;
  /** Bookings accumulated so far. */
  roster: RosterEntry[];
  /** One result per processed event, in run order. */
  results: EventResult[];
  /** Gap signals split into category/detail and tagged with the event's `at` label. */
  gap_signals: Array<{ event: string; category: string; detail: string }>;
}
|
||||||
|
|
||||||
|
// =================== Default scenario ===================
|
||||||
|
|
||||||
|
/**
 * Built-in scenario used when no spec file is passed on the command line:
 * five events at one client covering every EventKind once.
 * NOTE(review): 2026-04-21 falls on a Tuesday, yet the baseline note says
 * "Monday" — confirm which is intended before relying on weekday wording.
 */
const DEFAULT_SCENARIO: ScenarioSpec = {
  client: "Riverfront Steel",
  date: "2026-04-21",
  events: [
    // 08:00 — ordinary morning fill.
    {
      kind: "baseline_fill",
      at: "08:00",
      role: "Warehouse Associate",
      count: 3,
      city: "Toledo",
      state: "OH",
      shift_start: "08:00 AM",
      scenario_note: "Regular Monday morning shift, 8-hour.",
    },
    // 10:30 — recurring slot.
    {
      kind: "recurring",
      at: "10:30",
      role: "Machine Operator",
      count: 2,
      city: "Toledo",
      state: "OH",
      shift_start: "11:00 AM",
      scenario_note: "Recurring Tuesday/Thursday slot — prior workers may still be available.",
    },
    // 12:15 — larger team for a new location.
    {
      kind: "expansion",
      at: "12:15",
      role: "Forklift Operator",
      count: 5,
      city: "Toledo",
      state: "OH",
      shift_start: "01:00 PM",
      scenario_note: "New warehouse location opening, five-worker team needed.",
    },
    // 14:00 — same-day emergency with a hard deadline.
    {
      kind: "emergency",
      at: "14:00",
      role: "Loader",
      count: 4,
      city: "Toledo",
      state: "OH",
      shift_start: "04:00 PM same day",
      deadline: "16:00",
      scenario_note: "Walkoff incident — replacement crew needed by 16:00 sharp.",
    },
    // 15:45 — refill for a no-show from the 08:00 event.
    {
      kind: "misplacement",
      at: "15:45",
      role: "Warehouse Associate",
      count: 1,
      city: "Toledo",
      state: "OH",
      shift_start: "remainder of 08:00 shift",
      scenario_note: "One worker from the 08:00 fill didn't show; rebuild the gap.",
      replaces_event: "08:00",
    },
  ],
};
|
||||||
|
|
||||||
|
// =================== Low-level helpers shared across events ===================
|
||||||
|
|
||||||
|
/**
 * Minimal JSON-over-HTTP helper.
 *
 * Sends a POST with `body` serialized as JSON when `body` is truthy, otherwise
 * a GET. A non-OK response throws an Error whose message is
 * "<status> <response text>"; otherwise the parsed JSON body is returned.
 */
async function httpJson<T>(url: string, body?: any): Promise<T> {
  const hasBody = Boolean(body);
  const response = await fetch(url, {
    method: hasBody ? "POST" : "GET",
    headers: { "Content-Type": "application/json" },
    body: hasBody ? JSON.stringify(body) : undefined,
  });
  if (response.ok) {
    return (await response.json()) as T;
  }
  throw new Error(`${response.status} ${await response.text()}`);
}
|
||||||
|
|
||||||
|
/**
 * Renders one log entry as a single console line.
 *
 * Every line starts with a tag holding the zero-padded turn, the role padded to
 * 8 characters and the kind padded to 14; the remainder depends on the entry
 * kind. Unrecognized kinds fall back to a truncated JSON dump of the content.
 */
function fmt(e: LogEntry): string {
  const turnLabel = e.turn.toString().padStart(2, "0");
  const tag = ` [t${turnLabel} ${e.role.padEnd(8)} ${e.kind.padEnd(14)}]`;
  const c = e.content ?? {};
  // Stringify (null/undefined become "") and cut to `limit` characters.
  const clip = (value: any, limit: number) => String(value ?? "").slice(0, limit);

  switch (e.kind) {
    case "tool_call": {
      const argsPreview = JSON.stringify(c.args ?? {}).slice(0, 60);
      return `${tag} ${c.tool}(${argsPreview}) — ${clip(c.rationale, 40)}`;
    }
    case "tool_result": {
      if (c.error) return `${tag} ERROR ${c.error}`;
      // Prefer a row/source count when the result carries one.
      const rowCount = c?.rows?.length ?? c?.sources?.length ?? undefined;
      const summary = rowCount !== undefined ? `rows=${rowCount}` : JSON.stringify(c).slice(0, 60);
      return `${tag} ${summary}`;
    }
    case "critique":
      return `${tag} verdict=${c.verdict} — ${clip(c.notes, 50)}`;
    case "propose_done": {
      const names = (c.fills ?? []).map((f: Fill) => f.name).join(", ");
      return `${tag} ${c.fills?.length ?? 0} fills: ${names}`;
    }
    case "consensus_done":
      return `${tag} ✓`;
    case "plan":
      return `${tag} ${c.steps?.length ?? 0} steps`;
    case "error":
      return `${tag} ${c.message ?? c}`;
    default:
      return `${tag} ${JSON.stringify(c).slice(0, 70)}`;
  }
}
|
||||||
|
|
||||||
|
/**
 * Dispatches a tool call requested by the executor model.
 *
 * - "hybrid_search": requires sql_filter, question and index_name; forwards to
 *   the gateway's /vectors/hybrid endpoint with the playbook_memory boost on.
 * - "sql": accepts exactly one SELECT statement and runs it via sqlQuery.
 * - anything else: delegated to callTool unchanged.
 *
 * @throws Error when required arguments are missing or the SQL is not a single
 *         SELECT statement.
 */
async function executeToolCall(name: string, args: Record<string, any>): Promise<any> {
  // Models occasionally omit args entirely; treat that as an empty object so
  // the validation below produces a readable error instead of a TypeError.
  const safeArgs: Record<string, any> = args ?? {};

  if (name === "hybrid_search") {
    const { sql_filter, question, index_name, k } = safeArgs;
    if (!sql_filter || !question || !index_name) {
      throw new Error(`hybrid_search needs sql_filter + question + index_name, got ${JSON.stringify(args)}`);
    }
    // Every fill event uses the playbook_memory boost — that's the point
    // of the run-as-a-whole: earlier events seed later ones.
    return httpJson(`${GATEWAY}/vectors/hybrid`, {
      sql_filter, question, index_name,
      top_k: k ?? 10, generate: false,
      use_playbook_memory: true,
      playbook_memory_k: 10,
    });
  }

  if (name === "sql") {
    const { query } = safeArgs;
    if (!query || typeof query !== "string") throw new Error(`sql needs query string`);
    // \b keeps identifiers that merely start with "select" from passing.
    if (!/^\s*SELECT\b/i.test(query)) throw new Error(`sql allows SELECT only`);
    // The query text comes from a model, so refuse stacked statements
    // ("SELECT 1; DROP ..."). One trailing semicolon is tolerated. This is a
    // conservative text check: a ';' inside a string literal is rejected too.
    const withoutTrailingSemicolon = query.trim().replace(/;\s*$/, "");
    if (withoutTrailingSemicolon.includes(";")) {
      throw new Error(`sql allows a single SELECT statement only`);
    }
    return sqlQuery(query);
  }

  return callTool(name, args);
}
|
||||||
|
|
||||||
|
// =================== Core fill loop — one event, one consensus ===================
|
||||||
|
|
||||||
|
/** What runAgentFill returns once executor and reviewer have sealed a fill. */
interface AgentFillOutcome {
  /** The agreed fills. */
  fills: Fill[];
  /** Executor's rationale for the sealed proposal, or a default label. */
  approach: string;
  /** Turns consumed. */
  turns: number;
  /** Wall-clock duration in seconds. */
  duration_secs: number;
  /** Full executor/reviewer log for the run. */
  log: LogEntry[];
  /** sql_matches reported by the first hybrid_search result. */
  first_sql_matches?: number;
  /** Score of the first source in the first hybrid_search result. */
  first_pool_first_score?: number;
  /** Score of the last source in the first hybrid_search result. */
  first_pool_last_score?: number;
  /** Distinct playbook citations seen on any hybrid result. */
  playbook_citations: string[];
}
|
||||||
|
|
||||||
|
/**
 * Runs the executor/reviewer loop for one task until both agree on a fill.
 *
 * Each turn the executor emits an action (a tool call is executed and its
 * result logged), then the reviewer critiques the log. The loop seals when the
 * executor proposes done and the reviewer approves in the same turn.
 *
 * @param task               Task description passed to both prompts.
 * @param extra_guidance     Event-specific text appended to both prompts.
 * @param exclude_worker_ids Ids that must not be proposed; enforced in the
 *                           prompt, on tool results, and again at seal time.
 * @returns The sealed fills plus run statistics.
 * @throws Error on unparseable model output, a non-critique reviewer action,
 *         MAX_CONSECUTIVE_DRIFTS consecutive drift flags or tool errors, a
 *         malformed consensus, an excluded worker in the consensus, or no
 *         consensus within MAX_TURNS.
 */
async function runAgentFill(
  task: TaskSpec,
  extra_guidance: string,
  exclude_worker_ids: string[],
): Promise<AgentFillOutcome> {
  const t0 = Date.now();
  const log: LogEntry[] = [];
  let turn = 0;
  let consecutiveDrifts = 0;
  let sealed: { fills: Fill[]; approach: string } | null = null;
  let first_sql_matches: number | undefined;
  let first_pool_first: number | undefined;
  let first_pool_last: number | undefined;
  const playbook_citations = new Set<string>();

  // Timestamp, store and echo a log entry.
  const append = (e: Omit<LogEntry, "at">): LogEntry => {
    const full: LogEntry = { ...e, at: new Date().toISOString() };
    log.push(full);
    console.log(fmt(full));
    return full;
  };

  // Build executor prompt with the scenario-specific guidance + exclusions
  // injected as an extra block. Reuses the base prompt so drift detection
  // and output-shape rules are unchanged.
  const withExtras = (base: string): string => {
    let addon = "";
    if (extra_guidance) addon += `\n\nEVENT-SPECIFIC GUIDANCE:\n${extra_guidance}`;
    if (exclude_worker_ids.length > 0) {
      addon += `\n\nEXCLUDE these workers (already booked / unavailable today): ${exclude_worker_ids.join(", ")}\nIf your tool results include them, skip them — never propose them.`;
    }
    return base + addon;
  };

  while (turn < MAX_TURNS && !sealed) {
    turn += 1;
    // Set when this turn's tool call threw; see the counter reset below.
    let toolFailed = false;

    const execRaw = await generate(
      EXECUTOR_MODEL,
      withExtras(executorPrompt(task, log)),
      { temperature: 0.2, max_tokens: 600 },
    );
    let execAction: Action;
    try {
      execAction = parseAction(execRaw, "executor");
    } catch (e) {
      append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "error",
        content: { message: (e as Error).message, raw: execRaw.slice(0, 300) } });
      throw e;
    }
    append({ turn, role: "executor", model: EXECUTOR_MODEL,
      kind: execAction.kind as any, content: execAction });

    if (execAction.kind === "tool_call") {
      try {
        const result = await executeToolCall(execAction.tool, execAction.args);
        // Filter tool results to enforce the exclusion list — defense in
        // depth since the prompt alone isn't enough for weak models.
        const filtered = maskExclusions(result, exclude_worker_ids);
        // Capture the first hybrid_search pool stats for gap detection.
        if (execAction.tool === "hybrid_search" && first_sql_matches === undefined) {
          first_sql_matches = (filtered as any).sql_matches;
          const sources = (filtered as any).sources ?? [];
          if (sources.length > 0) {
            first_pool_first = sources[0].score;
            first_pool_last = sources[sources.length - 1].score;
          }
        }
        const trimmed = trimResult(filtered);
        append({ turn, role: "executor", model: EXECUTOR_MODEL,
          kind: "tool_result", content: trimmed });

        // Accumulate playbook citations from any hybrid result that
        // carried them — the scenario-level report needs them.
        if (Array.isArray((filtered as any).sources)) {
          for (const s of (filtered as any).sources) {
            for (const c of s.playbook_citations ?? []) {
              playbook_citations.add(c);
            }
          }
        }
      } catch (e) {
        toolFailed = true;
        append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result",
          content: { error: (e as Error).message, tool: execAction.tool } });
        consecutiveDrifts += 1;
        if (consecutiveDrifts >= MAX_CONSECUTIVE_DRIFTS) {
          throw new Error(`aborted — ${MAX_CONSECUTIVE_DRIFTS} consecutive tool errors`);
        }
      }
    }

    const revRaw = await generate(
      REVIEWER_MODEL,
      withExtras(reviewerPrompt(task, log)),
      { temperature: 0.1, max_tokens: 400 },
    );
    let revAction: Action;
    try {
      revAction = parseAction(revRaw, "reviewer");
    } catch (e) {
      append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "error",
        content: { message: (e as Error).message, raw: revRaw.slice(0, 300) } });
      throw e;
    }
    append({ turn, role: "reviewer", model: REVIEWER_MODEL,
      kind: "critique", content: revAction });

    if (revAction.kind !== "critique") throw new Error(`reviewer emitted non-critique: ${revAction.kind}`);

    if (revAction.verdict === "drift") {
      consecutiveDrifts += 1;
      if (consecutiveDrifts >= MAX_CONSECUTIVE_DRIFTS) {
        throw new Error(`aborted — ${MAX_CONSECUTIVE_DRIFTS} consecutive drift flags`);
      }
    } else if (!toolFailed) {
      // Only a clean turn clears the streak. Resetting after a failed tool
      // call would wipe the tool-error count every turn, so a run of broken
      // tool calls could never reach the abort threshold above.
      consecutiveDrifts = 0;
    }

    if (execAction.kind === "propose_done" && revAction.verdict === "approve_done") {
      if (execAction.fills.length !== task.target_count) {
        throw new Error(`consensus malformed — ${execAction.fills.length} fills vs target ${task.target_count}`);
      }
      // Enforce exclusion at seal time too, in case the models ignored
      // both prompt + tool-result filtering.
      for (const f of execAction.fills) {
        if (exclude_worker_ids.includes(f.candidate_id)) {
          throw new Error(`consensus proposed excluded worker ${f.candidate_id}`);
        }
      }
      append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "consensus_done",
        content: { fills: execAction.fills } });
      sealed = { fills: execAction.fills, approach: execAction.rationale ?? "multi-agent hybrid" };
    }
  }

  if (!sealed) throw new Error(`no consensus after ${MAX_TURNS} turns`);

  return {
    fills: sealed.fills,
    approach: sealed.approach,
    turns: turn,
    duration_secs: (Date.now() - t0) / 1000,
    log,
    first_sql_matches,
    first_pool_first_score: first_pool_first,
    first_pool_last_score: first_pool_last,
    playbook_citations: Array.from(playbook_citations),
  };
}
|
||||||
|
|
||||||
|
/**
 * Removes excluded workers from a tool result before the models see it.
 *
 * Filters `result.sources` by `doc_id` and `result.rows` by `worker_id`
 * (falling back to `doc_id`). Rows without an id are kept. Anything that is
 * not an object, or has neither array, is returned untouched.
 *
 * @param result  Raw tool result; may be null or a primitive.
 * @param exclude Worker ids to drop, normally in the "W500K-<n>" form.
 * @returns The same object when nothing could apply, otherwise a shallow copy
 *          with the filtered arrays.
 */
function maskExclusions(result: any, exclude: string[]): any {
  // Guard null/primitive results the same way trimResult does.
  if (exclude.length === 0 || result === null || typeof result !== "object") return result;

  const blocked = new Set(exclude.map(String));
  const isBlocked = (id: unknown): boolean => {
    if (id === undefined || id === null) return false;
    const raw = String(id);
    // SQL rows carry bare numeric worker_ids while the exclusion list holds
    // the promoted "W500K-<n>" form (see resolveWorkerIds), so test both.
    return blocked.has(raw) || (/^\d+$/.test(raw) && blocked.has(`W500K-${raw}`));
  };

  let masked = result;
  if (Array.isArray(result.sources)) {
    masked = { ...masked, sources: result.sources.filter((s: any) => !isBlocked(s?.doc_id)) };
  }
  // Not an else-branch: a result carrying both arrays gets both filtered.
  if (Array.isArray(result.rows)) {
    masked = { ...masked, rows: result.rows.filter((r: any) => !isBlocked(r?.worker_id ?? r?.doc_id)) };
  }
  return masked;
}
|
||||||
|
|
||||||
|
/**
 * Caps a tool result at 20 entries before it is logged.
 *
 * When `r.sources` is an array it is truncated; otherwise `r.rows` is, if
 * present. The copy gains a `_trimmed` field reading "<n> more" when entries
 * were dropped (undefined otherwise). Other values are returned unchanged.
 */
function trimResult(r: any): any {
  const LIMIT = 20;
  // Shallow-copy `r` with the array under `key` cut down to LIMIT entries.
  const clip = (key: "sources" | "rows"): any => {
    const items: any[] = r[key];
    const overflow = items.length - LIMIT;
    return {
      ...r,
      [key]: items.slice(0, LIMIT),
      _trimmed: overflow > 0 ? `${overflow} more` : undefined,
    };
  };

  if (!r) return r;
  if (Array.isArray(r.sources)) return clip("sources");
  if (Array.isArray(r.rows)) return clip("rows");
  return r;
}
|
||||||
|
|
||||||
|
// =================== Per-event guidance strings ===================
|
||||||
|
|
||||||
|
/**
 * Builds the event-specific guidance paragraph appended to the agent prompts.
 *
 * @param event The event being filled; `kind` selects the wording.
 * @param ctx   Scenario context; only `ctx.spec.client` is read.
 * @returns Guidance text, or "" for an unrecognized kind (e.g. from a
 *          hand-written spec file), which callers treat as "no guidance".
 */
function guidanceFor(event: FillEvent, ctx: ScenarioContext): string {
  switch (event.kind) {
    case "baseline_fill":
      return `Standard Monday fill. Client ${ctx.spec.client}. Shift starts ${event.shift_start ?? "at start time"}. Take the top candidates by semantic match and availability.`;
    case "recurring":
      return `RECURRING slot — ${ctx.spec.client} runs this shift every Tues/Thurs. If playbook_memory surfaces candidates endorsed by past similar fills (you'll see 'cites' on hybrid sources), those are the preferred workers. Shift starts ${event.shift_start ?? "at start time"}.`;
    case "expansion":
      return `EXPANSION at ${ctx.spec.client}. New location, ${event.count}-worker team needed at once — search broadly and prefer workers with team/collaboration signals (engagement, communications scores). Shift starts ${event.shift_start ?? "at start time"}.`;
    case "emergency":
      return `EMERGENCY walkoff — ${ctx.spec.client} needs ${event.count} ${event.role}s BY ${event.deadline ?? "end of day"}. Prioritize availability over perfect skill match. A good-enough worker who can report today beats a perfect worker who can't.`;
    case "misplacement":
      // Name the shift actually being refilled; "08:00" is kept only as the
      // fallback when the event carries no back-reference.
      return `MISPLACEMENT refill. A worker from the ${event.replaces_event ?? "08:00"} shift no-showed. You must replace them WITHOUT proposing the same worker or anyone already booked today (see EXCLUDE list). Shift is ${event.shift_start ?? "in progress"} so speed matters.`;
    default:
      return "";
  }
}
|
||||||
|
|
||||||
|
// =================== Artifact generation ===================
|
||||||
|
|
||||||
|
/** Draft communications generated for one filled event. */
interface ArtifactBundle {
  /** SMS draft text for the filled workers (trimmed model output). */
  sms: string;
  /** Confirmation email draft for the client (trimmed model output). */
  email: string;
}
|
||||||
|
|
||||||
|
/**
 * Drafts the worker SMS messages and the client confirmation email for one
 * event. Two model calls are issued concurrently: one for the SMS block, one
 * for the email. Outputs are short and low-temperature because these are
 * drafts, not creative writing.
 *
 * @param event   The filled event (role, location, shift details).
 * @param outcome Fill outcome; only `outcome.fills` is read.
 * @param ctx     Scenario context; only `ctx.spec.client` is read.
 * @returns Both drafts with surrounding whitespace trimmed.
 */
async function generateArtifacts(event: FillEvent, outcome: AgentFillOutcome, ctx: ScenarioContext): Promise<ArtifactBundle> {
  const shiftStart = event.shift_start ?? "TBD";
  const smsRecipients = outcome.fills.map(f => `- ${f.name} (id ${f.candidate_id})`).join("\n");
  // Client mailbox domain: lower-cased client name with spaces removed.
  const clientDomain = ctx.spec.client.toLowerCase().replace(/ /g, "");
  const emailWorkers = outcome.fills.map(f => `- ${f.name} (${f.reason.slice(0, 60)})`).join("\n");

  const smsPrompt = [
    "Generate short, friendly, professional SMS messages to confirm a shift for each worker. ONE message per worker. Format as:",
    "",
    "TO: {Name}",
    "{message body under 180 chars}",
    "",
    "---",
    "",
    "Details:",
    `- Client: ${ctx.spec.client}`,
    `- Role: ${event.role}`,
    `- Location: ${event.city}, ${event.state}`,
    `- Shift starts: ${shiftStart}`,
    `- Scenario: ${event.scenario_note ?? ""}`,
    "",
    "Workers to message:",
    smsRecipients,
    "",
    'Respond with only the message blocks, separated by "---". No commentary.',
  ].join("\n");

  const emailPrompt = [
    "Generate a short professional email confirmation to the staffing client.",
    "",
    `TO: staffing@${clientDomain}.example`,
    "FROM: dispatch@lakehouse.example",
    "SUBJECT: (3-word subject)",
    "",
    "Body (4-6 lines max). Be specific about:",
    `- Number of workers filled (${outcome.fills.length} of ${event.count})`,
    `- Roles: ${event.role}`,
    "- Names filled",
    `- Shift start: ${shiftStart}`,
    `- Any scenario flag: ${event.scenario_note ?? "(none)"}`,
    "",
    "Workers:",
    emailWorkers,
    "",
    "Respond with only the email. No commentary.",
  ].join("\n");

  const smsDraft = generate(DRAFT_MODEL, smsPrompt, { temperature: 0.3, max_tokens: 500 });
  const emailDraft = generate(DRAFT_MODEL, emailPrompt, { temperature: 0.3, max_tokens: 400 });
  const [sms, email] = await Promise.all([smsDraft, emailDraft]);

  return { sms: sms.trim(), email: email.trim() };
}
|
||||||
|
|
||||||
|
// =================== Per-event runner ===================
|
||||||
|
|
||||||
|
/**
 * Runs one scenario event end to end.
 *
 * Steps: build the task, run the agent fill with today's confirmed roster and
 * the event's own exclusions masked out, resolve worker ids, update the
 * roster, derive gap signals, draft SMS/email artifacts (fail-soft), append a
 * dispatch log line, and seed playbook_memory (fail-soft).
 *
 * @param event The event to fill.
 * @param ctx   Scenario context; `ctx.roster` is appended to, and files under
 *              `ctx.out_dir` are appended to.
 * @returns The event result. A failed agent run yields `ok: false` with the
 *          error recorded as a gap signal rather than throwing.
 */
async function runEvent(event: FillEvent, ctx: ScenarioContext): Promise<EventResult> {
  console.log(`\n════════ ${event.at} — ${event.kind.toUpperCase()}: fill ${event.count}× ${event.role} in ${event.city}, ${event.state} ════════`);

  const t0 = Date.now();

  // Build the task spec the agent loop expects.
  const task: TaskSpec = {
    id: `${ctx.spec.date}-${event.at.replace(":", "")}-${event.kind}`,
    operation: `fill: ${event.role} x${event.count} in ${event.city}, ${event.state}`,
    target_role: event.role,
    target_count: event.count,
    target_city: event.city,
    target_state: event.state,
    approach_hint: `hybrid search against ${WORKERS_INDEX} for ${event.kind}`,
  };

  // Exclusion set: everyone already in today's roster + any explicit
  // exclusions from the event spec.
  const excludeIds = [
    ...ctx.roster
      .filter(r => r.status === "confirmed")
      .map(r => r.worker_id),
    ...(event.exclude_worker_ids ?? []),
  ];

  const gap_signals: string[] = [];
  let outcome: AgentFillOutcome;
  try {
    outcome = await runAgentFill(task, guidanceFor(event, ctx), excludeIds);
  } catch (e) {
    return {
      event,
      ok: false,
      fills: [],
      turns: 0,
      duration_secs: (Date.now() - t0) / 1000,
      error: (e as Error).message,
      gap_signals: [`drift_or_tool: ${(e as Error).message}`],
    };
  }

  // Resolve worker_ids via SQL so the roster has stable IDs (models
  // sometimes return names-only). Best-effort — if name lookup finds
  // zero or many matches, we flag a gap.
  const resolved = await resolveWorkerIds(outcome.fills, event);

  // Roster double-book check.
  for (const r of resolved) {
    // resolveWorkerIds returns fills whose canonical id lives in
    // `candidate_id`; use it when the fill has no `worker_id` of its own so
    // roster ids, later exclusions and this check are never undefined.
    const workerId: string = (r as any).worker_id ?? r.candidate_id;
    const conflict = ctx.roster.find(e => e.worker_id === workerId && e.status === "confirmed");
    if (conflict) {
      gap_signals.push(`double_book: ${workerId} ${r.name} already booked for ${conflict.booked_for}`);
    }
    ctx.roster.push({
      worker_id: workerId,
      name: r.name,
      booked_for: event.at,
      role: event.role,
      city: event.city,
      state: event.state,
      status: "confirmed",
    });
  }

  // Pool-size signal (Gap 1 — supply).
  const supply_threshold = event.count * 3;
  // No hybrid_search in the run means no pool was observed; report it as 0
  // instead of printing "undefined".
  const poolSize = outcome.first_sql_matches ?? 0;
  if (poolSize < supply_threshold) {
    gap_signals.push(
      `supply: only ${poolSize} candidates for ${event.count}× ${event.role} in ${event.city} (< ${supply_threshold}, our 3× comfort margin)`
    );
  }

  // Score-spread signal (Gap 2 — embedding).
  const spread = (outcome.first_pool_first_score ?? 0) - (outcome.first_pool_last_score ?? 0);
  if (spread > 0 && spread < 0.02) {
    gap_signals.push(
      `embedding: top-K score spread ${spread.toFixed(3)} < 0.02 — model struggles to differentiate`
    );
  }

  // Generate artifacts (SMS + email) — fail-soft; artifacts are cosmetic
  // relative to the consensus itself.
  let bundle: ArtifactBundle | null = null;
  try {
    bundle = await generateArtifacts(event, { ...outcome, fills: resolved }, ctx);
    await appendFile(join(ctx.out_dir, "sms.md"),
      `\n## ${event.at} ${event.kind} — ${event.role} x${event.count} in ${event.city}, ${event.state}\n\n${bundle.sms}\n`);
    await appendFile(join(ctx.out_dir, "emails.md"),
      `\n## ${event.at} ${event.kind} — ${event.role} x${event.count}\n\n${bundle.email}\n`);
  } catch (e) {
    gap_signals.push(`artifact: ${(e as Error).message}`);
  }

  // Dispatch log (structured).
  await appendFile(join(ctx.out_dir, "dispatch.jsonl"),
    JSON.stringify({
      at: event.at,
      kind: event.kind,
      operation: task.operation,
      fills: resolved,
      turns: outcome.turns,
      duration_secs: outcome.duration_secs,
      pool_size: outcome.first_sql_matches,
      playbook_citations: outcome.playbook_citations,
    }) + "\n");

  // Always seed playbook_memory after a sealed fill — keep the learning
  // loop tight across the whole day so recurring/misplacement events
  // later in the run benefit from earlier ones.
  try {
    await httpJson(`${GATEWAY}/vectors/playbook_memory/seed`, {
      operation: task.operation,
      approach: outcome.approach || `${event.kind} → hybrid search`,
      context: `client=${ctx.spec.client} scenario=${event.kind} shift=${event.shift_start ?? "tbd"}`,
      endorsed_names: resolved.map(r => r.name),
      append: true,
    });
  } catch (e) {
    gap_signals.push(`write_through: ${(e as Error).message}`);
  }

  return {
    event,
    ok: true,
    fills: outcome.fills,
    turns: outcome.turns,
    duration_secs: outcome.duration_secs,
    gap_signals,
    sources_first_score: outcome.first_pool_first_score,
    sources_last_score: outcome.first_pool_last_score,
    pool_size: outcome.first_sql_matches,
    playbook_citations: outcome.playbook_citations,
  };
}
|
||||||
|
|
||||||
|
// =================== Worker ID resolution ===================
|
||||||
|
|
||||||
|
/**
 * Normalizes the ids on proposed fills to the "W500K-<n>" form.
 *
 * Models emit candidate_ids or names in propose_done: some return the
 * W500K-XXX doc_id, others a bare number, others just a name or a random tag.
 * Each fill is handled by the first rule that applies:
 *   1. id already looks like W500K-<digits>: kept as-is;
 *   2. id is a bare integer: promoted to W500K-<n>;
 *   3. otherwise: looked up by (name, city, state), taking the first match.
 * Fills that cannot be resolved are returned unchanged (best-effort).
 *
 * @param fills Fills proposed by the agents.
 * @param event Event supplying the city/state for the name lookup.
 * @returns One fill per input, in the same order.
 */
async function resolveWorkerIds(fills: Fill[], event: FillEvent): Promise<Fill[]> {
  // Quote a value as a SQL string literal, doubling embedded single quotes.
  const sqlLiteral = (value: unknown): string => `'${String(value ?? "").replace(/'/g, "''")}'`;

  const resolved: Fill[] = [];
  for (const f of fills) {
    // Case 1: candidate_id looks like W500K-NNN — accept as-is.
    if (/^W500K-\d+$/.test(f.candidate_id)) {
      resolved.push(f);
      continue;
    }
    // Case 2: candidate_id is a bare integer — promote to W500K-N.
    if (/^\d+$/.test(f.candidate_id)) {
      resolved.push({ ...f, candidate_id: `W500K-${f.candidate_id}` });
      continue;
    }
    // Without a usable name there is nothing to look up. Keep the fill as-is
    // rather than letting a TypeError abort the whole scenario.
    if (typeof f.name !== "string" || f.name.length === 0) {
      resolved.push(f);
      continue;
    }
    // Case 3: look up by (name, city, state). Take the first match.
    const q = `SELECT worker_id FROM ${WORKERS_DATASET} WHERE name = ${sqlLiteral(f.name)} AND city = ${sqlLiteral(event.city)} AND state = ${sqlLiteral(event.state)} LIMIT 1`;
    try {
      const r = await sqlQuery(q);
      if (r.rows && r.rows.length > 0) {
        resolved.push({ ...f, candidate_id: `W500K-${r.rows[0].worker_id}` });
      } else {
        // No match — keep the fill with its original candidate_id.
        resolved.push(f);
      }
    } catch (e) {
      // Still best-effort, but leave a trace so a failing lookup is visible.
      console.warn(`resolveWorkerIds: lookup failed for "${f.name}": ${(e as Error).message}`);
      resolved.push(f);
    }
  }
  return resolved;
}
|
||||||
|
|
||||||
|
// =================== EOD gap report ===================
|
||||||
|
|
||||||
|
/**
 * Writes report.md into the scenario output directory.
 *
 * Sections, in order: header with the model names, per-event table, final
 * roster, gap signals grouped by category (per-event signals plus a
 * cross-event fairness check and a post-run playbook_memory stats line), and
 * a short narrative summary.
 *
 * @param ctx Scenario context after all events have run; read-only here.
 */
async function writeRetrospective(ctx: ScenarioContext): Promise<void> {
  const reportPath = join(ctx.out_dir, "report.md");
  const okTotal = ctx.results.filter(r => r.ok).length;

  const lines: string[] = [
    `# Scenario retrospective — ${ctx.spec.client}, ${ctx.spec.date}`,
    "",
    `Executor: \`${EXECUTOR_MODEL}\` Reviewer: \`${REVIEWER_MODEL}\` Draft: \`${DRAFT_MODEL}\``,
    "",
  ];

  // --- Per-event summary ---
  lines.push(
    "## Events",
    "",
    "| At | Kind | Role / Count | Pool | Fills | Turns | Dur(s) | Cites | Gaps |",
    "|---|---|---|---|---|---|---|---|---|",
  );
  ctx.results.forEach(res => {
    const mark = res.ok ? "✓" : "✗";
    lines.push(
      `| ${res.event.at} | ${res.event.kind} | ${res.event.role} × ${res.event.count} | ${res.pool_size ?? "-"} | ${mark} ${res.fills.length} | ${res.turns} | ${res.duration_secs.toFixed(1)} | ${res.playbook_citations?.length ?? 0} | ${res.gap_signals.length} |`
    );
  });
  lines.push("");

  // --- Roster ---
  lines.push(
    "## Final roster",
    "",
    "| Worker | Booked | Role | City, ST | Status |",
    "|---|---|---|---|---|",
  );
  ctx.roster.forEach(entry => {
    lines.push(`| ${entry.worker_id} ${entry.name} | ${entry.booked_for} | ${entry.role} | ${entry.city}, ${entry.state} | ${entry.status} |`);
  });
  lines.push("");

  // --- Gap analysis by category ---
  const bycat: Record<string, string[]> = {};
  // Fetch the list for a category, creating it on first use.
  const bucket = (category: string): string[] => bycat[category] ?? (bycat[category] = []);

  for (const signal of ctx.gap_signals) {
    bucket(signal.category).push(`**${signal.event}** — ${signal.detail}`);
  }

  // Cross-event category computed here:
  // Gap 3 — fairness: workers holding more than one confirmed booking.
  const bookingCounts = new Map<string, number>();
  for (const entry of ctx.roster) {
    if (entry.status !== "confirmed") continue;
    bookingCounts.set(entry.worker_id, (bookingCounts.get(entry.worker_id) ?? 0) + 1);
  }
  for (const [id, n] of bookingCounts) {
    if (n <= 1) continue;
    const name = ctx.roster.find(r => r.worker_id === id)?.name ?? id;
    bucket("fairness").push(`_cross-event_ — ${name} (${id}) booked ${n} times today`);
  }

  // Gap 5 — tool errors already captured per-event via gap_signals.

  // Gap 6 — write-through coverage: compare # events vs # new playbook_memory entries.
  try {
    const stats = await httpJson<any>(`${GATEWAY}/vectors/playbook_memory/stats`);
    bucket("write_through_audit").push(`_post-run_ — playbook_memory has ${stats.entries} entries (ran ${ctx.results.length} events, expected ≥ ${okTotal} new entries from this run)`);
  } catch { /* non-fatal */ }

  lines.push("## Gap signals", "");
  const categories = Object.entries(bycat);
  if (categories.length === 0) {
    lines.push("_None surfaced — either everything worked or detection is under-tuned._");
  } else {
    for (const [cat, items] of categories) {
      lines.push(`### ${cat}`);
      lines.push(...items.map(item => `- ${item}`));
      lines.push("");
    }
  }

  // --- Narrative summary ---
  const distinctWorkers = new Set(ctx.roster.map(r => r.worker_id)).size;
  const totalCites = ctx.results.reduce((sum, r) => sum + (r.playbook_citations?.length ?? 0), 0);
  lines.push(
    "## Narrative",
    "",
    `- ${okTotal}/${ctx.results.length} events reached consensus.`,
    `- Final roster: ${ctx.roster.length} bookings across ${distinctWorkers} distinct workers.`,
    `- Playbook citations across the day: ${totalCites} (proof the feedback loop fired across events).`,
  );
  const droppedEvents = ctx.results.filter(r => !r.ok);
  if (droppedEvents.length > 0) {
    lines.push(`- Dropped events: ${droppedEvents.map(r => r.event.at + " " + r.event.kind).join(", ")}.`);
  }

  await writeFile(reportPath, lines.join("\n"));
  console.log(`\n✓ report → ${reportPath}`);
}
|
||||||
|
|
||||||
|
// =================== Main driver ===================
|
||||||
|
|
||||||
|
/**
 * Scenario driver entry point.
 *
 * Loads the scenario (JSON file named by the first CLI argument, otherwise
 * DEFAULT_SCENARIO), creates a timestamped output directory, runs every event
 * in order, persists roster/results, writes the retrospective, and exits with
 * 0 when all events succeeded or 2 when any failed.
 *
 * @throws Error when a loaded spec has no `events` array.
 */
async function main() {
  const specPath = process.argv[2];
  const spec: ScenarioSpec = specPath
    ? JSON.parse(await Bun.file(specPath).text())
    : DEFAULT_SCENARIO;
  // A hand-written spec file is unvalidated; fail with a clear message
  // rather than a TypeError further down.
  if (!spec || !Array.isArray(spec.events)) {
    throw new Error(`scenario spec must contain an "events" array`);
  }

  const stamp = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
  const out_dir = join("tests/multi-agent/playbooks", `scenario-${stamp}`);
  await mkdir(out_dir, { recursive: true });

  const ctx: ScenarioContext = {
    spec,
    out_dir,
    roster: [],
    results: [],
    gap_signals: [],
  };

  // Initialize output files
  await writeFile(join(out_dir, "sms.md"), `# SMS drafts — ${spec.client}, ${spec.date}\n`);
  await writeFile(join(out_dir, "emails.md"), `# Client emails — ${spec.client}, ${spec.date}\n`);
  await writeFile(join(out_dir, "dispatch.jsonl"), "");

  console.log(`▶ scenario: ${spec.client}, ${spec.date}, ${spec.events.length} events`);
  console.log(`▶ out: ${out_dir}\n`);

  for (const event of spec.events) {
    // Expand misplacement-style exclusions from the current roster: it
    // wants to replace a worker from a prior event, so grab everyone
    // booked at that at-label and add as exclusions.
    if (event.kind === "misplacement" && event.replaces_event) {
      const priorEntries = ctx.roster
        .filter(r => r.booked_for === event.replaces_event && r.status === "confirmed");
      const priorBooked = priorEntries.map(r => r.worker_id);
      if (priorEntries.length > 0) {
        // Pick one arbitrarily to mark as no_show — in a real system the
        // external signal would pick. For the test, first one works.
        // Taking the entry from the filtered list guarantees the booking for
        // the replaced event is the one marked, even when the same worker
        // also holds another booking.
        const lostEntry = priorEntries[0];
        lostEntry.status = "no_show";
        console.log(` (misplacement: marking ${lostEntry.worker_id} ${lostEntry.name} as no-show)`);
        // Exclude all prior bookings so the refill doesn't pick anyone
        // already scheduled for today. Merge rather than overwrite so ids
        // supplied by the spec itself stay excluded.
        event.exclude_worker_ids = [
          ...new Set([...(event.exclude_worker_ids ?? []), ...priorBooked]),
        ];
      }
    }

    const result = await runEvent(event, ctx);
    ctx.results.push(result);
    // Signals are "category: detail" strings; the detail may itself contain ':'.
    for (const s of result.gap_signals) {
      const [category, ...rest] = s.split(":");
      ctx.gap_signals.push({ event: event.at, category: category.trim(), detail: rest.join(":").trim() });
    }

    // Small breather to not hammer Ollama on back-to-back runs.
    await new Promise(r => setTimeout(r, 500));
  }

  // Persist structured state for forensics.
  await writeFile(join(out_dir, "roster.json"), JSON.stringify(ctx.roster, null, 2));
  await writeFile(join(out_dir, "results.json"), JSON.stringify(ctx.results, null, 2));

  await writeRetrospective(ctx);

  const okCount = ctx.results.filter(r => r.ok).length;
  if (okCount < ctx.results.length) {
    console.log(`\n⚠ ${okCount}/${ctx.results.length} events succeeded. See ${out_dir}/report.md for gaps.`);
    process.exit(2);
  }
  console.log(`\n✓ ${okCount}/${ctx.results.length} events succeeded. See ${out_dir}/report.md.`);
  process.exit(0);
}
|
||||||
|
|
||||||
|
// Top-level launch: any rejection escaping main() is reported on stderr and
// the process exits with code 1.
main().catch(e => {
  // A rejection value is not guaranteed to be an Error; narrow before reading
  // .message/.stack so a thrown string or object is still reported usefully
  // instead of printing "undefined".
  const message = e instanceof Error ? e.message : String(e);
  console.error(`\n✗ scenario driver crashed: ${message}`);
  if (e instanceof Error && e.stack) {
    console.error(e.stack);
  }
  process.exit(1);
});
|
||||||
Loading…
x
Reference in New Issue
Block a user