root 25b7e6c3a7 Phase 19 wiring + Path 1/2 work + chain integrity fixes
Backend:
- crates/vectord/src/playbook_memory.rs (new): Phase 19 in-memory boost
  store with seed/rebuild/snapshot, plus temporal decay (e^-age/30 per
  playbook), persist_to_sql endpoint backing successful_playbooks_live,
  and discover_patterns endpoint for meta-index pattern aggregation
  (recurring certs/skills/archetype/reliability across similar past fills).
- DEFAULT_TOP_K_PLAYBOOKS bumped 5 → 25; old default silently missed
  most boosts when memory had > 25 entries.
- service.rs: new routes /vectors/playbook_memory/{seed,rebuild,stats,
  persist_sql,patterns}.

Bun staffing co-pilot (mcp-server/):
- /search, /match, /verify, /proof, /simulation/run, MCP tools all
  forward use_playbook_memory:true and playbook_memory_k:25 to the
  hybrid endpoint. Boost was previously dark across the entire app.
- /log no longer POSTs to /ingest/file — that endpoint REPLACES the
  dataset's object list, so single-row CSV writes were wiping all prior
  rows in successful_playbooks (sp_rows went 33→1 in one /log call).
  /log now seeds playbook_memory with canonical short text and calls
  /persist_sql to keep successful_playbooks_live in sync.
- /simulation/run cumulative end-of-week CSV write removed for the same
  reason. Per-day per-contract /seed (added in this session) is the
  accumulating feedback path now.
- search.html addWorkerInsight renders a green "Endorsed · N playbooks"
  chip with playbook citations when boost > 0.

Internal Dioxus UI (crates/ui/):
- Dashboard phase list rewritten through Phase 19 (was stuck at "Phase
  16: File Watcher" / "Phase 17: DB Connector" — both wrong).
- Removed fabricated "27ms" stat label.
- Ask tab examples + SQL default replaced with real staffing prompts
  against candidates/clients/job_orders (was referencing nonexistent
  employees/products/events).
- New Playbook tab exposes /vectors/playbook_memory/{stats,rebuild} and
  side-by-side hybrid search (boost OFF vs ON) with citations.

Tests (tests/multi-agent/):
- run_e2e_rated.ts: parallel two-agent (mistral + qwen2.5) build phase
  + verifier rating (geo, auth, persist, boost, speed → /10).
- network_proving.ts: continuous build → verify → repeat with
  staffing-recruiter profile hot-swap; geo-discrimination check.
- chain_of_custody.ts: single recruiter operation traced through every
  layer (Bun /search, direct /vectors/hybrid parity, /log, SQL,
  playbook_memory growth, profile activation, post-op boost lift).
2026-04-20 06:21:13 -05:00

352 lines
15 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Shared runtime for one agent. An agent is a role (executor or reviewer),
// a model name, and a conversation the orchestrator hands it. The agent
// produces ONE structured Action per turn; the orchestrator applies tool
// calls and feeds results back.
//
// Fail-fast: every HTTP error, parse error, and Ollama error throws. The
// orchestrator catches at the top and exits non-zero with the full log.

// Base URL of the gateway (tool calls, /vectors/hybrid, /query/sql).
export const GATEWAY = "http://localhost:3100";
// Base URL of the model sidecar (/generate).
export const SIDECAR = "http://localhost:3200";
// --- Shared types ---

// Which seat an agent occupies in the two-agent loop.
export type Role = "executor" | "reviewer";

// One fill task the orchestrator hands to the executor.
export interface TaskSpec {
  id: string;
  operation: string; // "fill: Welder x2 in Columbus, OH"
  target_role: string; // "Welder"
  target_count: number; // 2
  target_city: string; // "Columbus"
  target_state: string; // "OH"
  approach_hint?: string; // e.g. "hybrid search"; agent is free to ignore
}

// One turn in the shared log both agents read each round.
export interface LogEntry {
  turn: number;
  role: Role;
  model: string;
  at: string; // timestamp string — exact format set by the orchestrator (not visible here)
  kind:
    | "plan"
    | "tool_call"
    | "tool_result"
    | "critique"
    | "propose_done"
    | "consensus_done"
    | "error";
  content: any;
}

// Action = what an agent returns on one turn. Strict shape so we can
// enforce it at parse time rather than prompt-engineer around malformed
// JSON.
export type Action =
  | { kind: "tool_call"; tool: string; args: Record<string, any>; rationale: string }
  | { kind: "propose_done"; fills: Fill[]; rationale: string }
  | { kind: "critique"; verdict: "continue" | "drift" | "approve_done"; notes: string }
  | { kind: "plan"; steps: string[] };

// One proposed candidate inside a propose_done action.
export interface Fill {
  candidate_id: string;
  name: string;
  reason: string;
}
// --- HTTP helpers (fail-fast) ---
// Minimal JSON-over-HTTP helper. Serializes `body` (when given), throws on
// any non-2xx status with the response text attached, otherwise parses the
// response as JSON and casts to T (unchecked — callers trust the endpoint).
async function http<T>(method: string, url: string, body?: unknown): Promise<T> {
  const res = await fetch(url, {
    method,
    headers: { "Content-Type": "application/json" },
    body: body ? JSON.stringify(body) : undefined,
  });
  if (!res.ok) {
    const text = await res.text();
    // Keep URL and status visually separated — previously they were fused
    // ("…/hybrid500: …"), which made error logs hard to read.
    throw new Error(`${method} ${url} → ${res.status}: ${text}`);
  }
  return (await res.json()) as T;
}
// Tool calls land in the Phase 12 audit log keyed by this agent name.
// Distinguishable from human-driven calls (agent=="operator" or similar)
// so post-hoc queries can separate multi-agent runs.
export const TOOL_AGENT_ID = "multi-agent-test";

// Invoke a named gateway tool, tagging the call with TOOL_AGENT_ID.
export async function callTool(tool: string, args: Record<string, any>): Promise<any> {
  const endpoint = `${GATEWAY}/tools/${tool}/call`;
  const payload = {
    params: args,
    agent: TOOL_AGENT_ID,
  };
  return http("POST", endpoint, payload);
}
// SQL-filter-then-semantically-rank search against the gateway's hybrid endpoint.
export async function hybridSearch(sql_filter: string, question: string, k = 10): Promise<any> {
  const endpoint = `${GATEWAY}/vectors/hybrid`;
  const payload = { sql_filter, question, k };
  return http("POST", endpoint, payload);
}
// Read-only SQL against the gateway, results requested as JSON.
export async function sqlQuery(sql: string): Promise<any> {
  const endpoint = `${GATEWAY}/query/sql`;
  const payload = { sql, format: "json" };
  return http("POST", endpoint, payload);
}
// Sidecar generate. Ollama's default keep_alive (5 min) keeps the model
// warm between turns on its own, so we don't need to pass it through.
export async function generate(model: string, prompt: string, opts: {
  max_tokens?: number;
  temperature?: number;
  system?: string;
} = {}): Promise<string> {
  const payload: Record<string, any> = {
    model,
    prompt,
    temperature: opts.temperature ?? 0.3,
    max_tokens: opts.max_tokens ?? 800,
  };
  if (opts.system) payload.system = opts.system;

  const reply = await http<any>("POST", `${SIDECAR}/generate`, payload);
  const text = reply.text ?? "";
  // Fail fast on a missing/empty/non-string `text` field rather than
  // propagating a blank completion into the turn loop.
  if (typeof text !== "string" || !text) {
    throw new Error(`generate returned empty text from ${model}: ${JSON.stringify(reply).slice(0, 200)}`);
  }
  return text;
}
// --- Prompt construction ---
// Tool catalog injected verbatim into the executor prompt: the two gateway
// tools, example args, the "W500K-" doc_id → integer worker_id mapping, and
// the one-JSON-object-per-turn rule. NOTE: this is a runtime string the
// model reads — any edit here changes agent behavior.
const TOOL_CATALOG = `
Available tools (each takes a JSON "args" object):
- hybrid_search(sql_filter: string, question: string, index_name: string, k?: number)
→ Narrow workers via SQL WHERE clause, then rank by semantic match.
→ Canonical production tool for fill tasks. Always use this FIRST.
→ Example args:
{"index_name":"workers_500k_v1",
"sql_filter":"LOWER(role) LIKE '%weld%' AND city = 'Toledo' AND state = 'OH' AND availability > 0.5",
"question":"reliable welder with OSHA certs",
"k":10}
- sql(query: string)
→ Raw read-only SELECT. Use for verification (confirm a worker exists,
check city/role/availability) after hybrid_search surfaces candidates.
→ Schema of workers_500k: worker_id, name, role, email, phone, city,
state, zip, skills, certifications, archetype, reliability,
responsiveness, engagement, communications, compliance, availability,
resume_text.
→ Example args:
{"query":"SELECT worker_id, name, role, city, state, availability FROM workers_500k WHERE worker_id = 'W123456'"}
Rules:
- hybrid_search returns sources[] each with {doc_id, chunk_text, score, sql_verified}.
- **ID mapping:** vector doc_ids look like "W500K-7995" (prefix + number).
The SQL worker_id is an INTEGER. To go from doc_id to SQL, strip the
"W500K-" prefix and cast:
SELECT ... FROM workers_500k WHERE worker_id = CAST(SUBSTR('W500K-7995', 7) AS BIGINT)
or more simply: WHERE worker_id = 7995.
- Names are NOT unique. Always identify by worker_id, never by name alone.
- Return EXACTLY ONE JSON object per turn. No prose outside the JSON.
`;
// Smart per-kind summary so agents see the substance of each prior turn
// without a raw-JSON wall of text. hybrid_search results especially need
// this — raw JSON buries sources[] past any reasonable 400-char truncation.
function summarizeEntry(e: LogEntry): string {
  const c = e.content ?? {};
  switch (e.kind) {
    case "plan":
      return `PLAN: ${(c.steps ?? []).map((s: string, i: number) => `${i + 1}.${s}`).join(" ")}`;
    case "tool_call":
      // " — " keeps the rationale from fusing onto the closing paren of the
      // args dump (previously rendered as "...}))reason").
      return `TOOL_CALL ${c.tool}(${JSON.stringify(c.args ?? {}).slice(0, 250)})${c.rationale ? ` — ${c.rationale}` : ""}`;
    case "tool_result": {
      if (c.error) return `TOOL_RESULT error: ${c.error}`;
      // hybrid_search response: show the top 5 sources, truncated.
      if (Array.isArray(c.sources)) {
        const head = c.sources.slice(0, 5).map((s: any) =>
          `${s.doc_id}${s.sql_verified ? "✓" : ""} score=${(s.score ?? 0).toFixed(2)}: ${String(s.chunk_text ?? "").slice(0, 80)}`
        ).join(" | ");
        return `TOOL_RESULT hybrid: sql_matches=${c.sql_matches} vector_reranked=${c.vector_reranked} sources=[${head}${c.sources.length > 5 ? ` +${c.sources.length - 5} more` : ""}]`;
      }
      // sql response: ": " separates the row dump from the count — the
      // first row previously fused directly onto "N rows".
      if (Array.isArray(c.rows)) {
        const head = c.rows.slice(0, 5).map((r: any) => JSON.stringify(r)).join(" | ");
        return `TOOL_RESULT sql: ${c.rows.length} rows${c.rows.length > 0 ? `: ${head}${c.rows.length > 5 ? ` +${c.rows.length - 5} more` : ""}` : ""}`;
      }
      // fallback: unknown result shape, dump truncated JSON
      return `TOOL_RESULT ${JSON.stringify(c).slice(0, 250)}`;
    }
    case "critique":
      return `CRITIQUE verdict=${c.verdict} notes: ${String(c.notes ?? "").slice(0, 200)}`;
    case "propose_done":
      return `PROPOSE_DONE fills=[${(c.fills ?? []).map((f: Fill) => `${f.candidate_id}:${f.name}`).join(", ")}] rationale: ${String(c.rationale ?? "").slice(0, 120)}`;
    case "consensus_done":
      return `CONSENSUS ✓`;
    case "error":
      return `ERROR ${c.message ?? JSON.stringify(c)}`;
  }
  // Unreachable for the known LogEntry kinds, kept as a defensive fallback.
  return JSON.stringify(c).slice(0, 200);
}
// Render the last 12 turns of the shared log as prompt-ready lines, one
// "[tN role] summary" line per entry.
function renderLogForPrompt(log: LogEntry[]): string {
  if (log.length === 0) return "(no turns yet)";
  const recent = log.slice(-12);
  const lines = recent.map(entry => `[t${entry.turn} ${entry.role}] ${summarizeEntry(entry)}`);
  return lines.join("\n");
}
// Crawl the log for every hybrid_search tool_result and collect the
// worker names + ids seen so far. LLMs routinely "forget" earlier turns
// once the conversation grows, so we surface a running ledger in the
// prompt as orchestrator-maintained state. The executor doesn't have to
// track this itself — it just reads it.
function candidatesSeen(log: LogEntry[]): Array<{ doc_id: string; name: string; city: string; state: string }> {
  const ledger = new Map<string, { doc_id: string; name: string; city: string; state: string }>();
  for (const entry of log) {
    if (entry.kind !== "tool_result") continue;
    const sources = (entry.content as any)?.sources;
    if (!Array.isArray(sources)) continue;
    for (const src of sources) {
      // chunk_text shape "Name — Role in City, ST. …"
      const text = String(src.chunk_text ?? "");
      const [namePart, rest] = text.split("—", 2);
      if (!namePart || !rest) continue;
      const location = rest.split(" in ")[1] ?? "";
      const [city, stateRaw] = location.split(",", 2);
      // Trim then cut at the first non-letter so "OH. extra…" → "OH".
      const state = (stateRaw ?? "").trim().replace(/[^A-Za-z].*/, "");
      if (!src.doc_id || !namePart.trim() || !city?.trim() || !state) continue;
      if (ledger.has(src.doc_id)) continue; // first sighting wins
      ledger.set(src.doc_id, {
        doc_id: src.doc_id,
        name: namePart.trim(),
        city: city.trim(),
        state,
      });
    }
  }
  return Array.from(ledger.values());
}
// Build the executor's per-turn prompt: task header, tool catalog, the
// orchestrator-tracked candidate ledger, recent log, and the strict JSON
// action shapes the executor must emit.
export function executorPrompt(task: TaskSpec, log: LogEntry[]): string {
  const history = renderLogForPrompt(log);
  const surfaced = candidatesSeen(log);
  let seenBlock: string;
  if (surfaced.length === 0) {
    seenBlock = "(no candidates surfaced yet — start with hybrid_search)";
  } else {
    seenBlock = surfaced
      .map(s => ` - ${s.doc_id} ${s.name} (${s.city}, ${s.state})`)
      .join("\n");
  }
  return `You are the EXECUTOR agent. Your job is to complete this task:
OPERATION: ${task.operation}
TARGET: ${task.target_count} × ${task.target_role} in ${task.target_city}, ${task.target_state}
${task.approach_hint ? `HINT: ${task.approach_hint}` : ""}
The REVIEWER agent is watching every turn. They will flag drift. Stay on target.
${TOOL_CATALOG}
CANDIDATES SURFACED SO FAR (orchestrator-tracked, do not forget these):
${seenBlock}
SHARED LOG (recent turns):
${history}
Your next action MUST be a JSON object matching one of these shapes:
{"kind":"plan","steps":["short step 1","short step 2",...]}
— use on turn 1 to outline your approach. Steps must be concrete.
{"kind":"tool_call","tool":"...","args":{...},"rationale":"why"}
— call a tool and see its result next turn.
{"kind":"propose_done","fills":[{"candidate_id":"...","name":"First Last","reason":"why them"}],"rationale":"..."}
— propose you've met the target. fills MUST have EXACTLY ${task.target_count} entries — count twice before emitting.
Strategy tip: once "CANDIDATES SURFACED SO FAR" has ≥ ${task.target_count} entries in ${task.target_city}, ${task.target_state} matching ${task.target_role}, verify ONE via the sql tool (to satisfy the reviewer's SQL-verification criterion) and then propose_done with the top ${task.target_count}. Don't keep re-searching.
Respond with ONLY the JSON object. No markdown fences, no prose.`;
}
// Build the reviewer's per-turn prompt: the task, drift checklist, recent
// log, and the three-verdict critique schema.
export function reviewerPrompt(task: TaskSpec, log: LogEntry[]): string {
  const logStr = renderLogForPrompt(log);
  // If the most recent executor action was propose_done, the reviewer
  // must commit to an up-or-down vote this turn — "continue" would stall
  // the orchestrator forever. The wider prompt still describes all three
  // verdicts, but we add a hard rule at the end that the model must obey.
  const lastExec = [...log].reverse().find(e => e.role === "executor");
  const awaitingApproval = lastExec?.kind === "propose_done";
  // "If 1–3 all hold" refers to the three numbered APPROVAL CRITERIA above
  // it; the en dash had been lost, leaving the confusing literal "If 13".
  return `You are the REVIEWER agent. The EXECUTOR is trying to complete this task:
OPERATION: ${task.operation}
TARGET: ${task.target_count} × ${task.target_role} in ${task.target_city}, ${task.target_state}
Your job: catch drift. Agents often wander from the actual objective. Specifically watch for:
- Proposing candidates who aren't in ${task.target_city}, ${task.target_state}.
- Proposing candidates who don't have ${task.target_role} skill.
- Proposing fewer or more than ${task.target_count} fills.
- Irrelevant tool calls (e.g. revenue_by_client when the task is a fill).
Available tools (for reference, but YOU don't call them):
- hybrid_search(sql_filter, question, index_name, k) — production fill path
- sql(query) — read-only SELECT for verification
SHARED LOG (recent turns):
${logStr}
Your next action MUST be a JSON object:
{"kind":"critique","verdict":"continue" | "drift" | "approve_done","notes":"..."}
- "continue" → executor is on a reasonable path, let them keep going.
- "drift" → executor is off-track; notes MUST tell them how to redirect.
- "approve_done" → executor's propose_done meets the criteria. Seal it.
APPROVAL CRITERIA (use these only for propose_done):
1. Exactly ${task.target_count} fills.
2. Each fill's name appears in a prior tool_result from ${task.target_city}, ${task.target_state} matching role "${task.target_role}".
3. Executor has SQL-verified at least one of the fills (any prior sql tool_result with that worker).
If 1–3 all hold, return approve_done. Do not demand further verification.
${awaitingApproval ? `
HARD RULE: The executor's most recent action was propose_done. On this turn you CANNOT return "continue" — it would stall the task. Choose approve_done (proposal is valid by the 3 criteria above) or drift (it fails one; state which in notes).` : ""}
Respond with ONLY the JSON object.`;
}
// Parse an agent's raw model output into a validated Action, or throw.
// Tolerates markdown fences and leading/trailing prose around the JSON,
// but enforces the per-role schema strictly after parsing.
export function parseAction(raw: string, role: Role): Action {
  // Models sometimes wrap JSON in ```json fences; strip them.
  let s = raw.trim();
  if (s.startsWith("```")) {
    s = s.replace(/^```(?:json)?\n?/, "").replace(/```$/, "").trim();
  }
  // Find the first {...} block.
  const start = s.indexOf("{");
  const end = s.lastIndexOf("}");
  if (start < 0 || end <= start) {
    throw new Error(`no JSON object in ${role} response: ${raw.slice(0, 300)}`);
  }
  const json = s.slice(start, end + 1);
  let obj: any;
  try {
    obj = JSON.parse(json);
  } catch (e) {
    throw new Error(`invalid JSON from ${role}: ${(e as Error).message} | raw: ${json.slice(0, 300)}`);
  }
  if (role === "executor") {
    if (obj.kind === "plan" && Array.isArray(obj.steps)) return obj as Action;
    // typeof null === "object", so also require args !== null — otherwise a
    // malformed {"args":null} tool_call would pass validation despite not
    // matching the declared Record<string, any> shape.
    if (obj.kind === "tool_call" && typeof obj.tool === "string" && typeof obj.args === "object" && obj.args !== null) return obj as Action;
    if (obj.kind === "propose_done" && Array.isArray(obj.fills)) return obj as Action;
    throw new Error(`executor returned unexpected shape: ${JSON.stringify(obj).slice(0, 200)}`);
  } else {
    // Normalize: some models (qwen2.5, mistral) emit the verdict AS the
    // `kind` field directly instead of nesting it under a "critique"
    // wrapper. Accept both shapes rather than hard-failing — the
    // semantic content is identical, and rejecting would stall the
    // orchestrator on a cosmetic schema miss.
    if (obj.kind === "critique" && ["continue", "drift", "approve_done"].includes(obj.verdict)) {
      return obj as Action;
    }
    if (["continue", "drift", "approve_done"].includes(obj.kind)) {
      return { kind: "critique", verdict: obj.kind, notes: obj.notes ?? "" } as Action;
    }
    throw new Error(`reviewer returned unexpected shape: ${JSON.stringify(obj).slice(0, 200)}`);
  }
}