Worker cards now ship a real photo per person instead of monogram tiles:
- fetch_face_pool.py pulls 1000 faces from thispersondoesnotexist.com
- tag_face_pool.py runs deepface for gender/race/age, excludes <22yo
- manifest.jsonl: 952 servable, gender/race buckets populated
- /headshots/_thumbs/ pre-resized to 384px webp (587KB -> 11KB,
60x smaller; without this Chrome's parallel-connection budget
drops ~75% of tiles in a 40-card grid)
- /headshots/:key gender x race x age intersection bucketing with
gender-only fallback when intersection is sparse
- /headshots/generate/:key ComfyUI on-demand for the contractor
profile spotlight (cold ~1.5s, cached ~1ms; worker-derived
djb2 seed makes faces deterministic-per-worker but unique
across workers sharing the same prompt)
- serve_imagegen.py _cache_key() now includes seed (was caching
by prompt only -> 3 different worker seeds collapsed to 1
cached image; verified fix produces 3 distinct md5s)
- confidence-default name resolution: Xavier->man+hispanic,
Aisha->woman+black, etc. Every worker resolves to a bucket.
End-to-end: playwright run on /?q=forklift+operators+IL -> 21/21
cards loaded, 0 broken, all 384px webp.
Cache + binary pool gitignored; manifest tracked.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
3315 lines
172 KiB
TypeScript
3315 lines
172 KiB
TypeScript
/**
|
||
* Lakehouse MCP Server — bridges local LLMs to the data substrate.
|
||
*
|
||
* Tools:
|
||
* - search_workers: hybrid SQL+vector (the core fix)
|
||
* - query_sql: analytical SQL on any dataset
|
||
* - match_contract: find workers for a job order
|
||
* - get_worker: single worker by ID
|
||
* - rag_question: full RAG pipeline
|
||
* - log_success: record what worked → playbook DB
|
||
* - get_playbooks: retrieve past successes
|
||
* - swap_profile: hot-swap model + data context
|
||
* - vram_status: GPU introspection
|
||
*/
|
||
|
||
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
||
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
||
import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js";
|
||
import { z } from "zod";
|
||
import { startTrace, logSpan, logGeneration, scoreTrace, flush as flushTraces } from "./tracing.js";
|
||
import { buildPermitBrief } from "./entity.js";
|
||
|
||
const BASE = process.env.LAKEHOUSE_URL || "http://localhost:3100";
|
||
const PORT = parseInt(process.env.MCP_PORT || "3700");
|
||
|
||
// ─── Staffer roster — used by the per-staffer hot-swap index (G). ────
|
||
//
|
||
// J's vision: each staffer has their own molded view of the corpus.
|
||
// When Maria searches, the system surfaces *Maria's* prior fills and
|
||
// her territory's playbooks first. When Aisha searches, the same
|
||
// corpus gets re-shaped to her geo and recent activity. This is what
|
||
// generic CRM fast-search can't do: a relevance gradient that
|
||
// compounds with each staffer's own signal.
|
||
//
|
||
// First implementation is geography-based — each staffer has a primary
|
||
// state and a list of cities they recruit for. Playbook queries get
|
||
// scoped to that territory when staffer_id is provided. As the system
|
||
// accumulates per-staffer signal (call_log assignments, email threads,
|
||
// SMS history), the scope expands beyond geography.
|
||
//
|
||
// Adding a staffer: append to this list. The /api/staffers endpoint
|
||
// exposes the public-safe fields to the UI dropdown.
|
||
const STAFFERS: Array<{
|
||
id: string;
|
||
name: string;
|
||
display: string;
|
||
territory: { state: string; cities: string[] };
|
||
greeting: string;
|
||
}> = [
|
||
{
|
||
id: "maria",
|
||
name: "Maria",
|
||
display: "Maria · Chicago coordinator",
|
||
territory: { state: "IL", cities: ["Chicago", "Joliet", "Rockford", "Peoria", "Springfield", "Decatur"] },
|
||
greeting: "Maria's territory: Illinois warehouse + manufacturing fills",
|
||
},
|
||
{
|
||
id: "devon",
|
||
name: "Devon",
|
||
display: "Devon · Indiana coordinator",
|
||
territory: { state: "IN", cities: ["Indianapolis", "Fort Wayne", "South Bend", "Evansville", "Bloomington", "Terre Haute"] },
|
||
greeting: "Devon's territory: Indiana production + assembly fills",
|
||
},
|
||
{
|
||
id: "aisha",
|
||
name: "Aisha",
|
||
display: "Aisha · Wisconsin/Michigan coordinator",
|
||
territory: { state: "WI", cities: ["Milwaukee", "Madison", "Green Bay", "Detroit", "Grand Rapids", "Lansing"] },
|
||
greeting: "Aisha's territory: Wisconsin + Michigan logistics",
|
||
},
|
||
];
|
||
|
||
function lookupStaffer(id: string | undefined): typeof STAFFERS[number] | null {
|
||
if (!id) return null;
|
||
return STAFFERS.find((s) => s.id === id) || null;
|
||
}
|
||
const MODE = process.env.MCP_TRANSPORT || "http"; // "stdio" or "http"
|
||
|
||
// Active trace for the current request — set per-request in the HTTP handler
|
||
let activeTrace: ReturnType<typeof startTrace> | null = null;
|
||
|
||
async function api(method: string, path: string, body?: any, retries = 2) {
|
||
for (let attempt = 0; attempt <= retries; attempt++) {
|
||
try {
|
||
const t0 = Date.now();
|
||
const resp = await fetch(`${BASE}${path}`, {
|
||
method,
|
||
headers: body ? { "Content-Type": "application/json" } : {},
|
||
body: body ? JSON.stringify(body) : undefined,
|
||
});
|
||
const text = await resp.text();
|
||
const ms = Date.now() - t0;
|
||
let parsed: any;
|
||
try { parsed = JSON.parse(text); } catch { parsed = { raw: text, status: resp.status }; }
|
||
|
||
// Trace the call if we have an active trace. Pre-existing edit had
|
||
// this block at module scope, dangling after the closing brace of
|
||
// api() — parsed broken until fixed 2026-04-24.
|
||
if (activeTrace) {
|
||
const isGen = path.includes("/generate");
|
||
if (isGen) {
|
||
logGeneration(activeTrace, `lakehouse${path}`, {
|
||
model: body?.model || "unknown",
|
||
prompt: typeof body?.prompt === "string" ? body.prompt.slice(0, 500) : JSON.stringify(body).slice(0, 300),
|
||
completion: typeof parsed?.text === "string" ? parsed.text.slice(0, 500) : JSON.stringify(parsed).slice(0, 300),
|
||
duration_ms: ms,
|
||
tokens_in: parsed?.prompt_eval_count,
|
||
tokens_out: parsed?.eval_count,
|
||
});
|
||
} else {
|
||
logSpan(activeTrace, `lakehouse${path}`, body, {
|
||
rows: parsed?.row_count, sources: parsed?.sources?.length,
|
||
sql_matches: parsed?.sql_matches, method: parsed?.method,
|
||
}, ms);
|
||
}
|
||
}
|
||
|
||
return parsed;
|
||
} catch (e: any) {
|
||
if (attempt === retries) throw e;
|
||
if (e.message?.includes("socket connection was closed") || e.message?.includes("ECONNREFUSED")) {
|
||
await Bun.sleep(500 * (attempt + 1));
|
||
continue;
|
||
}
|
||
throw e;
|
||
}
|
||
}
|
||
throw new Error("unreachable");
|
||
}
|
||
|
||
const server = new McpServer({ name: "lakehouse", version: "1.0.0" });
|
||
|
||
server.tool(
|
||
"search_workers",
|
||
"Hybrid SQL+vector search. SQL ensures structural accuracy (role, state, reliability), vector ranks by semantic relevance. Every result is verified against the golden dataset.",
|
||
{
|
||
question: z.string().describe("Natural language question about workers"),
|
||
sql_filter: z.string().optional().describe("SQL WHERE clause, e.g. \"role = 'Forklift Operator' AND state = 'IL' AND reliability > 0.8\""),
|
||
dataset: z.string().default("ethereal_workers"),
|
||
id_column: z.string().default("worker_id"),
|
||
top_k: z.number().default(5),
|
||
},
|
||
async ({ question, sql_filter, dataset, id_column, top_k }) => {
|
||
const body: any = {
|
||
question, index_name: "workers_500k_v1", filter_dataset: dataset, id_column, top_k, generate: true,
|
||
use_playbook_memory: true,
|
||
};
|
||
if (sql_filter) body.sql_filter = sql_filter;
|
||
const r = await api("POST", "/vectors/hybrid", body);
|
||
return { content: [{ type: "text" as const, text: JSON.stringify(r, null, 2) }] };
|
||
},
|
||
);
|
||
|
||
server.tool(
|
||
"query_sql",
|
||
"Run SQL against any lakehouse dataset. Tables: ethereal_workers (10K), candidates (1K), workers_500k (500K), timesheets (1M), call_log (800K), email_log (500K), placements (50K), job_orders (15K), clients (2K).",
|
||
{ sql: z.string().describe("SQL query") },
|
||
async ({ sql }) => {
|
||
const r = await api("POST", "/query/sql", { sql });
|
||
if (r.error) return { content: [{ type: "text" as const, text: `SQL Error: ${r.error}` }] };
|
||
return { content: [{ type: "text" as const, text: `${r.row_count} rows:\n${JSON.stringify(r.rows?.slice(0, 20), null, 2)}` }] };
|
||
},
|
||
);
|
||
|
||
server.tool(
|
||
"match_contract",
|
||
"Find qualified workers for a staffing contract. SQL-verified matches ranked by semantic fit.",
|
||
{
|
||
role: z.string(), state: z.string(), city: z.string().optional(),
|
||
min_reliability: z.number().default(0.7),
|
||
required_certs: z.array(z.string()).default([]),
|
||
headcount: z.number().default(5),
|
||
},
|
||
async ({ role, state, city, min_reliability, required_certs, headcount }) => {
|
||
let filter = `role = '${role}' AND state = '${state}' AND reliability >= ${min_reliability}`;
|
||
if (city) filter += ` AND city = '${city}'`;
|
||
const r = await api("POST", "/vectors/hybrid", {
|
||
question: `Find the best ${role} workers with relevant skills and certifications`,
|
||
index_name: "workers_500k_v1", sql_filter: filter,
|
||
filter_dataset: "ethereal_workers", id_column: "worker_id",
|
||
top_k: headcount * 2, generate: false,
|
||
use_playbook_memory: true,
|
||
});
|
||
let matches = r.sources || [];
|
||
if (required_certs.length > 0) {
|
||
const req = new Set(required_certs.map((c: string) => c.toLowerCase()));
|
||
matches = matches.filter((m: any) => {
|
||
const certs = (m.chunk_text || "").toLowerCase();
|
||
return [...req].every(c => certs.includes(c));
|
||
});
|
||
}
|
||
return { content: [{ type: "text" as const, text: JSON.stringify({
|
||
contract: { role, state, city, min_reliability, required_certs },
|
||
matches: matches.slice(0, headcount), total_sql: r.sql_matches, method: r.method,
|
||
}, null, 2) }] };
|
||
},
|
||
);
|
||
|
||
server.tool(
|
||
"get_worker",
|
||
"Fetch one worker profile by ID — all fields including scores and comms.",
|
||
{ worker_id: z.number() },
|
||
async ({ worker_id }) => {
|
||
const r = await api("POST", "/query/sql", { sql: `SELECT * FROM ethereal_workers WHERE worker_id = ${worker_id}` });
|
||
if (!r.rows?.length) return { content: [{ type: "text" as const, text: `Worker ${worker_id} not found` }] };
|
||
return { content: [{ type: "text" as const, text: JSON.stringify(r.rows[0], null, 2) }] };
|
||
},
|
||
);
|
||
|
||
server.tool(
|
||
"rag_question",
|
||
"Natural language question answered via RAG (embed → search → retrieve → generate). For open-ended questions where SQL alone isn't enough.",
|
||
{ question: z.string(), index: z.string().default("workers_500k_v1"), top_k: z.number().default(5) },
|
||
async ({ question, index, top_k }) => {
|
||
const r = await api("POST", "/vectors/rag", { index_name: index, question, top_k });
|
||
return { content: [{ type: "text" as const, text: r.error ? `RAG Error: ${r.error}` : `Answer: ${r.answer}\n\nSources: ${r.sources?.length || 0}` }] };
|
||
},
|
||
);
|
||
|
||
server.tool(
|
||
"log_success",
|
||
"Record a successful operation to the playbook database. Small models query this later to learn what worked.",
|
||
{
|
||
operation: z.string().describe("What was done"),
|
||
approach: z.string().describe("How it was done"),
|
||
result: z.string().describe("Outcome"),
|
||
context: z.string().optional(),
|
||
},
|
||
async ({ operation, approach, result, context }) => {
|
||
const csv = `timestamp,operation,approach,result,context\n"${new Date().toISOString()}","${operation.replace(/"/g, '""')}","${approach.replace(/"/g, '""')}","${result.replace(/"/g, '""')}","${(context||"").replace(/"/g, '""')}"`;
|
||
const form = new FormData();
|
||
form.append("file", new Blob([csv], { type: "text/csv" }), "playbook.csv");
|
||
const resp = await fetch(`${BASE}/ingest/file?name=successful_playbooks`, { method: "POST", body: form });
|
||
return { content: [{ type: "text" as const, text: `Logged: ${await resp.text()}` }] };
|
||
},
|
||
);
|
||
|
||
server.tool(
|
||
"get_playbooks",
|
||
"Retrieve past successful operations. Small models use this to learn what approaches worked.",
|
||
{ keyword: z.string().optional(), limit: z.number().default(10) },
|
||
async ({ keyword, limit }) => {
|
||
let sql = `SELECT * FROM successful_playbooks ORDER BY timestamp DESC LIMIT ${limit}`;
|
||
if (keyword) sql = `SELECT * FROM successful_playbooks WHERE operation LIKE '%${keyword}%' OR approach LIKE '%${keyword}%' ORDER BY timestamp DESC LIMIT ${limit}`;
|
||
const r = await api("POST", "/query/sql", { sql });
|
||
if (r.error) return { content: [{ type: "text" as const, text: "No playbooks yet — log some successful operations first!" }] };
|
||
return { content: [{ type: "text" as const, text: JSON.stringify(r.rows, null, 2) }] };
|
||
},
|
||
);
|
||
|
||
server.tool(
|
||
"swap_profile",
|
||
"Hot-swap model profile. Changes Ollama model in VRAM + bound datasets. 'agent-parquet' = HNSW (fast), 'agent-lance' = IVF_PQ (scalable).",
|
||
{ profile_id: z.string() },
|
||
async ({ profile_id }) => {
|
||
const r = await api("POST", `/vectors/profile/${profile_id}/activate`);
|
||
return { content: [{ type: "text" as const, text: JSON.stringify({
|
||
profile: r.profile_id, model: r.ollama_name,
|
||
indexes: r.indexes_warmed?.length, vectors: r.total_vectors,
|
||
previous: r.previous_profile, duration: r.duration_secs,
|
||
}, null, 2) }] };
|
||
},
|
||
);
|
||
|
||
server.tool(
|
||
"vram_status",
|
||
"GPU VRAM usage + loaded Ollama models. Check before swapping profiles.",
|
||
{},
|
||
async () => {
|
||
const r = await api("GET", "/ai/vram");
|
||
return { content: [{ type: "text" as const, text: JSON.stringify(r, null, 2) }] };
|
||
},
|
||
);
|
||
|
||
// Resources — these give any MCP client full context about the system
|
||
|
||
server.resource("lakehouse://system", "lakehouse://system", async (uri) => {
|
||
const health = await api("GET", "/health");
|
||
const datasets = await api("GET", "/catalog/datasets") as any[];
|
||
const indexes = await api("GET", "/vectors/indexes") as any[];
|
||
const vram = await api("GET", "/ai/vram");
|
||
const agent = await api("GET", "/vectors/agent/status");
|
||
const buckets = await api("GET", "/storage/buckets");
|
||
|
||
const text = `# Lakehouse System Status
|
||
|
||
## Health: ${health === "lakehouse ok" ? "OK" : JSON.stringify(health)}
|
||
|
||
## Datasets (${datasets.length})
|
||
${datasets.map((d: any) => `- ${d.name}: ${d.row_count || "?"} rows`).join("\n")}
|
||
|
||
## Vector Indexes (${indexes.length})
|
||
${(indexes as any[]).map((i: any) => `- ${i.index_name}: ${i.chunk_count} chunks (${i.vector_backend || "parquet"})`).join("\n")}
|
||
|
||
## GPU
|
||
- Used: ${vram?.gpu?.used_mib || "?"}/${vram?.gpu?.total_mib || "?"} MiB
|
||
- Models loaded: ${(vram?.ollama_loaded || []).map((m: any) => m.name).join(", ") || "none"}
|
||
|
||
## Autotune Agent
|
||
- Running: ${agent?.running}, Trials: ${agent?.trials_run}, Promotions: ${agent?.promotions}
|
||
|
||
## Buckets (${(buckets as any[])?.length || 0})
|
||
${(buckets as any[] || []).map((b: any) => `- ${b.name}: ${b.backend} (${b.reachable ? "reachable" : "DOWN"})`).join("\n")}
|
||
|
||
## Services
|
||
- Lakehouse Gateway: :3100
|
||
- AI Sidecar: :3200
|
||
- Agent Gateway: :3700
|
||
- Langfuse: :3001
|
||
- MinIO S3: :9000
|
||
- Ollama: :11434
|
||
|
||
## Available Models
|
||
- qwen3: 8.2B, 40K context, thinking+tools (best for reasoning)
|
||
- qwen2.5: 7B, 8K context (best for fast SQL generation)
|
||
- mistral: 7B, 8K context (general generation)
|
||
- nomic-embed-text: 137M (embedding, automatic)
|
||
`;
|
||
return { contents: [{ uri: uri.href, mimeType: "text/plain", text }] };
|
||
});
|
||
|
||
server.resource("lakehouse://architecture", "lakehouse://architecture", async (uri) => {
|
||
// Read the PRD directly
|
||
const prd = await Bun.file("/home/profit/lakehouse/docs/PRD.md").text().catch(() => "PRD not found");
|
||
return { contents: [{ uri: uri.href, mimeType: "text/markdown", text: prd }] };
|
||
});
|
||
|
||
server.resource("lakehouse://instructions", "lakehouse://instructions", async (uri) => {
|
||
const instructions = await Bun.file("/home/profit/lakehouse/mcp-server/AGENT_INSTRUCTIONS.md").text().catch(() => "Instructions not found");
|
||
return { contents: [{ uri: uri.href, mimeType: "text/markdown", text: instructions }] };
|
||
});
|
||
|
||
server.resource("lakehouse://playbooks", "lakehouse://playbooks", async (uri) => {
|
||
const r = await api("POST", "/query/sql", {
|
||
sql: "SELECT * FROM successful_playbooks ORDER BY timestamp DESC LIMIT 20"
|
||
});
|
||
const rows = r?.rows || [];
|
||
const text = rows.length === 0
|
||
? "No playbooks yet. Log successful operations with the log_success tool."
|
||
: rows.map((p: any) => `## ${p.operation}\n- Approach: ${p.approach}\n- Result: ${p.result}\n- Context: ${p.context || "—"}\n`).join("\n");
|
||
return { contents: [{ uri: uri.href, mimeType: "text/markdown", text: `# Successful Playbooks\n\n${text}` }] };
|
||
});
|
||
|
||
server.resource("lakehouse://datasets", "lakehouse://datasets", async (uri) => {
|
||
const r = await api("GET", "/catalog/datasets") as any[];
|
||
const text = r.map(d => `${d.name}: ${d.row_count || "?"} rows`).join("\n");
|
||
return { contents: [{ uri: uri.href, mimeType: "text/plain", text }] };
|
||
});
|
||
|
||
// ─── Dual mode: stdio (Claude Code) or HTTP (internal agents) ───
|
||
|
||
async function main() {
|
||
if (MODE === "stdio") {
|
||
const transport = new StdioServerTransport();
|
||
await server.connect(transport);
|
||
console.error(`Lakehouse MCP (stdio) → ${BASE}`);
|
||
return;
|
||
}
|
||
|
||
// HTTP mode — a REST gateway that internal agents call directly.
|
||
// No MCP protocol complexity for consumers — just POST JSON, get JSON.
|
||
// The MCP tool definitions above are reused for the stdio path; this
|
||
// HTTP path wraps the same lakehouse API with agent-friendly routing.
|
||
|
||
Bun.serve({
|
||
port: PORT,
|
||
async fetch(req) {
|
||
const url = new URL(req.url);
|
||
const json = async () => req.method === "POST" ? await req.json() : {};
|
||
|
||
// CORS — dashboard runs in the browser, gateway is a different origin
|
||
const cors = {
|
||
"Access-Control-Allow-Origin": "*",
|
||
"Access-Control-Allow-Methods": "GET, POST, OPTIONS",
|
||
"Access-Control-Allow-Headers": "Content-Type",
|
||
};
|
||
if (req.method === "OPTIONS") return new Response(null, { status: 204, headers: cors });
|
||
|
||
const ok = (data: any) => Response.json(data, { headers: cors });
|
||
const err = (msg: string, status = 400) => Response.json({ error: msg }, { status, headers: cors });
|
||
|
||
try {
|
||
// Health — no trace needed
|
||
if (url.pathname === "/health") return ok({ status: "ok", lakehouse: BASE, tools: 11 });
|
||
|
||
// Start a Langfuse trace for every non-static request
|
||
if (req.method === "POST" || !["/","/dashboard","/dashboard.css","/dashboard.ts","/dashboard.js"].includes(url.pathname)) {
|
||
activeTrace = startTrace(`gw:${url.pathname}`, { method: req.method, path: url.pathname });
|
||
}
|
||
|
||
// Self-orientation: any agent calls this first to understand the system
|
||
if (url.pathname === "/context") {
|
||
const instructions = await Bun.file("/home/profit/lakehouse/mcp-server/AGENT_INSTRUCTIONS.md").text().catch(() => "");
|
||
const datasets = await api("GET", "/catalog/datasets") as any[];
|
||
const indexes = await api("GET", "/vectors/indexes") as any[];
|
||
const vram = await api("GET", "/ai/vram");
|
||
return ok({
|
||
system: "Lakehouse Staffing Co-Pilot",
|
||
purpose: "AI anticipates staffing coordinator needs — pre-matches workers to contracts, surfaces alerts, builds playbooks from successful operations",
|
||
instructions: instructions.slice(0, 3000),
|
||
datasets: (datasets || []).map((d: any) => ({ name: d.name, rows: d.row_count })),
|
||
indexes: (indexes || []).map((i: any) => ({ name: i.index_name, chunks: i.chunk_count, backend: i.vector_backend })),
|
||
models: { qwen3: "8.2B reasoning+tools", qwen2_5: "7B fast SQL", mistral: "7B generation", nomic: "137M embedding" },
|
||
vram: vram?.gpu,
|
||
tools: ["/search","/sql","/match","/worker/:id","/ask","/log","/playbooks","/profile/:id","/vram","/context","/verify"],
|
||
rules: [
|
||
"Never hallucinate — only state facts from tool responses",
|
||
"SQL for counts/aggregations, hybrid /search for matching",
|
||
"Log every successful operation to /log",
|
||
"Check /playbooks before complex tasks",
|
||
"Verify worker details via /worker/:id before communicating",
|
||
],
|
||
});
|
||
}
|
||
|
||
// Verification endpoint — agent can check any claim against SQL
|
||
if (url.pathname === "/verify") {
|
||
const b = await json();
|
||
// b.claim: "worker 4925 is a Forklift Operator in IL with reliability 0.82"
|
||
// b.worker_id: 4925
|
||
// b.checks: { role: "Forklift Operator", state: "IL", reliability: 0.82 }
|
||
if (!b.worker_id) return err("worker_id required");
|
||
const r = await api("POST", "/query/sql", {
|
||
sql: `SELECT * FROM ethereal_workers WHERE worker_id = ${b.worker_id}`
|
||
});
|
||
const worker = r?.rows?.[0];
|
||
if (!worker) return ok({ verified: false, reason: `worker ${b.worker_id} not found` });
|
||
|
||
const checks = b.checks || {};
|
||
const failures: string[] = [];
|
||
for (const [field, expected] of Object.entries(checks)) {
|
||
const actual = worker[field];
|
||
if (actual === undefined) continue;
|
||
if (typeof expected === "number") {
|
||
if (Math.abs(Number(actual) - expected) > 0.05) {
|
||
failures.push(`${field}: claimed=${expected} actual=${actual}`);
|
||
}
|
||
} else if (String(actual).toLowerCase() !== String(expected).toLowerCase()) {
|
||
failures.push(`${field}: claimed=${expected} actual=${actual}`);
|
||
}
|
||
}
|
||
return ok({
|
||
verified: failures.length === 0,
|
||
worker_id: b.worker_id,
|
||
worker_name: worker.name,
|
||
failures,
|
||
actual: worker,
|
||
});
|
||
}
|
||
|
||
// Tool: hybrid search
|
||
// ─── Client blacklists (feature #2) ───────────────────────────
|
||
// Per-client worker exclusion list. A worker blacklisted for
|
||
// client X is hidden from /search and /match when the caller
|
||
// passes `client: "X"`. Persisted to local JSON so it survives
|
||
// Bun restarts. This is a trust-critical feature — if the
|
||
// system recommends a worker the client already flagged, the
|
||
// system's credibility is gone.
|
||
if (url.pathname.startsWith("/clients/") && url.pathname.includes("/blacklist")) {
|
||
const m = url.pathname.match(/^\/clients\/([^\/]+)\/blacklist\/?(.*)$/);
|
||
if (m) {
|
||
const client = decodeURIComponent(m[1]);
|
||
const suffix = m[2]; // empty, or a worker_id to delete
|
||
|
||
if (req.method === "GET") {
|
||
const list = await loadClientBlacklist(client);
|
||
return ok({ client, entries: list });
|
||
}
|
||
if (req.method === "POST" && !suffix) {
|
||
const b = await json();
|
||
if (!b.worker_id) return err("worker_id required", 400);
|
||
const entry = {
|
||
worker_id: String(b.worker_id),
|
||
name: b.name || "",
|
||
reason: b.reason || "",
|
||
added_at: new Date().toISOString(),
|
||
};
|
||
const list = await addToClientBlacklist(client, entry);
|
||
return ok({ client, added: entry, total: list.length });
|
||
}
|
||
if (req.method === "DELETE" && suffix) {
|
||
const worker_id = decodeURIComponent(suffix);
|
||
const { removed, total } = await removeFromClientBlacklist(client, worker_id);
|
||
return ok({ client, removed, total });
|
||
}
|
||
return err(`unsupported method ${req.method} for blacklist`, 405);
|
||
}
|
||
}
|
||
|
||
if (url.pathname === "/search") {
|
||
const b = await json();
|
||
// Availability soft-filter: if the caller didn't constrain
|
||
// availability and isn't explicitly opting out, auto-append
|
||
// `availability > 0.5`. Recruiters calling this route expect
|
||
// "available workers" by default; surfacing someone who's on
|
||
// an active placement breaks trust on the first call.
|
||
let filter = b.sql_filter as (string | undefined);
|
||
const optOut = b.include_unavailable === true;
|
||
if (!optOut && filter && !/availability/i.test(filter)) {
|
||
filter = `(${filter}) AND CAST(availability AS DOUBLE) > 0.5`;
|
||
}
|
||
// Client blacklist filter: if caller passes `client`, exclude
|
||
// worker_ids that client has flagged. One SQL expression
|
||
// added, no extra round-trip needed by the caller.
|
||
if (b.client && filter) {
|
||
const bl = await loadClientBlacklist(String(b.client));
|
||
const ids = bl.map(e => e.worker_id).filter(x => /^\d+$/.test(x));
|
||
if (ids.length > 0) {
|
||
filter = `(${filter}) AND worker_id NOT IN (${ids.join(",")})`;
|
||
}
|
||
}
|
||
const hybridRes = await api("POST", "/vectors/hybrid", {
|
||
question: b.question, index_name: b.index || "workers_500k_v1",
|
||
sql_filter: filter, filter_dataset: b.dataset || "ethereal_workers",
|
||
id_column: b.id_column || "worker_id", top_k: b.top_k || 5, generate: b.generate !== false,
|
||
use_playbook_memory: b.use_playbook_memory !== false,
|
||
playbook_memory_k: b.playbook_memory_k ?? 200,
|
||
});
|
||
// Rate enrichment + optional max_pay_rate filter (soft filter,
|
||
// preserves result shape). Operator can opt out by omitting.
|
||
if (hybridRes && Array.isArray(hybridRes.sources)) {
|
||
enrichWithRates(hybridRes.sources);
|
||
if (typeof b.max_pay_rate === "number" && b.max_pay_rate > 0) {
|
||
const before = hybridRes.sources.length;
|
||
hybridRes.sources = hybridRes.sources.filter((s: any) => s.implied_pay_rate <= b.max_pay_rate);
|
||
(hybridRes as any).pay_rate_filtered_out = before - hybridRes.sources.length;
|
||
}
|
||
}
|
||
return ok(hybridRes);
|
||
}
|
||
|
||
// Tool: SQL
|
||
if (url.pathname === "/sql") {
|
||
const b = await json();
|
||
return ok(await api("POST", "/query/sql", { sql: b.sql }));
|
||
}
|
||
|
||
// Tool: match contract
|
||
if (url.pathname === "/match") {
|
||
const b = await json();
|
||
let filter = `role = '${b.role}' AND state = '${b.state}' AND reliability >= ${b.min_reliability || 0.7}`;
|
||
if (b.city) filter += ` AND city = '${b.city}'`;
|
||
return ok(await api("POST", "/vectors/hybrid", {
|
||
question: `Best ${b.role} workers with relevant skills`,
|
||
index_name: b.index || "workers_500k_v1", sql_filter: filter,
|
||
filter_dataset: b.dataset || "ethereal_workers",
|
||
id_column: "worker_id", top_k: (b.headcount || 5) * 2, generate: false,
|
||
use_playbook_memory: true,
|
||
playbook_memory_k: 200,
|
||
}));
|
||
}
|
||
|
||
// Tool: get worker
|
||
if (url.pathname.startsWith("/worker/")) {
|
||
const id = url.pathname.split("/")[2];
|
||
return ok(await api("POST", "/query/sql", { sql: `SELECT * FROM ethereal_workers WHERE worker_id = ${id}` }));
|
||
}
|
||
|
||
// Tool: RAG
|
||
if (url.pathname === "/ask") {
|
||
const b = await json();
|
||
return ok(await api("POST", "/vectors/rag", { index_name: b.index || "workers_500k_v1", question: b.question, top_k: b.top_k || 5 }));
|
||
}
|
||
|
||
// Tool: log success.
|
||
//
|
||
// BUG FIX 2026-04-20: previously this also POSTed a 1-row CSV to
|
||
// /ingest/file?name=successful_playbooks. That endpoint REPLACES
|
||
// the dataset's object list rather than appending — so every /log
|
||
// call destroyed all prior rows in the SQL-queryable
|
||
// successful_playbooks table. Chain-of-custody trace caught it:
|
||
// sp_rows went 33 → 1 in a single /log call.
|
||
//
|
||
// Until a proper append endpoint exists (Phase 8 delta write
|
||
// surface for the SQL table), /log writes ONLY to playbook_memory
|
||
// (in-memory append-only store, works correctly for boost). The
|
||
// SQL successful_playbooks table is now treated as derived state
|
||
// that gets rebuilt explicitly via /vectors/playbook_memory/rebuild
|
||
// — never written to by the recruiter path.
|
||
if (url.pathname === "/log") {
|
||
const b = await json();
|
||
// Result format expected: "{filled}/{needed} filled → Name1, Name2, Name3"
|
||
const result = String(b.result || "");
|
||
const arrowIdx = result.indexOf("→");
|
||
const namesPart = arrowIdx >= 0 ? result.slice(arrowIdx + 1) : "";
|
||
const rawEndorsed = namesPart.split(",").map(s => s.trim()).filter(Boolean);
|
||
|
||
// Parse the contract's (city, state) from operation. Seed is
|
||
// keyed by (city, state, name) so validation must match those
|
||
// coordinates, not just the name.
|
||
const opMatch = String(b.operation || "").match(/ in ([^,]+),\s*([A-Za-z]+)/);
|
||
const city = opMatch ? opMatch[1].trim() : "";
|
||
const state = opMatch ? opMatch[2].trim() : "";
|
||
|
||
// Ghost-name guard — /log previously accepted any endorsed
|
||
// names without verification. Those ghosts landed in
|
||
// playbook_memory, grew the entry count, but boost silently
|
||
// never fired because no real worker chunk ever matched the
|
||
// stored (city, state, name) tuple. Real-test on 2026-04-20
|
||
// surfaced this. Validate against workers_500k before seeding.
|
||
let endorsed: string[] = rawEndorsed;
|
||
let rejected: string[] = [];
|
||
if (rawEndorsed.length && city && state) {
|
||
const quoted = rawEndorsed.map(n => `'${n.replace(/'/g, "''")}'`).join(",");
|
||
const sql = `SELECT DISTINCT name FROM workers_500k `
|
||
+ `WHERE name IN (${quoted}) AND city = '${city.replace(/'/g,"''")}' `
|
||
+ `AND state = '${state.replace(/'/g,"''")}'`;
|
||
const vr = await api("POST", "/query/sql", { sql }).catch(() => ({ rows: [] as any[] })) as any;
|
||
const found = new Set((vr.rows ?? []).map((r: any) => r.name));
|
||
endorsed = rawEndorsed.filter(n => found.has(n));
|
||
rejected = rawEndorsed.filter(n => !found.has(n));
|
||
}
|
||
|
||
let seeded = 0;
|
||
let persisted_rows = 0;
|
||
if (endorsed.length && /fill:.+ in .+,.+/i.test(String(b.operation || ""))) {
|
||
const canonicalApproach = `${(b.approach || "manual log").split(/[\.\n]/)[0]}`.slice(0, 80);
|
||
const canonicalContext = `${(b.context || "").split(/[\.\n]/)[0]}`.slice(0, 80);
|
||
const seedRes = await api("POST", "/vectors/playbook_memory/seed", {
|
||
operation: b.operation,
|
||
approach: canonicalApproach,
|
||
context: canonicalContext,
|
||
endorsed_names: endorsed,
|
||
append: true,
|
||
}).catch(() => null) as any;
|
||
if (seedRes && seedRes.playbook_id) {
|
||
seeded = endorsed.length;
|
||
const pr = await api("POST", "/vectors/playbook_memory/persist_sql", {}).catch(() => null) as any;
|
||
if (pr && typeof pr.rows_persisted === "number") persisted_rows = pr.rows_persisted;
|
||
}
|
||
}
|
||
return ok({
|
||
logged: true,
|
||
seeded,
|
||
persisted_to_sql: persisted_rows,
|
||
rejected_ghost_names: rejected,
|
||
note: rejected.length
|
||
? `${rejected.length} endorsed name(s) not found in workers_500k for ${city}, ${state} — skipped seeding to prevent silent boost failure.`
|
||
: "successful_playbooks_live is the SQL surface for live operator activity. /log is non-destructive and name-validated.",
|
||
});
|
||
}
|
||
|
||
// Tool: log FAILED fill — negative signal for Phase 19 boost.
|
||
// Workers named here get a 0.5^n penalty on future positive
|
||
// boosts in the same (city, state). Three failures effectively
|
||
// zero the boost; five make the worker invisible to the re-rank.
|
||
// Names are validated against workers_500k same as /log.
|
||
if (url.pathname === "/log_failure") {
|
||
const b = await json();
|
||
const opMatch = String(b.operation || "").match(/ in ([^,]+),\s*([A-Za-z]+)/);
|
||
const city = opMatch ? opMatch[1].trim() : "";
|
||
const state = opMatch ? opMatch[2].trim() : "";
|
||
const rawNames: string[] = Array.isArray(b.failed_names) ? b.failed_names : [];
|
||
if (!city || !state) {
|
||
return err("operation must be 'fill: Role xN in City, ST'", 400);
|
||
}
|
||
if (rawNames.length === 0) return err("failed_names must be a non-empty array", 400);
|
||
|
||
const quoted = rawNames.map((n: string) => `'${n.replace(/'/g, "''")}'`).join(",");
|
||
const sql = `SELECT DISTINCT name FROM workers_500k `
|
||
+ `WHERE name IN (${quoted}) AND city = '${city.replace(/'/g,"''")}' `
|
||
+ `AND state = '${state.replace(/'/g,"''")}'`;
|
||
const vr = await api("POST", "/query/sql", { sql }).catch(() => ({ rows: [] as any[] })) as any;
|
||
const found = new Set((vr.rows ?? []).map((r: any) => r.name));
|
||
const failed_names = rawNames.filter((n: string) => found.has(n));
|
||
const rejected = rawNames.filter((n: string) => !found.has(n));
|
||
|
||
if (failed_names.length === 0) {
|
||
return ok({ marked: 0, rejected_ghost_names: rejected,
|
||
note: "no failed_names matched workers_500k for this geo" });
|
||
}
|
||
const mr = await api("POST", "/vectors/playbook_memory/mark_failed", {
|
||
operation: b.operation,
|
||
failed_names,
|
||
reason: b.reason || "",
|
||
});
|
||
return ok({
|
||
marked: mr?.added ?? 0,
|
||
rejected_ghost_names: rejected,
|
||
city, state,
|
||
note: `Each marked worker's positive boost in ${city}, ${state} is halved per recorded failure.`,
|
||
});
|
||
}
|
||
|
||
// Tool: get playbooks
|
||
if (url.pathname === "/playbooks") {
|
||
const kw = url.searchParams.get("keyword");
|
||
const limit = url.searchParams.get("limit") || "10";
|
||
let sql = `SELECT * FROM successful_playbooks ORDER BY timestamp DESC LIMIT ${limit}`;
|
||
if (kw) sql = `SELECT * FROM successful_playbooks WHERE operation LIKE '%${kw}%' OR approach LIKE '%${kw}%' ORDER BY timestamp DESC LIMIT ${limit}`;
|
||
const r = await api("POST", "/query/sql", { sql });
|
||
return ok(r.error ? { playbooks: [], note: "No playbooks yet" } : { playbooks: r.rows });
|
||
}
|
||
|
||
// Tool: swap profile
|
||
if (url.pathname.startsWith("/profile/")) {
|
||
const id = url.pathname.split("/")[2];
|
||
return ok(await api("POST", `/vectors/profile/${id}/activate`));
|
||
}
|
||
|
||
// Tool: VRAM
|
||
if (url.pathname === "/vram") return ok(await api("GET", "/ai/vram"));
|
||
|
||
// Pass-through to lakehouse for anything else
|
||
if (url.pathname.startsWith("/api/")) {
|
||
const path = url.pathname.replace("/api", "");
|
||
const body = req.method !== "GET" ? await req.text() : undefined;
|
||
const r = await fetch(`${BASE}${path}`, { method: req.method, headers: { "Content-Type": "application/json" }, body });
|
||
return new Response(await r.text(), { status: r.status, headers: { "Content-Type": "application/json" } });
|
||
}
|
||
|
||
// Proof — narrative HTML served from mcp-server/proof.html.
|
||
// Live tests consumed client-side via /proof.json.
|
||
if (url.pathname === "/proof") {
|
||
return new Response(Bun.file(import.meta.dir + "/proof.html"), {
|
||
headers: { ...cors, "Content-Type": "text/html" },
|
||
});
|
||
}
|
||
|
||
// Spec — technical specification / README-equivalent document.
|
||
// Long-form architecture doc: folder layout, ingest pipeline,
|
||
// scale story, error surfaces, per-staffer context, a day in
|
||
// the life. Intended for a skeptical reader who needs to
|
||
// dispute or reproduce what the system claims to do.
|
||
if (url.pathname === "/spec") {
|
||
return new Response(Bun.file(import.meta.dir + "/spec.html"), {
|
||
headers: { ...cors, "Content-Type": "text/html" },
|
||
});
|
||
}
|
||
|
||
// Onboard — client-facing ingest wizard. Upload any CSV, preview
|
||
// columns + PII + sample rows, commit via /ingest/file. Works
|
||
// with a shipped sample roster so anyone can trial the flow
|
||
// without real client data.
|
||
if (url.pathname === "/onboard") {
|
||
return new Response(Bun.file(import.meta.dir + "/onboard.html"), {
|
||
headers: { ...cors, "Content-Type": "text/html" },
|
||
});
|
||
}
|
||
|
||
// Workspaces — per-contract state (Phase 8.5). UI layer over the
|
||
// gateway's /workspaces/* routes: list, create, detail, handoff,
|
||
// save-search, shortlist, log-activity. All persisted on the
|
||
// Rust side; this page is a pure viewer + editor.
|
||
if (url.pathname === "/workspaces") {
|
||
return new Response(Bun.file(import.meta.dir + "/workspaces.html"), {
|
||
headers: { ...cors, "Content-Type": "text/html" },
|
||
});
|
||
}
|
||
|
||
// Alerts — push/daemon settings page + config API + test-fire.
|
||
if (url.pathname === "/alerts") {
|
||
return new Response(Bun.file(import.meta.dir + "/alerts.html"), {
|
||
headers: { ...cors, "Content-Type": "text/html" },
|
||
});
|
||
}
|
||
if (url.pathname === "/alerts/config") {
|
||
if (req.method === "GET") {
|
||
const cfg = await loadAlertsConfig();
|
||
const state = await loadAlertsState();
|
||
return ok({ config: cfg, state: { last_run_at: state.last_run_at } });
|
||
}
|
||
if (req.method === "POST") {
|
||
const b = await json();
|
||
const prev = await loadAlertsConfig();
|
||
const next: AlertsConfig = {
|
||
enabled: b.enabled ?? prev.enabled,
|
||
interval_minutes: Math.max(1, Number(b.interval_minutes ?? prev.interval_minutes)),
|
||
webhook_url: typeof b.webhook_url === "string" ? b.webhook_url.trim() || undefined : prev.webhook_url,
|
||
webhook_label: typeof b.webhook_label === "string" ? b.webhook_label : prev.webhook_label,
|
||
deadline_warn_days: Math.max(1, Number(b.deadline_warn_days ?? prev.deadline_warn_days)),
|
||
};
|
||
await saveAlertsConfig(next);
|
||
return ok({ saved: true, config: next,
|
||
note: "Interval change requires server restart to apply. Current running interval unchanged this cycle." });
|
||
}
|
||
}
|
||
if (url.pathname === "/alerts/fire" && req.method === "POST") {
|
||
const cfg = await loadAlertsConfig();
|
||
const d = await buildDigest();
|
||
if (!d) return ok({ fired: false, reason: "no events since last run" });
|
||
const res = await dispatchDigest(d, cfg);
|
||
return ok({ fired: true, channels: res.channels, errors: res.errors, digest: d });
|
||
}
|
||
if (url.pathname === "/alerts/recent" && req.method === "GET") {
|
||
const f = Bun.file(ALERTS_LOG_PATH);
|
||
if (!(await f.exists())) return ok({ entries: [] });
|
||
const text = await f.text();
|
||
const lines = text.split("\n").filter(l => l.trim());
|
||
const last = lines.slice(-10).reverse();
|
||
const entries: any[] = [];
|
||
for (const l of last) { try { entries.push(JSON.parse(l)); } catch {} }
|
||
return ok({ entries });
|
||
}
|
||
|
||
// Onboard ingest — forwards multipart/form-data correctly to
|
||
// the Rust gateway /ingest/file. The generic /api/* passthrough
|
||
// can't handle multipart because it reads as text and forwards
|
||
// as JSON, losing the boundary. This route preserves the body
|
||
// and Content-Type.
|
||
if (url.pathname === "/onboard/ingest" && req.method === "POST") {
|
||
const name = url.searchParams.get("name");
|
||
if (!name || !/^[a-z][a-z0-9_]*$/.test(name)) {
|
||
return err("dataset name required (lowercase+underscores)", 400);
|
||
}
|
||
const contentType = req.headers.get("content-type") || "";
|
||
const upstream = await fetch(`${BASE}/ingest/file?name=${encodeURIComponent(name)}`, {
|
||
method: "POST",
|
||
headers: { "Content-Type": contentType },
|
||
body: await req.arrayBuffer(),
|
||
});
|
||
const body = await upstream.text();
|
||
return new Response(body, {
|
||
status: upstream.status,
|
||
headers: { ...cors, "Content-Type": upstream.headers.get("content-type") || "application/json" },
|
||
});
|
||
}
|
||
|
||
// Sample CSV — generated fresh on every request so content-hash
|
||
// dedup on the ingest side always sees a new payload (two uploads
|
||
// in a row would otherwise be a no-op). Each generation has
|
||
// unique worker_ids (timestamp-prefixed), randomized names + roles
|
||
// + geos from realistic pools, and a random size (~120-180 rows)
|
||
// so the demo looks different every time and numbers actually
|
||
// update visibly in the dashboard after onboarding.
|
||
if (url.pathname.startsWith("/samples/")) {
|
||
const name = url.pathname.slice("/samples/".length);
|
||
if (!/^[a-zA-Z0-9_\-\.]+\.csv$/.test(name)) {
|
||
return err("invalid sample filename", 400);
|
||
}
|
||
if (name === "staffing_roster_sample.csv") {
|
||
const csv = generateSampleRosterCSV();
|
||
return new Response(csv, {
|
||
headers: {
|
||
...cors,
|
||
"Content-Type": "text/csv",
|
||
"Content-Disposition": `attachment; filename="${name}"`,
|
||
"Cache-Control": "no-store",
|
||
},
|
||
});
|
||
}
|
||
// Other sample filenames fall through to the static dir
|
||
const path = `${import.meta.dir}/samples/${name}`;
|
||
const file = Bun.file(path);
|
||
if (!(await file.exists())) return err("sample not found", 404);
|
||
return new Response(file, {
|
||
headers: { ...cors, "Content-Type": "text/csv",
|
||
"Content-Disposition": `attachment; filename="${name}"` },
|
||
});
|
||
}
|
||
|
||
// System-wide scale summary — truthful numbers for the UI.
|
||
// Pulls row counts via SQL (COUNT(*) from parquet footers) for
|
||
// the key datasets rather than trusting catalog manifests, which
|
||
// can go stale when data changes without re-registering. The
|
||
// workers_500k manifest is correct (500K); candidates manifest
|
||
// lied (said 100K, actual 1K) — the audit caught it.
|
||
// Everything else uses manifest row_count since it's O(1).
|
||
// Phase 24 refinement — unified memory query endpoint. Accepts
|
||
// any input (natural language, structured JSON, mixed) via
|
||
// POST body {input: <anything>}. Normalizer handles the shape.
|
||
// Returns a single bundle with every memory surface relevant:
|
||
// playbook workers, KB recommendation, neighbor signatures,
|
||
// prior lessons, top staffers, discovered patterns.
|
||
if (url.pathname === "/memory/query" && req.method === "POST") {
|
||
try {
|
||
const body: any = await req.json();
|
||
const { queryMemory } = await import("../tests/multi-agent/memory_query.ts");
|
||
const result = await queryMemory(body.input ?? body);
|
||
return ok(result);
|
||
} catch (e) {
|
||
return new Response(JSON.stringify({ error: (e as Error).message }), {
|
||
status: 500,
|
||
headers: { "content-type": "application/json" },
|
||
});
|
||
}
|
||
}
|
||
|
||
// Batch ticker quotes — used by the profiler page's scrolling
|
||
// ticker basket. Stooq's HTTP CSV API is single-symbol per call,
|
||
// so this fans out N tickers in parallel and returns a flat
|
||
// map. Non-US tickers (HOC.DE, SKA-B.ST, LLC.AX) won't have a
|
||
// Stooq.us entry; we surface that as null so the UI can render
|
||
// them with a "—" placeholder.
|
||
if (url.pathname === "/intelligence/ticker_quotes" && req.method === "POST") {
|
||
const start = Date.now();
|
||
const b = await json();
|
||
const tickers: string[] = Array.isArray(b.tickers) ? b.tickers.map((t: any) => String(t).toUpperCase()).filter(Boolean) : [];
|
||
if (!tickers.length) return ok({ quotes: {}, duration_ms: 0 });
|
||
const { fetchStooqQuote } = await import("./entity.js");
|
||
const dedup = Array.from(new Set(tickers)).slice(0, 50);
|
||
const results = await Promise.all(dedup.map(async (t) => {
|
||
// Skip non-US suffixes — Stooq.us won't have them
|
||
if (t.includes(".")) return [t, null] as const;
|
||
try {
|
||
const q = await fetchStooqQuote(t);
|
||
if (!q || !q.price) return [t, null] as const;
|
||
const change_pct = q.open && q.open > 0 ? ((q.price - q.open) / q.open) * 100 : null;
|
||
return [t, {
|
||
ticker: t,
|
||
price: q.price,
|
||
price_date: q.price_date,
|
||
open: q.open,
|
||
high: q.high,
|
||
low: q.low,
|
||
day_change_pct: change_pct,
|
||
stooq_url: `https://stooq.com/q/?s=${t.toLowerCase()}.us`,
|
||
}] as const;
|
||
} catch {
|
||
return [t, null] as const;
|
||
}
|
||
}));
|
||
const quotes: Record<string, any> = {};
|
||
for (const [t, q] of results) quotes[t] = q;
|
||
return ok({ quotes, count: dedup.length, duration_ms: Date.now() - start });
|
||
}
|
||
|
||
// Profiler index — directory of every contractor that has filed
|
||
// a Chicago permit recently, ranked by permit count + total
|
||
// cost. Each name in the response links to the full /contractor
|
||
// profile page. Answers J's ask: "a profiler index that shows
|
||
// a history of everyone." Pulled live from Socrata; the
|
||
// count/cost aggregations let the staffer see who's actually
|
||
// active vs one-off LLCs.
|
||
if (url.pathname === "/intelligence/profiler_index" && req.method === "POST") {
|
||
const start = Date.now();
|
||
const b = await json();
|
||
const sinceDate = String(b.since || "2025-06-01");
|
||
const minCost = Math.max(0, Number(b.min_cost) || 100000);
|
||
const limit = Math.max(1, Math.min(500, Number(b.limit) || 200));
|
||
const search = String(b.search || "").trim();
|
||
const permitUrl = "https://data.cityofchicago.org/resource/ydr8-5enu.json";
|
||
// Group by contact_1_name AND by contact_2_name in two
|
||
// queries, then merge — Socrata's GROUP BY only takes one
|
||
// expression and we want both contractor slots.
|
||
const buildQuery = (col: string) => {
|
||
const where = [
|
||
`reported_cost>${minCost}`,
|
||
`issue_date>'${sinceDate.replace(/'/g, "")}'`,
|
||
`${col} IS NOT NULL`,
|
||
];
|
||
if (search) {
|
||
const s = search.replace(/'/g, "''").toUpperCase();
|
||
where.push(`upper(${col}) like '%${s}%'`);
|
||
}
|
||
return `${permitUrl}?$select=${col} AS name,count(*) as cnt,sum(reported_cost) as total_cost,max(issue_date) as last_filed&`
|
||
+ `$where=${encodeURIComponent(where.join(" AND "))}`
|
||
+ `&$group=${col}&$order=total_cost DESC&$limit=${limit}`;
|
||
};
|
||
// Co-occurrence query — pulls the contact_1+contact_2 pairs in
|
||
// the window so we can attribute tickers across associations
|
||
// ("Bob's Electric works for Target → show TGT"). Capped 5K
|
||
// permits, ~200ms cold; resolved tickers are in-memory after.
|
||
const buildCoQuery = () => {
|
||
const where = [
|
||
`reported_cost>${minCost}`,
|
||
`issue_date>'${sinceDate.replace(/'/g, "")}'`,
|
||
"contact_1_name IS NOT NULL",
|
||
"contact_2_name IS NOT NULL",
|
||
];
|
||
if (search) {
|
||
const s = search.replace(/'/g, "''").toUpperCase();
|
||
where.push(`(upper(contact_1_name) like '%${s}%' OR upper(contact_2_name) like '%${s}%')`);
|
||
}
|
||
return `${permitUrl}?$select=contact_1_name,contact_2_name`
|
||
+ `&$where=${encodeURIComponent(where.join(" AND "))}`
|
||
+ `&$limit=5000`;
|
||
};
|
||
try {
|
||
const [byC1, byC2, coRows] = await Promise.all([
|
||
fetch(buildQuery("contact_1_name")).then((r) => r.json()).catch(() => []),
|
||
fetch(buildQuery("contact_2_name")).then((r) => r.json()).catch(() => []),
|
||
fetch(buildCoQuery()).then((r) => r.json()).catch(() => []),
|
||
]);
|
||
const merged: Record<string, { name: string; permits: number; total_cost: number; last_filed: string; roles: Set<string> }> = {};
|
||
const consume = (rows: any[], role: string) => {
|
||
for (const r of rows || []) {
|
||
const n = (r.name || "").trim();
|
||
if (!n) continue;
|
||
const cnt = parseInt(r.cnt, 10) || 0;
|
||
const cost = parseFloat(r.total_cost || "0") || 0;
|
||
const last = r.last_filed || "";
|
||
const key = n.toUpperCase();
|
||
if (!merged[key]) merged[key] = { name: n, permits: 0, total_cost: 0, last_filed: "", roles: new Set() };
|
||
merged[key].permits += cnt;
|
||
merged[key].total_cost += cost;
|
||
if (last > merged[key].last_filed) merged[key].last_filed = last;
|
||
merged[key].roles.add(role);
|
||
}
|
||
};
|
||
consume(byC1, "applicant");
|
||
consume(byC2, "contractor");
|
||
|
||
// Build co-occurrence map from the permit pairs. For each
|
||
// contractor key, count how many permits they co-appeared
|
||
// on with each other party.
|
||
const coMap: Record<string, Record<string, number>> = {};
|
||
for (const r of (Array.isArray(coRows) ? coRows : []) as any[]) {
|
||
const a = String(r.contact_1_name || "").trim();
|
||
const b = String(r.contact_2_name || "").trim();
|
||
if (!a || !b) continue;
|
||
const ka = a.toUpperCase();
|
||
const kb = b.toUpperCase();
|
||
if (ka === kb) continue;
|
||
(coMap[ka] = coMap[ka] || {})[kb] = (coMap[ka][kb] || 0) + 1;
|
||
(coMap[kb] = coMap[kb] || {})[ka] = (coMap[kb][ka] || 0) + 1;
|
||
}
|
||
|
||
// Attach tickers per contractor — direct, parent, and any
|
||
// tickers attributable to top co-occurring partners ("works
|
||
// with TARGET CORPORATION → TGT shown as associated"). Resolves
|
||
// via the in-memory SEC tickers index + curated parent map,
|
||
// so the cost is per-name index-lookup, not a network call.
|
||
const { lookupTickerLite } = await import("./entity.js");
|
||
// Memoize per-name to skip duplicate lookups across the
|
||
// associated step.
|
||
const tickerCache: Record<string, any[]> = {};
|
||
const lookupCached = async (n: string) => {
|
||
const k = n.toUpperCase();
|
||
if (tickerCache[k]) return tickerCache[k];
|
||
tickerCache[k] = await lookupTickerLite(n);
|
||
return tickerCache[k];
|
||
};
|
||
|
||
const rowsBase = Object.values(merged)
|
||
.map((r) => ({ ...r, roles: Array.from(r.roles) }))
|
||
.sort((a, b) => b.total_cost - a.total_cost)
|
||
.slice(0, limit);
|
||
|
||
// Resolve tickers concurrently (in-memory ops, but Promise.all
|
||
// keeps the code uniform with future remote ticker sources).
|
||
const enriched = await Promise.all(rowsBase.map(async (r) => {
|
||
const direct = await lookupCached(r.name);
|
||
const partners = Object.entries(coMap[r.name.toUpperCase()] || {})
|
||
.sort((a, b) => b[1] - a[1])
|
||
.slice(0, 6);
|
||
const associated: any[] = [];
|
||
const seen = new Set(direct.map((t: any) => t.ticker));
|
||
for (const [partnerName, occurrences] of partners) {
|
||
const ts = await lookupCached(partnerName);
|
||
for (const t of ts) {
|
||
if (seen.has(t.ticker)) continue;
|
||
seen.add(t.ticker);
|
||
associated.push({
|
||
ticker: t.ticker,
|
||
via: "associated",
|
||
partner_name: partnerName,
|
||
co_permits: occurrences,
|
||
partner_via: t.via,
|
||
matched_name: t.matched_name,
|
||
});
|
||
if (associated.length >= 5) break;
|
||
}
|
||
if (associated.length >= 5) break;
|
||
}
|
||
return { ...r, tickers: { direct, associated } };
|
||
}));
|
||
|
||
return ok({
|
||
count: enriched.length,
|
||
since: sinceDate,
|
||
min_cost: minCost,
|
||
search,
|
||
contractors: enriched,
|
||
duration_ms: Date.now() - start,
|
||
});
|
||
} catch (e: any) {
|
||
return err(`profiler_index: ${e.message}`, 500);
|
||
}
|
||
}
|
||
|
||
// Staffer roster — read by the UI dropdown so each coordinator
|
||
// can act under their own identity (per-staffer hot-swap index).
|
||
if (url.pathname === "/api/staffers" || url.pathname === "/staffers") {
|
||
return ok({
|
||
staffers: STAFFERS.map((s) => ({
|
||
id: s.id,
|
||
name: s.name,
|
||
display: s.display,
|
||
territory: s.territory,
|
||
greeting: s.greeting,
|
||
})),
|
||
});
|
||
}
|
||
|
||
if (url.pathname === "/system/summary") {
|
||
const [ds, indexes, workersCount, candsCount] = await Promise.all([
|
||
api("GET", "/catalog/datasets").catch(() => [] as any),
|
||
api("GET", "/vectors/indexes").catch(() => [] as any),
|
||
api("POST", "/query/sql", { sql: "SELECT COUNT(*) AS c FROM workers_500k" })
|
||
.catch(() => null as any),
|
||
api("POST", "/query/sql", { sql: "SELECT COUNT(*) AS c FROM candidates" })
|
||
.catch(() => null as any),
|
||
]);
|
||
const datasets = Array.isArray(ds) ? ds : [];
|
||
const idxs = Array.isArray(indexes) ? indexes : [];
|
||
const workers = Number(workersCount?.rows?.[0]?.c ?? 0);
|
||
const candidates = Number(candsCount?.rows?.[0]?.c ?? 0);
|
||
// Sum manifest row_counts EXCLUDING workers_500k + candidates,
|
||
// then add the truthful SQL counts. This gives a total that
|
||
// reflects live state for the two most-quoted tables.
|
||
const otherManifest = datasets
|
||
.filter((d: any) => d?.name !== "workers_500k" && d?.name !== "candidates")
|
||
.reduce((s: number, d: any) => s + (d?.row_count || 0), 0);
|
||
const totalRows = otherManifest + workers + candidates;
|
||
const totalChunks = idxs.reduce((s: number, i: any) => s + (i?.chunk_count || 0), 0);
|
||
// Manifest drift audit — surface any cases where manifest
|
||
// disagrees with SQL for the two spot-checked tables so the UI
|
||
// can note it if ever meaningful.
|
||
const drift: any[] = [];
|
||
const workersManifest = datasets.find((d: any) => d?.name === "workers_500k")?.row_count;
|
||
const candidatesManifest = datasets.find((d: any) => d?.name === "candidates")?.row_count;
|
||
if (workersManifest !== undefined && workersManifest !== workers) {
|
||
drift.push({ dataset: "workers_500k", manifest: workersManifest, actual: workers });
|
||
}
|
||
if (candidatesManifest !== undefined && candidatesManifest !== candidates) {
|
||
drift.push({ dataset: "candidates", manifest: candidatesManifest, actual: candidates });
|
||
}
|
||
return ok({
|
||
datasets: datasets.length,
|
||
total_rows: totalRows,
|
||
total_chunks: totalChunks,
|
||
workers_500k_rows: workers,
|
||
candidates_rows: candidates,
|
||
indexes: idxs.length,
|
||
manifest_drift: drift,
|
||
});
|
||
}
|
||
|
||
// Model matrix — read config/models.json and expose read-only.
|
||
// Strips internal notes that could drift; the source of truth is
|
||
// the file itself. UI can render tiers, rate budgets, and the
|
||
// experimental rotation list from this endpoint.
|
||
if (url.pathname === "/models/matrix") {
|
||
try {
|
||
const raw = await Bun.file("../config/models.json").text();
|
||
return ok(JSON.parse(raw));
|
||
} catch (e) {
|
||
return new Response(JSON.stringify({ error: `models.json not found: ${(e as Error).message}` }), {
|
||
status: 404,
|
||
headers: { "content-type": "application/json" },
|
||
});
|
||
}
|
||
}
|
||
|
||
// Proof JSON API (same data, no HTML)
|
||
if (url.pathname === "/proof.json") {
|
||
const ds = await api("GET", "/catalog/datasets") as any[];
|
||
const indexes = await api("GET", "/vectors/indexes") as any[];
|
||
const vram = await api("GET", "/ai/vram");
|
||
const totalRows = (ds || []).reduce((s: number, d: any) => s + (d.row_count || 0), 0);
|
||
const totalChunks = (indexes || []).reduce((s: number, i: any) => s + i.chunk_count, 0);
|
||
|
||
// Run live SQL tests
|
||
const tests: any[] = [];
|
||
const sqls = [
|
||
["COUNT 500K workers", "SELECT COUNT(*) FROM workers_500k"],
|
||
["COUNT 1M timesheets", "SELECT COUNT(*) FROM timesheets"],
|
||
["Filter+aggregate 500K", "SELECT role, COUNT(*) cnt FROM workers_500k WHERE state='IL' AND CAST(reliability AS DOUBLE)>0.8 GROUP BY role ORDER BY cnt DESC LIMIT 3"],
|
||
["Cross-table JOIN", "SELECT COUNT(*) FROM candidates c JOIN (SELECT candidate_id, COUNT(*) calls FROM call_log GROUP BY candidate_id HAVING COUNT(*)>=5) cl ON c.candidate_id=cl.candidate_id WHERE c.city='Chicago'"],
|
||
];
|
||
for (const [name, sql] of sqls) {
|
||
const t0 = Date.now();
|
||
const r = await api("POST", "/query/sql", { sql });
|
||
const ms = Date.now() - t0;
|
||
tests.push({ name, ms, result: r.rows?.[0] || r.error, pass: !r.error });
|
||
}
|
||
|
||
// Hybrid test
|
||
const ht0 = Date.now();
|
||
const hybrid = await api("POST", "/vectors/hybrid", {
|
||
question: "reliable forklift operator", index_name: "workers_500k_v1",
|
||
sql_filter: "role = 'Forklift Operator' AND state = 'IL' AND CAST(reliability AS DOUBLE) > 0.8",
|
||
filter_dataset: "workers_500k", id_column: "worker_id", top_k: 5, generate: false,
|
||
use_playbook_memory: true,
|
||
});
|
||
tests.push({
|
||
name: "Hybrid SQL+Vector", ms: Date.now() - ht0,
|
||
result: `sql=${hybrid.sql_matches} → ${hybrid.vector_reranked} verified results`,
|
||
pass: (hybrid.vector_reranked || 0) > 0,
|
||
sources: hybrid.sources?.slice(0, 3),
|
||
});
|
||
|
||
return ok({
|
||
title: "Lakehouse Proof of Work",
|
||
generated: new Date().toISOString(),
|
||
server: "192.168.1.177 (i9 + 128GB RAM + A4000 16GB)",
|
||
scale: { datasets: ds?.length, total_rows: totalRows, indexes: indexes?.length, total_chunks: totalChunks },
|
||
gpu: vram?.gpu,
|
||
tests,
|
||
recall: { hnsw: 0.98, lance: 0.94, note: "Measured on 50K real nomic-embed-text embeddings, 30 queries" },
|
||
lance_10m: { vectors: 10_000_000, disk_gb: 32.9, search_p50_ms: 5, note: "Past HNSW RAM ceiling" },
|
||
verify: "SSH into server, run: curl http://localhost:3100/health — or open http://192.168.1.177:3700/proof",
|
||
});
|
||
}
|
||
|
||
// Dashboard — calls lakehouse /vectors/hybrid directly (no gateway hop)
|
||
if (url.pathname === "/" || url.pathname === "/dashboard") {
|
||
return new Response(Bun.file(import.meta.dir + "/search.html"), {
|
||
headers: { ...cors, "Content-Type": "text/html" },
|
||
});
|
||
}
|
||
if (url.pathname === "/dashboard.css") {
|
||
return new Response(Bun.file(import.meta.dir + "/dashboard.css"), { headers: { "Content-Type": "text/css" } });
|
||
}
|
||
if (url.pathname === "/dashboard.ts" || url.pathname === "/dashboard.js") {
|
||
// Bun transpiles TS on the fly
|
||
const built = await Bun.build({ entrypoints: [import.meta.dir + "/dashboard.ts"], target: "browser" });
|
||
const js = await built.outputs[0].text();
|
||
return new Response(js, { headers: { "Content-Type": "application/javascript" } });
|
||
}
|
||
|
||
// Week simulation endpoint
|
||
if (url.pathname === "/simulation/run" && req.method === "POST") {
|
||
return ok(await runWeekSimulation());
|
||
}
|
||
|
||
// ─── Staffing Intelligence Console ───
|
||
if (url.pathname === "/console") {
|
||
return new Response(Bun.file(import.meta.dir + "/console.html"));
|
||
}
|
||
|
||
// ─── Contractor / entity drill-down page ───
|
||
// Single-contractor portfolio view across every wired source:
|
||
// OSHA national, Chicago history, ticker chart, parent link,
|
||
// federal contracts, debarment, unions, training. Click any
|
||
// contractor name in a permit Entity Brief to land here.
|
||
// ComfyUI-generated portrait — every call is unique by (key,
|
||
// gender, race, age, role) tuple. First hit takes ~1.5s on
|
||
// the A4000; subsequent hits read from disk. Use this for
|
||
// contractor / profile modal where one worker gets the
|
||
// spotlight. NB: declared BEFORE the pool route so the prefix
|
||
// match doesn't intercept it.
|
||
if (url.pathname.startsWith("/headshots/generate/") && req.method === "GET") {
|
||
const key = decodeURIComponent(url.pathname.slice("/headshots/generate/".length));
|
||
if (!key) return new Response("missing key", { status: 400 });
|
||
const g = (url.searchParams.get("g") || "person").toLowerCase();
|
||
const r = (url.searchParams.get("e") || "").toLowerCase();
|
||
const role = (url.searchParams.get("role") || "warehouse worker").toLowerCase();
|
||
const age = parseInt(url.searchParams.get("age") || "32", 10) || 32;
|
||
const cacheKey = await crypto.subtle.digest(
|
||
"SHA-256",
|
||
new TextEncoder().encode(`${key}|${g}|${r}|${role}|${age}`)
|
||
).then((b) => Array.from(new Uint8Array(b)).map((x) => x.toString(16).padStart(2, "0")).join("").slice(0, 24));
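// Example (hypothetical inputs): key "worker:8842", g=woman, e=black,
// role=electrician, age=29 → SHA-256 of
// "worker:8842|woman|black|electrician|29", truncated to 24 hex chars,
// which names the on-disk cache file below.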
const GEN_DIR = "/home/profit/lakehouse/data/headshots_gen";
|
||
await Bun.$`mkdir -p ${GEN_DIR}`.quiet();
|
||
const cachePath = `${GEN_DIR}/${cacheKey}.webp`;
|
||
const cached = Bun.file(cachePath);
|
||
if (await cached.exists()) {
|
||
return new Response(cached, {
|
||
headers: {
|
||
"Content-Type": "image/webp",
|
||
"Cache-Control": "public, max-age=86400, immutable",
|
||
"X-Headshot-Source": "comfyui-cached",
|
||
},
|
||
});
|
||
}
|
||
const raceText = r === "hispanic" ? "Hispanic"
|
||
: r === "black" ? "Black"
|
||
: r === "south_asian" ? "South Asian"
|
||
: r === "east_asian" ? "East Asian"
|
||
: r === "middle_eastern" ? "Middle Eastern"
|
||
: "";
|
||
const genderText = g === "woman" ? "woman" : g === "man" ? "man" : "person";
|
||
const prompt = `professional corporate headshot portrait of a ${age}-year-old ${raceText} ${genderText}, ${role}, neutral expression, plain studio background, soft natural lighting, sharp focus, photorealistic, dslr`;
// Worker-derived seed — hashing the worker key fixes the diffusion
// sampler's seed, so the same worker always renders the same face,
// while a different worker that happens to share the exact same
// prompt still gets a distinct one. Without this, every (g, r, age,
// role) combo collapses to a single face. (The StyleGAN pool route
// below is a separate path; this route goes through ComfyUI.)
let seedHash = 0;
for (let i = 0; i < key.length; i++) seedHash = ((seedHash << 5) - seedHash + key.charCodeAt(i)) | 0;
const seed = Math.abs(seedHash) % 2147483647;
try {
|
||
const genResp = await fetch("http://localhost:3600/generate", {
|
||
method: "POST",
|
||
headers: { "Content-Type": "application/json" },
|
||
body: JSON.stringify({ prompt, width: 512, height: 512, steps: 8, seed }),
|
||
signal: AbortSignal.timeout(30000),
|
||
});
|
||
if (!genResp.ok) return new Response(`gen failed: ${genResp.status}`, { status: 502 });
|
||
const data: any = await genResp.json();
|
||
if (!data.image) return new Response("no image returned", { status: 502 });
|
||
const bytes = Uint8Array.from(atob(data.image), (c) => c.charCodeAt(0));
|
||
await Bun.write(cachePath, bytes);
|
||
return new Response(bytes, {
|
||
headers: {
|
||
"Content-Type": "image/webp",
|
||
"Cache-Control": "public, max-age=86400, immutable",
|
||
"X-Headshot-Source": "comfyui-fresh",
|
||
"X-Headshot-Gen-Ms": String(data.time_ms || 0),
|
||
},
|
||
});
|
||
} catch (e: any) {
|
||
return new Response(`gen error: ${e.message}`, { status: 502 });
|
||
}
|
||
}
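// Example call to the generate route above (hypothetical worker key):
//   GET /headshots/generate/worker:8842?g=woman&e=black&role=electrician&age=29
// First hit renders via ComfyUI (X-Headshot-Source: comfyui-fresh);
// repeat hits read the cached webp from disk (comfyui-cached).
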
// Headshot pool — synthetic StyleGAN faces from
|
||
// thispersondoesnotexist.com fetched offline by
|
||
// scripts/staffing/fetch_face_pool.py. Deterministic mapping:
|
||
// hash(worker key) → pool index → image bytes. Same key always
|
||
// gets the same face; different keys spread evenly.
|
||
//
|
||
// Optional gender hint: ?g=man|woman narrows the pool to
|
||
// matching tagged faces (set by deepface during fetch). Falls
|
||
// back to whole pool if no matches.
|
||
if (url.pathname.startsWith("/headshots/") && req.method === "GET") {
|
||
const key = decodeURIComponent(url.pathname.slice("/headshots/".length));
|
||
const wantGender = url.searchParams.get("g") || "";
|
||
if (!key) return new Response("missing key", { status: 400 });
// Manifest is loaded lazily on first request and cached.
// Re-runs of the fetch script overwrite the manifest; the
// mcp-server can be poked to reload by hitting
// /headshots/__reload — worker keys never begin with a double
// underscore, so the reload sentinel can't collide with a real key.
const HEADSHOT_DIR = "/home/profit/lakehouse/data/headshots";
|
||
if (key === "__reload" || !(globalThis as any)._faces) {
|
||
try {
|
||
const raw = await Bun.file(`${HEADSHOT_DIR}/manifest.jsonl`).text();
|
||
const lines = raw.trim().split("\n").filter(Boolean);
|
||
const all = lines.map((l) => JSON.parse(l));
|
||
// Build (gender × race) buckets so a request that names
|
||
// both narrows to the intersection. Missing intersections
|
||
// fall back to gender-only, then race-only, then all.
|
||
const byGR: Record<string, any[]> = {};
|
||
const byG: Record<string, any[]> = { man: [], woman: [] };
|
||
const byR: Record<string, any[]> = {};
|
||
// Filter excluded faces (e.g. minors) from every bucket
|
||
// and from the all-pool. They never get served.
|
||
const adults = all.filter((r: any) => !r.excluded);
|
||
for (const r of adults) {
|
||
if (r.gender === "man" || r.gender === "woman") byG[r.gender].push(r);
|
||
if (r.race) {
|
||
byR[r.race] = byR[r.race] || [];
|
||
byR[r.race].push(r);
|
||
if (r.gender === "man" || r.gender === "woman") {
|
||
const k = r.gender + "/" + r.race;
|
||
byGR[k] = byGR[k] || [];
|
||
byGR[k].push(r);
|
||
}
|
||
}
|
||
}
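// e.g. a manifest row { file: "face_0042.jpg", gender: "woman",
// race: "hispanic" } (hypothetical) lands in byG.woman, byR.hispanic,
// and byGR["woman/hispanic"].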
(globalThis as any)._faces = {
|
||
all: adults,
|
||
byG, byR, byGR,
|
||
untagged: adults.filter((r: any) => !r.gender || (r.gender !== "man" && r.gender !== "woman")),
|
||
excluded_count: all.length - adults.length,
|
||
loaded_at: Date.now(),
|
||
};
|
||
if (key === "__reload") {
|
||
const byRSummary: Record<string, number> = {};
|
||
for (const k of Object.keys(byR)) byRSummary[k] = byR[k].length;
|
||
const byGRSummary: Record<string, number> = {};
|
||
for (const k of Object.keys(byGR)) byGRSummary[k] = byGR[k].length;
|
||
return Response.json({
|
||
reloaded: true,
|
||
total: all.length,
|
||
excluded: all.length - adults.length,
|
||
served_pool: adults.length,
|
||
by_gender: { man: byG.man.length, woman: byG.woman.length },
|
||
by_race: byRSummary,
|
||
by_gender_race: byGRSummary,
|
||
untagged: (globalThis as any)._faces.untagged.length,
|
||
});
|
||
}
|
||
} catch (e: any) {
|
||
return new Response(`face pool not available: ${e.message}. Run scripts/staffing/fetch_face_pool.py first.`, { status: 503 });
|
||
}
|
||
}
|
||
const F = (globalThis as any)._faces as {
|
||
all: any[];
|
||
byG: Record<string, any[]>;
|
||
byR: Record<string, any[]>;
|
||
byGR: Record<string, any[]>;
|
||
untagged: any[];
|
||
};
|
||
if (!F || !F.all.length) {
|
||
return new Response("face pool empty", { status: 503 });
|
||
}
|
||
// Pool selection: try gender×race intersection first, then
|
||
// gender-only, then race-only, then full pool. Always returns
|
||
// a face so the worker card never falls back to the monogram.
|
||
const wantRace = url.searchParams.get("e") || "";
|
||
let pool = F.all;
|
||
if (wantGender && wantRace && F.byGR[wantGender + "/" + wantRace]?.length) {
|
||
pool = F.byGR[wantGender + "/" + wantRace];
|
||
} else if (wantGender && F.byG[wantGender]?.length) {
|
||
pool = F.byG[wantGender];
|
||
} else if (wantRace && F.byR[wantRace]?.length) {
|
||
pool = F.byR[wantRace];
|
||
}
|
||
// Hash key → pool index. djb2-ish, fits any string.
|
||
let h = 5381;
|
||
for (let i = 0; i < key.length; i++) h = ((h << 5) + h + key.charCodeAt(i)) | 0;
|
||
const idx = Math.abs(h) % pool.length;
|
||
const pick = pool[idx];
|
||
// Prefer pre-resized webp thumb (~10KB) over native JPEG
|
||
// (~580KB). 60× smaller — without this, a 40-card grid
|
||
// overruns Chrome's parallel-connection budget and ~75% of
|
||
// tiles never finish decoding.
|
||
const thumbName = pick.file.replace(/\.jpg$/, ".webp");
|
||
const thumb = Bun.file(`${HEADSHOT_DIR}/_thumbs/${thumbName}`);
|
||
if (await thumb.exists()) {
|
||
return new Response(thumb, {
|
||
headers: {
|
||
"Content-Type": "image/webp",
|
||
"Cache-Control": "public, max-age=86400, immutable",
|
||
"X-Face-Pool-Idx": String(pick.id),
|
||
"X-Face-Pool-Gender": pick.gender || "untagged",
|
||
"X-Face-Pool-Variant": "thumb-384",
|
||
},
|
||
});
|
||
}
|
||
const file = Bun.file(`${HEADSHOT_DIR}/${pick.file}`);
|
||
if (!(await file.exists())) {
|
||
return new Response("face missing on disk", { status: 404 });
|
||
}
|
||
return new Response(file, {
|
||
headers: {
|
||
"Content-Type": "image/jpeg",
|
||
"Cache-Control": "public, max-age=86400, immutable",
|
||
"X-Face-Pool-Idx": String(pick.id),
|
||
"X-Face-Pool-Gender": pick.gender || "untagged",
|
||
"X-Face-Pool-Variant": "native-1024",
|
||
},
|
||
});
|
||
}
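// Example call to the pool route above (hypothetical key):
//   GET /headshots/W-104233?g=man&e=hispanic
// → byGR["man/hispanic"] if that bucket has faces, else gender-only,
// else race-only, else the full adult pool; the djb2 index picks one
// face, served as the 384px webp thumb when present, native JPEG otherwise.
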
// Profiler index — directory page of everyone who's filed a
|
||
// Chicago permit (clickable directory of contractors).
|
||
if (url.pathname === "/profiler" || url.pathname === "/contractors") {
|
||
return new Response(Bun.file(import.meta.dir + "/profiler.html"), {
|
||
headers: { "Content-Type": "text/html; charset=utf-8" },
|
||
});
|
||
}
|
||
|
||
if (url.pathname === "/contractor") {
|
||
return new Response(Bun.file(import.meta.dir + "/contractor.html"), {
|
||
headers: { ...cors, "Content-Type": "text/html" },
|
||
});
|
||
}
|
||
if (url.pathname === "/intelligence/contractor_profile" && req.method === "POST") {
|
||
const start = Date.now();
|
||
try {
|
||
const b = (await req.json().catch(() => ({}))) as { name?: string };
|
||
if (!b.name) return err("missing name", 400);
|
||
// Use the entity-brief library directly — single entity, all sources.
|
||
const { fetchOshaBrief, fetchTickerBrief, fetchContractorHistory, fetchParentLink, fetchFederalContracts, fetchDebarmentBrief, fetchNlrbBriefReal, fetchIlsosBrief, fetchNewsMentions, fetchDiversityCerts, scoreNewsSentiment, fetchBlsConstructionTrend, normalizeEntityName, entityTicker } = await import("./entity.js");
|
||
const [osha, stock, history, parent_link, federal, debarment, nlrb, ilsos, news, diversity, macro] = await Promise.all([
|
||
fetchOshaBrief(b.name),
|
||
fetchTickerBrief(b.name),
|
||
fetchContractorHistory(b.name),
|
||
fetchParentLink(b.name),
|
||
fetchFederalContracts(b.name),
|
||
fetchDebarmentBrief(b.name),
|
||
fetchNlrbBriefReal(b.name),
|
||
fetchIlsosBrief(b.name),
|
||
fetchNewsMentions(b.name),
|
||
fetchDiversityCerts(b.name),
|
||
fetchBlsConstructionTrend(),
|
||
]);
|
||
const news_sentiment = news ? scoreNewsSentiment(news) : null;
|
||
return ok({
|
||
key: normalizeEntityName(b.name),
|
||
display_name: b.name,
|
||
ticker: entityTicker(b.name),
|
||
osha,
|
||
stock,
|
||
history,
|
||
parent_link,
|
||
federal,
|
||
debarment,
|
||
nlrb,
|
||
ilsos,
|
||
news,
|
||
news_sentiment,
|
||
diversity,
|
||
macro,
|
||
generated_at: new Date().toISOString(),
|
||
duration_ms: Date.now() - start,
|
||
});
|
||
} catch (e: any) {
|
||
return err(`contractor_profile: ${e.message}`, 500);
|
||
}
|
||
}
|
||
|
||
// Intelligence: Market data — public building permits → staffing demand forecast
|
||
if (url.pathname === "/intelligence/market" && req.method === "POST") {
|
||
const start = Date.now();
|
||
try {
|
||
// Fetch Chicago building permits (public Socrata API — real data)
|
||
const permitUrl = "https://data.cityofchicago.org/resource/ydr8-5enu.json";
|
||
const [bigR, byTypeR, recentR, benchR] = await Promise.all([
|
||
// Top 8 largest permits by cost
|
||
fetch(`${permitUrl}?$select=permit_type,work_type,work_description,reported_cost,street_number,street_direction,street_name,community_area,issue_date,latitude,longitude&$where=reported_cost>1000000 AND issue_date>'2025-06-01'&$order=reported_cost DESC&$limit=50`).then(r => r.json()),
|
||
// Permits grouped by work type
|
||
fetch(`${permitUrl}?$select=work_type,count(*) as cnt,sum(reported_cost) as total_cost&$where=reported_cost>10000 AND issue_date>'2025-06-01'&$group=work_type&$order=total_cost DESC&$limit=10`).then(r => r.json()),
|
||
// Most recent permits
|
||
fetch(`${permitUrl}?$select=work_type,work_description,reported_cost,street_name,issue_date&$where=reported_cost>50000&$order=issue_date DESC&$limit=5`).then(r => r.json()),
|
||
// Our worker bench in IL (cross-reference)
|
||
api("POST", "/query/sql", { sql: "SELECT role, COUNT(*) supply, SUM(CASE WHEN CAST(reliability AS DOUBLE)>0.8 THEN 1 ELSE 0 END) reliable, SUM(CASE WHEN CAST(availability AS DOUBLE)>0.5 THEN 1 ELSE 0 END) available FROM workers_500k WHERE state='IL' GROUP BY role ORDER BY supply DESC" }),
|
||
]);
|
||
|
||
// Map construction types to staffing roles
|
||
const typeToRoles: Record<string, string[]> = {
|
||
"Electrical Work": ["Electrician","Maintenance Tech"],
|
||
"Masonry Work": ["Production Worker","Loader","Material Handler"],
|
||
"Mechanical Work": ["Maintenance Tech","Machine Operator","Welder"],
|
||
"Reroofing": ["Production Worker","Loader"],
|
||
"Plumbing Work": ["Maintenance Tech"],
|
||
"": ["Forklift Operator","Loader","Material Handler","Production Worker","Warehouse Associate"],
|
||
};
|
||
|
||
// Build demand forecast from permit types
|
||
const forecast: any[] = [];
|
||
for (const t of (byTypeR || [])) {
|
||
const wtype = t.work_type || "(general construction)";
|
||
const totalCost = parseFloat(t.total_cost || 0);
|
||
const cnt = parseInt(t.cnt || 0);
|
||
const estWorkers = Math.round(totalCost / 150000); // industry heuristic
|
||
const roles = typeToRoles[t.work_type || ""] || typeToRoles[""];
|
||
forecast.push({ work_type: wtype, permits: cnt, total_cost: totalCost, estimated_workers: estWorkers, needed_roles: roles });
|
||
}
|
||
|
||
// Cross-reference with our bench
|
||
const ilBench = (benchR.rows || []).reduce((m: any, r: any) => { m[r.role] = r; return m; }, {});
|
||
const gaps: any[] = [];
|
||
for (const f of forecast) {
|
||
for (const role of f.needed_roles) {
|
||
const b = ilBench[role];
|
||
if (b) {
|
||
const coverage = Math.round((b.available / Math.max(f.estimated_workers, 1)) * 100);
|
||
gaps.push({ role, demand: f.estimated_workers, supply: b.supply, available: b.available, reliable: b.reliable, coverage_pct: Math.min(coverage, 999), source: f.work_type });
|
||
}
|
||
}
|
||
}
|
||
|
||
return ok({
|
||
major_permits: (bigR || []).map((p: any) => ({
|
||
cost: parseFloat(p.reported_cost || 0),
|
||
description: (p.work_description || "").substring(0, 100),
|
||
address: `${p.street_number || ""} ${p.street_direction || ""} ${p.street_name || ""}`.trim(),
|
||
type: p.work_type || p.permit_type || "",
|
||
date: (p.issue_date || "").substring(0, 10),
|
||
lat: p.latitude, lng: p.longitude,
|
||
})),
|
||
by_type: forecast,
|
||
recent: (recentR || []).map((p: any) => ({
|
||
type: p.work_type || "", description: (p.work_description || "").substring(0, 80),
|
||
cost: parseFloat(p.reported_cost || 0), street: p.street_name || "", date: (p.issue_date || "").substring(0, 10),
|
||
})),
|
||
il_bench: benchR.rows || [],
|
||
gaps,
|
||
total_construction_value: forecast.reduce((s: number, f: any) => s + f.total_cost, 0),
|
||
total_estimated_workers: forecast.reduce((s: number, f: any) => s + f.estimated_workers, 0),
|
||
duration_ms: Date.now() - start,
|
||
});
|
||
} catch (e: any) {
|
||
return ok({ error: e.message, duration_ms: Date.now() - start });
|
||
}
|
||
}
|
||
|
||
// Predictive staffing forecast — aggregate demand inferred from
|
||
// recent Chicago permits, compared to our bench supply. Answers
|
||
// "what's coming in the next 30-60 days and can we cover it?"
|
||
// — the contextual-awareness dimension beyond retrospective rank.
|
||
if (url.pathname === "/intelligence/staffing_forecast" && req.method === "POST") {
|
||
const start = Date.now();
|
||
try {
|
||
const permitUrl = "https://data.cityofchicago.org/resource/ydr8-5enu.json";
|
||
// Last 30 days of permits — that's our forward demand window
|
||
const thirtyDaysAgo = new Date(Date.now() - 30 * 86400e3).toISOString().slice(0, 10);
|
||
const permits: any[] = await fetch(
|
||
`${permitUrl}?$select=work_type,reported_cost,issue_date`
|
||
+ `&$where=reported_cost>100000 AND issue_date>'${thirtyDaysAgo}'`
|
||
+ `&$limit=200`
|
||
).then(r => r.json()).catch(() => []);
|
||
|
||
// Construction heuristic: permit filing → construction start
|
||
// averages ~45 days. Staffing window opens 14 days before.
|
||
const typeToRole: Record<string, string> = {
|
||
"Electrical Work": "Electrician",
|
||
"Masonry Work": "Production Worker",
|
||
"Mechanical Work": "Maintenance Tech",
|
||
"Reroofing": "Production Worker",
|
||
"Plumbing Work": "Maintenance Tech",
|
||
};
|
||
|
||
// Aggregate demand by role
|
||
const demandByRole: Record<string, { permits: number; total_cost: number; est_workers: number; earliest_need: string }> = {};
|
||
for (const p of permits) {
|
||
const role = typeToRole[p.work_type || ""] || "Production Worker";
|
||
const cost = parseFloat(p.reported_cost || 0);
|
||
const workers = Math.max(2, Math.min(Math.round(cost / 150000), 8));
|
||
const issueDate = new Date(p.issue_date);
|
||
const stagingDate = new Date(issueDate.getTime() + 31 * 86400e3); // 45d - 14d window
|
||
if (!demandByRole[role]) {
|
||
demandByRole[role] = { permits: 0, total_cost: 0, est_workers: 0,
|
||
earliest_need: stagingDate.toISOString().slice(0, 10) };
|
||
}
|
||
demandByRole[role].permits += 1;
|
||
demandByRole[role].total_cost += cost;
|
||
demandByRole[role].est_workers += workers;
|
||
const cur = new Date(demandByRole[role].earliest_need);
|
||
if (stagingDate < cur) demandByRole[role].earliest_need = stagingDate.toISOString().slice(0, 10);
|
||
}
|
||
|
||
// Bench supply in IL
|
||
const benchR = await api("POST", "/query/sql", {
|
||
sql: `SELECT role, COUNT(*) as total, `
|
||
+ `SUM(CASE WHEN CAST(availability AS DOUBLE) > 0.5 THEN 1 ELSE 0 END) as available, `
|
||
+ `SUM(CASE WHEN CAST(reliability AS DOUBLE) > 0.8 THEN 1 ELSE 0 END) as reliable `
|
||
+ `FROM workers_500k WHERE state = 'IL' `
|
||
+ `GROUP BY role`,
|
||
});
|
||
const bench: Record<string, any> = {};
|
||
for (const r of (benchR.rows || [])) bench[r.role] = r;
|
||
|
||
// Past playbook fill-speed + success signal per role
|
||
const playbookR = await api("POST", "/query/sql", {
|
||
sql: `SELECT operation, COUNT(*) as fills `
|
||
+ `FROM successful_playbooks_live `
|
||
+ `WHERE operation LIKE '%Chicago, IL%' `
|
||
+ `GROUP BY operation ORDER BY fills DESC LIMIT 20`,
|
||
});
|
||
const recentChicagoOps = playbookR.rows || [];
|
||
|
||
// Build forecast entries with risk flag
|
||
const forecast: any[] = [];
|
||
for (const [role, d] of Object.entries(demandByRole)) {
|
||
const b = bench[role] || { total: 0, available: 0, reliable: 0 };
|
||
const coverage = d.est_workers > 0 ? Math.round((b.available / d.est_workers) * 100) : 999;
|
||
const reliable_coverage = d.est_workers > 0 ? Math.round((b.reliable / d.est_workers) * 100) : 999;
|
||
let risk = "ok";
|
||
if (coverage < 100) risk = "critical";
|
||
else if (coverage < 300) risk = "tight";
|
||
else if (reliable_coverage < 200) risk = "watch";
|
||
// Days until earliest staffing deadline
|
||
const days_to_deadline = Math.round((new Date(d.earliest_need).getTime() - Date.now()) / 86400e3);
|
||
forecast.push({
|
||
role,
|
||
demand_permits: d.permits,
|
||
demand_workers: d.est_workers,
|
||
demand_total_cost: d.total_cost,
|
||
earliest_staffing_deadline: d.earliest_need,
|
||
days_to_deadline,
|
||
bench_total: b.total,
|
||
bench_available: b.available,
|
||
bench_reliable: b.reliable,
|
||
coverage_pct: Math.min(coverage, 9999),
|
||
reliable_coverage_pct: Math.min(reliable_coverage, 9999),
|
||
risk,
|
||
});
|
||
}
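// Worked example of the risk classification above (hypothetical numbers):
// 12 electrical permits totaling $4.5M → est_workers ≈ 30 for "Electrician".
// Bench available = 45 → coverage = round(45/30*100) = 150 (<300 → "tight");
// reliable = 70 → reliable_coverage = 233, so no "watch" escalation applies.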
forecast.sort((a, b) => {
|
||
const order: Record<string, number> = { critical: 0, tight: 1, watch: 2, ok: 3 };
|
||
if (order[a.risk] !== order[b.risk]) return order[a.risk] - order[b.risk];
|
||
return a.days_to_deadline - b.days_to_deadline;
|
||
});
|
||
|
||
return ok({
|
||
generated_at: new Date().toISOString(),
|
||
window_days: 30,
|
||
permit_count: permits.length,
|
||
total_cost: permits.reduce((s, p) => s + parseFloat(p.reported_cost || 0), 0),
|
||
total_estimated_workers: forecast.reduce((s, f) => s + f.demand_workers, 0),
|
||
critical_roles: forecast.filter(f => f.risk === "critical").length,
|
||
tight_roles: forecast.filter(f => f.risk === "tight").length,
|
||
forecast,
|
||
recent_chicago_operations: recentChicagoOps,
|
||
duration_ms: Date.now() - start,
|
||
note: "Demand inferred from Chicago permit filings last 30 days. Construction starts ~45d after permit. Staffing window opens ~14d before construction. Supply = IL bench in workers_500k.",
|
||
});
|
||
} catch (e: any) {
|
||
return err(`staffing_forecast: ${e.message}`, 500);
|
||
}
|
||
}
|
||
|
||
// Intelligence: Chicago permits → assumed staffing contracts with
|
||
// Phase 19-ranked candidates and Path-2 discovered patterns. Each
|
||
// card pairs a REAL permit (live from data.cityofchicago.org) with
|
||
// a PROPOSED fill drawn from our 500K worker bench. Surfaces the
|
||
// meta-index dimension directly: "what past similar fills had in
|
||
// common" for this role + geo.
|
||
// Architecture signals — the "our substrate is better than the
|
||
// alternatives" proof surface. Pulls live health numbers so the
|
||
// dashboard can show, per-card or in a top bar, that the claims
|
||
// we make in the PRD (instant searches, self-regulation,
|
||
// hot-swap, indexed-at-ingest) are verifiable right now.
|
||
if (url.pathname === "/intelligence/arch_signals" && (req.method === "GET" || req.method === "POST")) {
|
||
try {
|
||
const t0 = Date.now();
|
||
// Index freshness + shape (hot-swap + clever-index claims)
|
||
const idxRaw = await fetch("http://localhost:3100/vectors/indexes/workers_500k_v1", {
|
||
signal: AbortSignal.timeout(3000),
|
||
}).then(r => r.ok ? r.json() : null).catch(() => null);
|
||
|
||
// Playbook memory — "self-regulates via learned playbooks"
|
||
const pbmRaw = await fetch("http://localhost:3100/vectors/playbook_memory/stats", {
|
||
signal: AbortSignal.timeout(3000),
|
||
}).then(r => r.ok ? r.json() : null).catch(() => null);
|
||
|
||
// Pathway memory — ADR-021 compounding-bug-grammar surface
|
||
const pwmRaw = await fetch("http://localhost:3100/vectors/pathway/stats", {
|
||
signal: AbortSignal.timeout(3000),
|
||
}).then(r => r.ok ? r.json() : null).catch(() => null);
|
||
|
||
// Live instant-search probe — one trivial hybrid call so the
|
||
// latency number on screen is fresh, not cached.
|
||
const probeT0 = Date.now();
|
||
await api("POST", "/vectors/hybrid", {
|
||
index_name: "workers_500k_v1",
|
||
filter_dataset: "workers_500k",
|
||
id_column: "worker_id",
|
||
sql_filter: "state = 'OH'",
|
||
question: "production worker",
|
||
top_k: 3, generate: false,
|
||
}).catch(() => ({}));
|
||
const probeMs = Date.now() - probeT0;
|
||
|
||
return ok({
|
||
generated_at: new Date().toISOString(),
|
||
duration_ms: Date.now() - t0,
|
||
index: idxRaw ? {
|
||
name: idxRaw.index_name,
|
||
source: idxRaw.source,
|
||
model: idxRaw.model_name,
|
||
dimensions: idxRaw.dimensions,
|
||
chunk_count: idxRaw.chunk_count,
|
||
doc_count: idxRaw.doc_count,
|
||
created_at: idxRaw.created_at,
|
||
backend: idxRaw.vector_backend,
|
||
last_used: idxRaw.last_used ?? null,
|
||
build_signature: idxRaw.build_signature ?? null,
|
||
} : null,
|
||
playbook_memory: pbmRaw ? {
|
||
entries: pbmRaw.entries_count ?? pbmRaw.count ?? 0,
|
||
rebuilt_at: pbmRaw.last_rebuilt_at ?? null,
|
||
} : null,
|
||
pathway_memory: pwmRaw ? {
|
||
total_pathways: pwmRaw.total_pathways ?? 0,
|
||
retired: pwmRaw.retired ?? 0,
|
||
with_audit_pass: pwmRaw.with_audit_pass ?? 0,
|
||
total_replays: pwmRaw.total_replays ?? 0,
|
||
} : null,
|
||
instant_search_probe_ms: probeMs,
|
||
});
|
||
} catch (e: any) {
|
||
return err(`arch_signals: ${e.message}`, 500);
|
||
}
|
||
}
|
||
|
||
if (url.pathname === "/intelligence/permit_contracts" && req.method === "POST") {
|
||
const start = Date.now();
|
||
try {
|
||
const b: any = await req.json().catch(() => ({}));
|
||
const permitUrl = "https://data.cityofchicago.org/resource/ydr8-5enu.json";
|
||
// Recent + substantial permits only — skip tiny ones that
|
||
// don't imply real staffing demand.
|
||
// Include contact_1 + contact_2 fields so the Entity Brief
|
||
// panel on each card can populate without a second fetch.
|
||
// Contacts identify the applicant / contractor by name —
|
||
// those are the keys we pass to OSHA/ILSOS enrichment.
|
||
// Caller-controlled limit: J reported the live panel was
|
||
// dropping older permits (Target) because $limit=6 only ever
|
||
// showed today's 6 newest. Default 24 so a few days of
|
||
// permits stay on the panel; allow up to 100 via body.
|
||
const reqLimit = Math.max(1, Math.min(100, Number((b as any)?.limit) || 24));
|
||
// Optional contractor-name filter — lets the panel scope to
|
||
// a specific contact_1 or contact_2 name (e.g. "Target
|
||
// Corporation") so the user can pin a contractor to the panel
|
||
// without it scrolling past.
|
||
const cFilter = String((b as any)?.contractor || "").trim().replace(/'/g, "''");
|
||
const wherePieces: string[] = [
|
||
"reported_cost>250000",
|
||
"issue_date>'2025-06-01'",
|
||
];
|
||
if (cFilter) {
|
||
wherePieces.push(`(upper(contact_1_name)=upper('${cFilter}') OR upper(contact_2_name)=upper('${cFilter}'))`);
|
||
}
|
||
const permits: any[] = await fetch(
|
||
`${permitUrl}?$select=id,permit_type,work_type,work_description,reported_cost,street_number,street_direction,street_name,community_area,issue_date,contact_1_name,contact_1_type,contact_2_name,contact_2_type,latitude,longitude&`
|
||
+ `$where=${encodeURIComponent(wherePieces.join(" AND "))}`
|
||
+ `&$order=issue_date DESC&$limit=${reqLimit}`
|
||
).then(r => r.json()).catch(() => []);
|
||
|
||
const typeToRole: Record<string, string> = {
|
||
"Electrical Work": "Electrician",
|
||
"Masonry Work": "Production Worker",
|
||
"Mechanical Work": "Maintenance Tech",
|
||
"Reroofing": "Production Worker",
|
||
"Plumbing Work": "Maintenance Tech",
|
||
};
|
||
|
||
const contracts: any[] = [];
|
||
for (const p of permits) {
|
||
const cost = parseFloat(p.reported_cost || 0);
|
||
// Industry heuristic — one worker per $150K of permit value,
|
||
// capped at 8 per contract for staffing realism.
|
||
const count = Math.min(Math.max(Math.round(cost / 150000), 2), 8);
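// e.g. (hypothetical) a $1.2M permit → round(8) = 8, at the cap;
// a $180K permit → round(1.2) = 1, floored up to the 2-worker minimum.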
const role = typeToRole[p.work_type || ""] || "Production Worker";
|
||
const city = "Chicago";
|
||
const state = "IL";
|
||
|
||
// Phase 19 ranked candidates. Soft availability filter
|
||
// auto-applied by /search — this mirrors the real recruiter
|
||
// query path exactly. k=200 to ensure boost fires across
|
||
// the full memory surface (the embedding-discrimination
|
||
// narrowness means under-k silently misses endorsements).
|
||
//
|
||
// Timed so the UI can surface "instant search from clever
|
||
// indexing at ingest" — the architecture claim J wants
|
||
// visible. Each contract card shows its hybrid latency.
|
||
const hybridT0 = Date.now();
|
||
const searchRes = await api("POST", "/vectors/hybrid", {
|
||
index_name: "workers_500k_v1",
|
||
filter_dataset: "workers_500k",
|
||
id_column: "worker_id",
|
||
sql_filter: `role = '${role}' AND state = '${state}' AND city = '${city}' AND CAST(availability AS DOUBLE) > 0.5`,
|
||
question: `${role} for ${p.work_type || "construction"} in ${city}`,
|
||
top_k: 5, generate: false,
|
||
use_playbook_memory: true, playbook_memory_k: 200,
|
||
}).catch(() => ({ sources: [] as any[] }));
|
||
const hybridMs = Date.now() - hybridT0;
|
||
|
||
// Path 2 — discovered patterns for this role in this city.
|
||
const patternRes = await api("POST", "/vectors/playbook_memory/patterns", {
|
||
query: `${role} in ${city}, ${state}`,
|
||
top_k_playbooks: 25,
|
||
min_trait_frequency: 0.3,
|
||
}).catch(() => ({} as any));
|
||
|
||
// Enrich with implied pay rate before taking the top-5
|
||
enrichWithRates(searchRes.sources || []);
|
||
const contractBillRate = impliedBillRate(role);
|
||
const sources = (searchRes.sources || []).slice(0, 5).map((s: any) => {
|
||
const name = String(s.chunk_text || "").split("—")[0]?.trim() || s.doc_id;
|
||
return {
|
||
doc_id: s.doc_id,
|
||
name,
|
||
score: s.score,
|
||
playbook_boost: s.playbook_boost || 0,
|
||
playbook_citations: s.playbook_citations || [],
|
||
implied_pay_rate: s.implied_pay_rate ?? null,
|
||
over_bill_rate: (s.implied_pay_rate ?? 0) > contractBillRate,
|
||
};
|
||
});
|
||
|
||
// Timeline heuristic — permits filed now → construction
|
||
// starts ~45d later → staffing window opens ~14d before
|
||
// start. days_to_deadline is negative when we're past the
|
||
// window (fill urgency is imminent).
|
||
const issueDate = new Date(p.issue_date || Date.now());
|
||
const estStart = new Date(issueDate.getTime() + 45 * 86400e3);
|
||
const stagingDate = new Date(issueDate.getTime() + 31 * 86400e3);
|
||
const daysToDeadline = Math.round((stagingDate.getTime() - Date.now()) / 86400e3);
|
||
let urgency = "scheduled";
|
||
if (daysToDeadline < 0) urgency = "overdue";
|
||
else if (daysToDeadline <= 7) urgency = "urgent";
|
||
else if (daysToDeadline <= 21) urgency = "soon";
|
||
else urgency = "scheduled";
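// e.g. a permit issued 2025-09-01 (hypothetical dates): estimated start
// 2025-10-16, staffing window opens 2025-10-02; checked on 2025-10-10
// that gives days_to_deadline = -8 → "overdue".
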
// Fill-probability ramp — staffing-industry heuristic.
|
||
// Base probability by pool_size (how many available workers
|
||
// match the role+geo), decayed by days-remaining. Produces
|
||
// a curve the UI can sparkline.
|
||
const poolSize = (searchRes.sql_matches ?? 0) as number;
|
||
const basePFill = poolSize >= count * 20 ? 0.95
|
||
: poolSize >= count * 10 ? 0.85
|
||
: poolSize >= count * 5 ? 0.70
|
||
: poolSize >= count * 2 ? 0.55
|
||
: poolSize >= count ? 0.35
|
||
: 0.15;
|
||
const fillByDay = [0, 3, 7, 14, 21, 30].map((d) => {
|
||
// Front-loaded: most fills land in first 7 days; tail
|
||
// falls off quickly. This is a Weibull-ish shape that
|
||
// matches real staffing data we've seen.
|
||
const ramp = d === 0 ? 0.0
|
||
: d <= 3 ? 0.35
|
||
: d <= 7 ? 0.65
|
||
: d <= 14 ? 0.85
|
||
: d <= 21 ? 0.95
|
||
: 1.0;
|
||
return { day: d, cumulative_pct: Math.round(basePFill * ramp * 100) };
|
||
});
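// e.g. (hypothetical) count = 4, pool_size = 50: 50 ≥ 4×10 → basePFill
// 0.85; curve: day 3 → 30%, day 7 → 55%, day 14 → 72%, day 30 → 85%.
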
// Economics — "as though the contracts were accepted and
|
||
// filled." 40 hrs/week, default 12-week contract. Margin
|
||
// = (bill - avg_pay) × count × hours. Payout window is
|
||
// fill_date + 30d billing cycle.
|
||
const weeksAssumed = 12;
|
||
const hoursPerWeek = 40;
|
||
const avgPayRate = sources.length
|
||
? sources.reduce((s, c) => s + (c.implied_pay_rate || 0), 0) / sources.length
|
||
: contractBillRate / BILL_MARKUP;
|
||
const grossRevenue = contractBillRate * count * hoursPerWeek * weeksAssumed;
|
||
const grossMargin = (contractBillRate - avgPayRate) * count * hoursPerWeek * weeksAssumed;
|
||
const overBillCount = sources.filter((c) => c.over_bill_rate).length;
|
||
const overBillPoolMargin = sources
|
||
.filter((c) => c.over_bill_rate)
|
||
.reduce((s, c) => s + (c.implied_pay_rate - contractBillRate) * hoursPerWeek * weeksAssumed, 0);
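// e.g. (hypothetical) bill $32/hr, avg pay $22/hr, count 4:
// gross_revenue = 32 × 4 × 40 × 12 = $61,440; gross_margin = $19,200
// → margin_pct ≈ 31.
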
// Shift inference from permit work_type + description.
|
||
// Construction defaults to 1st-shift (day). Heavy civil or
|
||
// facility work sometimes runs 2nd or split-shift. 3rd
|
||
// (overnight) is rare in commercial construction but real
|
||
// for maintenance / emergency calls.
|
||
const descLower = ((p.work_description || "") + " " + (p.work_type || "")).toLowerCase();
|
||
const shifts: string[] = ["1st"]; // default day
|
||
if (/night|overnight|24\s*hr|emergency/.test(descLower)) shifts.push("3rd");
|
||
if (/multi.?shift|round.?the.?clock|double.?shift/.test(descLower)) shifts.push("2nd");
|
||
if (/weekend|saturday|sunday/.test(descLower)) shifts.push("4th");
|
||
|
||
contracts.push({
|
||
permit: {
|
||
id: p.id,
|
||
cost,
|
||
work_type: p.work_type || "General construction",
|
||
description: (p.work_description || "").substring(0, 140),
|
||
address: `${p.street_number || ""} ${p.street_direction || ""} ${p.street_name || ""}`.trim(),
|
||
community_area: p.community_area,
|
||
issue_date: (p.issue_date || "").substring(0, 10),
|
||
// Contacts — used by /intelligence/permit_entities to
|
||
// enrich each card with OSHA + ILSOS on expand.
|
||
contact_1_name: p.contact_1_name || "",
|
||
contact_1_type: p.contact_1_type || "",
|
||
contact_2_name: p.contact_2_name || "",
|
||
contact_2_type: p.contact_2_type || "",
|
||
},
|
||
implied_bill_rate: contractBillRate,
|
||
timeline: {
|
||
estimated_construction_start: estStart.toISOString().slice(0, 10),
|
||
staffing_window_opens: stagingDate.toISOString().slice(0, 10),
|
||
days_to_deadline: daysToDeadline,
|
||
urgency,
|
||
},
|
||
proposed: {
|
||
role,
|
||
count,
|
||
city, state,
|
||
pool_size: poolSize,
|
||
candidates: sources,
|
||
},
|
||
discovered_pattern: patternRes.discovered_pattern,
|
||
pattern_matched: patternRes.matched_playbooks ?? 0,
|
||
pattern_workers_examined: patternRes.total_workers_examined ?? 0,
|
||
// ADR-021 / PRD architecture claims surface — these fields
|
||
// let the UI show "instant search from clever indexing"
|
||
// and the fill economics beyond bill rate alone.
|
||
search_latency_ms: hybridMs,
|
||
fill_probability: {
|
||
base_pct: Math.round(basePFill * 100),
|
||
curve: fillByDay,
|
||
},
|
||
economics: {
|
||
avg_pay_rate: Math.round(avgPayRate * 100) / 100,
|
||
hours_per_week: hoursPerWeek,
|
||
weeks_assumed: weeksAssumed,
|
||
gross_revenue: Math.round(grossRevenue),
|
||
gross_margin: Math.round(grossMargin),
|
||
margin_pct: grossRevenue > 0 ? Math.round((grossMargin / grossRevenue) * 100) : 0,
|
||
payout_window_days: [30, 45],
|
||
over_bill_count: overBillCount,
|
||
over_bill_pool_margin_at_risk: Math.round(overBillPoolMargin),
|
||
},
|
||
shifts_needed: shifts,
|
||
});
|
||
}
|
||
|
||
return ok({
|
||
generated_at: new Date().toISOString(),
|
||
count: contracts.length,
|
||
contracts,
|
||
duration_ms: Date.now() - start,
|
||
note: "Live Chicago permits paired with workers_500k-ranked candidates and playbook_memory discovered patterns. The permit is real public data; the proposed fill is derived per industry heuristic (~$150K → 1 worker).",
|
||
});
|
||
} catch (e: any) {
|
||
return err(`permit_contracts: ${e.message}`, 500);
|
||
}
|
||
}
|
||
|
||
// Intelligence: per-permit entity brief — OSHA + ILSOS + property
|
||
// Takes a permit identifier (we look it up from Chicago Socrata) or
|
||
// raw contact fields directly from the client. Returns an "ETF
|
||
// basket" shape: property + entities + per-entity risk factors.
|
||
// OSHA is live-scraped (cached 30d). ILSOS returns a structured
|
||
// placeholder because apps.ilsos.gov blocks our ASN.
|
||
if (url.pathname === "/intelligence/permit_entities" && req.method === "POST") {
|
||
const start = Date.now();
|
||
try {
|
||
const b = await req.json().catch(() => ({})) as {
|
||
permit_id?: string;
|
||
address?: string;
|
||
work_type?: string;
|
||
contact_1_name?: string;
|
||
contact_1_type?: string;
|
||
contact_2_name?: string;
|
||
contact_2_type?: string;
|
||
fetch_osha?: boolean;
|
||
fetch_ilsos?: boolean;
|
||
};
|
||
// If the caller didn't pass contact fields but did pass a
|
||
// permit_id, go pull the record from Chicago Socrata.
|
||
let permit = b;
|
||
if (b.permit_id && !b.contact_1_name) {
|
||
const u = `https://data.cityofchicago.org/resource/ydr8-5enu.json?$where=id='${encodeURIComponent(b.permit_id)}'`;
|
||
const rows = (await fetch(u).then((r) => r.json())) as any[];
|
||
const p = rows?.[0];
|
||
if (p) {
|
||
const addr = [p.street_number, p.street_direction, p.street_name]
|
||
.filter(Boolean)
|
||
.join(" ");
|
||
permit = {
|
||
permit_id: b.permit_id,
|
||
address: addr,
|
||
work_type: p.work_type,
|
||
contact_1_name: p.contact_1_name,
|
||
contact_1_type: p.contact_1_type,
|
||
contact_2_name: p.contact_2_name,
|
||
contact_2_type: p.contact_2_type,
|
||
};
|
||
}
|
||
}
|
||
const brief = await buildPermitBrief(permit, {
|
||
fetchOsha: b.fetch_osha !== false,
|
||
fetchIlsos: b.fetch_ilsos !== false,
|
||
});
|
||
return ok({ ...brief, duration_ms: Date.now() - start });
|
||
} catch (e: any) {
|
||
return err(`permit_entities: ${e.message}`, 500);
|
||
}
|
||
}
|
||
|
||
// Removed 2026-04-20: /intelligence/learn was a legacy CSV writer
|
||
// that destructively re-wrote successful_playbooks. /log and
|
||
// /log_failure replace it cleanly via /vectors/playbook_memory/seed
|
||
// and /mark_failed. Keeping the endpoint would only mislead
|
||
// future callers — dead code rots.
|
||
|
||
// Intelligence: Activity feed — what the system has learned
|
||
if (url.pathname === "/intelligence/activity" && req.method === "POST") {
|
||
const start = Date.now();
|
||
const [playbooksR, searchCountR, fillCountR, totalR] = await Promise.all([
|
||
api("POST", "/query/sql", { sql: "SELECT * FROM successful_playbooks ORDER BY timestamp DESC LIMIT 20" }).catch(() => ({ rows: [] })),
|
||
api("POST", "/query/sql", { sql: "SELECT COUNT(*) cnt FROM successful_playbooks WHERE operation LIKE 'search:%'" }).catch(() => ({ rows: [{ cnt: 0 }] })),
|
||
api("POST", "/query/sql", { sql: "SELECT COUNT(*) cnt FROM successful_playbooks WHERE operation LIKE 'fill:%'" }).catch(() => ({ rows: [{ cnt: 0 }] })),
|
||
api("POST", "/query/sql", { sql: "SELECT COUNT(*) cnt FROM successful_playbooks" }).catch(() => ({ rows: [{ cnt: 0 }] })),
|
||
]);
|
||
// Extract learned patterns — which roles+cities get filled most
|
||
const patterns: Record<string, number> = {};
|
||
for (const p of (playbooksR.rows || [])) {
|
||
if (p.operation?.startsWith("fill:") || p.operation?.startsWith("search:")) {
|
||
const key = p.operation.replace(/^(fill|search): ?/, "").trim();
|
||
patterns[key] = (patterns[key] || 0) + 1;
|
||
}
|
||
}
|
||
return ok({
|
||
playbooks: playbooksR.rows || [],
|
||
search_count: searchCountR.rows?.[0]?.cnt || 0,
|
||
fill_count: fillCountR.rows?.[0]?.cnt || 0,
|
||
total_operations: totalR.rows?.[0]?.cnt || 0,
|
||
learned_patterns: Object.entries(patterns).map(([q, c]) => ({ query: q, times: c })).sort((a, b) => b.times - a.times),
|
||
duration_ms: Date.now() - start,
|
||
});
|
||
}
|
||
|
||
// Intelligence Brief — parallel analytics across 500K profiles
|
||
if (url.pathname === "/intelligence/brief" && req.method === "POST") {
|
||
const start = Date.now();
|
||
const [poolR, benchR, supplyR, gemsR, risksR, untappedR, archetypeR] = await Promise.all([
|
||
api("POST", "/query/sql", { sql: `SELECT COUNT(*) total, ROUND(AVG(CAST(reliability AS DOUBLE)),3) avg_rel, SUM(CASE WHEN CAST(reliability AS DOUBLE)>0.9 THEN 1 ELSE 0 END) elite, SUM(CASE WHEN CAST(reliability AS DOUBLE)>0.8 THEN 1 ELSE 0 END) reliable, SUM(CASE WHEN archetype='erratic' THEN 1 ELSE 0 END) erratic, SUM(CASE WHEN archetype='silent' THEN 1 ELSE 0 END) silent_cnt, SUM(CASE WHEN archetype='improving' THEN 1 ELSE 0 END) improving FROM workers_500k` }),
|
||
api("POST", "/query/sql", { sql: `SELECT state, COUNT(*) total, ROUND(AVG(CAST(reliability AS DOUBLE)),3) avg_rel, SUM(CASE WHEN CAST(reliability AS DOUBLE)>0.8 THEN 1 ELSE 0 END) reliable, SUM(CASE WHEN CAST(availability AS DOUBLE)>0.5 THEN 1 ELSE 0 END) available FROM workers_500k GROUP BY state ORDER BY total DESC` }),
|
||
api("POST", "/query/sql", { sql: `SELECT role, COUNT(*) supply, SUM(CASE WHEN CAST(availability AS DOUBLE)>0.5 THEN 1 ELSE 0 END) available, ROUND(AVG(CAST(reliability AS DOUBLE)),3) avg_rel FROM workers_500k GROUP BY role ORDER BY supply DESC` }),
|
||
api("POST", "/query/sql", { sql: `SELECT name, role, city, state, ROUND(CAST(reliability AS DOUBLE),2) rel, ROUND(CAST(availability AS DOUBLE),2) avail, archetype, skills FROM workers_500k WHERE archetype='improving' AND CAST(reliability AS DOUBLE)>0.8 ORDER BY CAST(reliability AS DOUBLE) DESC LIMIT 5` }),
|
||
api("POST", "/query/sql", { sql: `SELECT name, role, city, state, ROUND(CAST(reliability AS DOUBLE),2) rel, ROUND(CAST(responsiveness AS DOUBLE),2) resp, ROUND(CAST(compliance AS DOUBLE),2) compl, archetype FROM workers_500k WHERE archetype IN ('erratic','silent') AND CAST(reliability AS DOUBLE)<0.5 ORDER BY CAST(reliability AS DOUBLE) ASC LIMIT 5` }),
|
||
api("POST", "/query/sql", { sql: `SELECT name, role, city, state, ROUND(CAST(availability AS DOUBLE),2) avail, ROUND(CAST(reliability AS DOUBLE),2) rel, skills, archetype FROM workers_500k WHERE CAST(availability AS DOUBLE)>0.8 AND CAST(reliability AS DOUBLE)>0.85 ORDER BY CAST(availability AS DOUBLE) DESC LIMIT 5` }),
|
||
api("POST", "/query/sql", { sql: `SELECT archetype, COUNT(*) cnt, ROUND(AVG(CAST(reliability AS DOUBLE)),3) avg_rel FROM workers_500k GROUP BY archetype ORDER BY cnt DESC` }),
|
||
]);
|
||
return ok({
|
||
pool: poolR.rows?.[0] || {},
|
||
bench: benchR.rows || [],
|
||
supply: supplyR.rows || [],
|
||
gems: gemsR.rows || [],
|
||
risks: risksR.rows || [],
|
||
untapped: untappedR.rows || [],
|
||
archetypes: archetypeR.rows || [],
|
||
duration_ms: Date.now() - start,
|
||
});
|
||
}
|
||
|
||
// Intelligence Chat — natural language → routed queries → structured results
|
||
if (url.pathname === "/intelligence/chat" && req.method === "POST") {
|
||
const b = await json();
|
||
const q = (b.message || "").trim();
|
||
const lower = q.toLowerCase();
|
||
const start = Date.now();
|
||
const queries: string[] = [];
|
||
|
||
// Route 1: "Find someone like [Name]"
|
||
const likeMatch = q.match(/(?:like|similar to)\s+([A-Z][a-z]+(?:\s+[A-Z]\.?\s*)?(?:[A-Z][a-z]+)?)/i);
|
||
if (likeMatch) {
|
||
const name = likeMatch[1].trim();
|
||
queries.push(`SQL: Looking up ${name}'s profile`);
|
||
const profileR = await api("POST", "/query/sql", { sql: `SELECT * FROM workers_500k WHERE name LIKE '%${name.replace(/'/g,"''")}%' LIMIT 1` });
|
||
if (profileR.rows?.length) {
|
||
const worker = profileR.rows[0];
|
||
const stateMatch = lower.match(/\b(?:in|from)\s+([A-Z]{2})\b/i) || lower.match(/\b(IL|IN|OH|MO|TN|KY|WI|MI|IA|MN)\b/i);
|
||
const stateFilter = stateMatch ? `state = '${stateMatch[1].toUpperCase()}'` : `state != '${worker.state}'`;
|
||
queries.push(`Vector: Semantic similarity on ${worker.name}'s full profile → ${stateFilter}`);
|
||
const searchR = await api("POST", "/vectors/hybrid", {
|
||
question: worker.resume_text || `${worker.role} in ${worker.city} with skills ${worker.skills}`,
|
||
index_name: "workers_500k_v1",
|
||
sql_filter: stateFilter + ` AND CAST(reliability AS DOUBLE) >= 0.7`,
|
||
filter_dataset: "ethereal_workers", id_column: "worker_id", top_k: 5, generate: false,
|
||
});
|
||
return ok({ type: "similar", summary: `Found ${(searchR.sources||[]).length} workers similar to ${worker.name}${stateMatch ? ' in '+stateMatch[1].toUpperCase() : ' (other states)'}`,
|
||
source: { name: worker.name, role: worker.role, city: worker.city, state: worker.state, rel: worker.reliability, skills: worker.skills, archetype: worker.archetype },
|
||
results: (searchR.sources||[]).map((s:any) => ({ doc_id: s.doc_id, score: s.score, text: s.chunk_text })),
|
||
sql_matches: searchR.sql_matches, queries_run: queries, duration_ms: Date.now() - start });
|
||
}
|
||
return ok({ type: "error", summary: `Couldn't find "${name}" in the database. Try a full name.`, queries_run: queries, duration_ms: Date.now() - start });
|
||
}
|
||
|
||
// Route 2: "What if we lose"
|
||
if (/what if|lose|happens if/i.test(lower)) {
|
||
const roleMatch = lower.match(/(?:lose|lost?)\s+(?:our\s+)?(?:top\s+)?(\d+)?\s*(.+?)(?:\?|$)/i);
|
||
if (roleMatch) {
|
||
const count = parseInt(roleMatch[1]) || 5;
|
||
const subject = roleMatch[2].trim().replace(/\s*workers?\s*$/,'').replace(/s$/,'');
|
||
queries.push(`SQL: Top ${count} ${subject}s by reliability`);
|
||
const topR = await api("POST", "/query/sql", { sql: `SELECT name, role, city, state, ROUND(CAST(reliability AS DOUBLE),2) rel, skills FROM workers_500k WHERE LOWER(role) LIKE '%${subject.replace(/'/g,"''")}%' ORDER BY CAST(reliability AS DOUBLE) DESC LIMIT ${count}` });
|
||
if (topR.rows?.length) {
|
||
const states = [...new Set(topR.rows.map((r:any) => r.state))];
|
||
queries.push(`SQL: Bench depth for ${subject}s in ${states.join(', ')}`);
|
||
const benchR = await api("POST", "/query/sql", { sql: `SELECT state, COUNT(*) total, SUM(CASE WHEN CAST(reliability AS DOUBLE)>0.8 THEN 1 ELSE 0 END) reliable FROM workers_500k WHERE LOWER(role) LIKE '%${subject.replace(/'/g,"''")}%' AND state IN (${states.map((s:string)=>`'${s}'`).join(',')}) GROUP BY state` });
|
||
const totalInRole = (benchR.rows||[]).reduce((s:number,r:any) => s + r.total, 0);
|
||
const reliableRemaining = (benchR.rows||[]).reduce((s:number,r:any) => s + r.reliable, 0) - topR.rows.length;
|
||
return ok({ type: "whatif", summary: `Impact: losing top ${topR.rows.length} ${subject} workers`,
|
||
lost: topR.rows, bench: benchR.rows||[], total_in_role: totalInRole, reliable_remaining: Math.max(0, reliableRemaining),
|
||
risk_level: reliableRemaining < count * 2 ? "HIGH" : reliableRemaining < count * 5 ? "MEDIUM" : "LOW",
|
||
queries_run: queries, duration_ms: Date.now() - start });
|
||
}
|
||
return ok({ type: "error", summary: `Couldn't find workers in the "${subject}" role. Try: welder, forklift operator, assembler, etc.`, queries_run: queries, duration_ms: Date.now() - start });
|
||
}
|
||
}
|
||
|
||
// Route 3: "Who could handle" — semantic role discovery
|
||
if (/could handle|capable of|suitable for|qualified for|try.*for|can do/i.test(lower)) {
|
||
const roleDesc = q.replace(/^.*?(?:handle|capable of|suitable for|qualified for|try\s+\w+\s+for|can do)\s*/i,'').replace(/\?$/,'').trim();
|
||
queries.push(`Vector: Semantic search for "${roleDesc}" — no exact role match needed`);
|
||
const searchR = await api("POST", "/vectors/hybrid", {
|
||
question: `Worker experienced in ${roleDesc}, relevant skills and certifications`,
|
||
index_name: "workers_500k_v1", sql_filter: "CAST(reliability AS DOUBLE) >= 0.75",
|
||
filter_dataset: "ethereal_workers", id_column: "worker_id", top_k: 8, generate: false,
|
||
});
|
||
return ok({ type: "discovery", summary: `${(searchR.sources||[]).length} workers found through semantic skill matching for: "${roleDesc}"`,
|
||
role_searched: roleDesc, results: (searchR.sources||[]).map((s:any) => ({ doc_id: s.doc_id, score: s.score, text: s.chunk_text })),
|
||
sql_matches: searchR.sql_matches,
|
||
note: "None of these workers have this exact role title. They were found because their skills, certifications, and experience are semantically similar. This is talent discovery — finding people for roles that don't exist in your database yet.",
|
||
queries_run: queries, duration_ms: Date.now() - start });
|
||
}
|
||
|
||
// Route 4: "Stop placing" / risk workers
|
||
if (/stop placing|worst|problem|flag|risk|underperform|fire|let go/i.test(lower)) {
|
||
queries.push("SQL: erratic/silent workers with reliability < 50%");
|
||
const riskR = await api("POST", "/query/sql", { sql: `SELECT name, role, city, state, ROUND(CAST(reliability AS DOUBLE),2) rel, ROUND(CAST(responsiveness AS DOUBLE),2) resp, ROUND(CAST(compliance AS DOUBLE),2) compl, archetype FROM workers_500k WHERE archetype IN ('erratic','silent') AND CAST(reliability AS DOUBLE)<0.5 ORDER BY CAST(reliability AS DOUBLE) ASC LIMIT 10` });
|
||
const countR = await api("POST", "/query/sql", { sql: `SELECT COUNT(*) cnt FROM workers_500k WHERE archetype IN ('erratic','silent') AND CAST(reliability AS DOUBLE)<0.5` });
|
||
return ok({ type: "risk", summary: `${countR.rows?.[0]?.cnt || 0} workers flagged — showing the 10 lowest performers`,
|
||
results: riskR.rows||[], total_flagged: countR.rows?.[0]?.cnt || 0,
|
||
queries_run: queries, duration_ms: Date.now() - start });
|
||
}
|
||
|
||
// Route 5: Analytics / counts
|
||
if (/how many|count|total|percentage|average|breakdown/i.test(lower)) {
|
||
queries.push("RAG: analytical question → vector retrieval + LLM reasoning");
|
||
const ragR = await api("POST", "/vectors/rag", { index_name: "workers_500k_v1", question: q, top_k: 3 });
|
||
return ok({ type: "answer", summary: ragR.answer || "Couldn't determine the answer from the data",
|
||
sources: (ragR.sources||[]).map((s:any) => ({ doc_id: s.doc_id, text: s.chunk_text, score: s.score })),
|
||
queries_run: queries, duration_ms: Date.now() - start });
|
||
}
|
||
|
||
// Route 6: late-worker / no-show triage. Coordinator gets a text
|
||
// ("Marcus running late site 4422") and needs three things in
|
||
// one shot: the worker's record + attendance pattern, a draft
|
||
// SMS to the client, and a ranked list of immediately-available
|
||
// backfills filtered by the same role+geo. The system already
|
||
// has every input (workers_500k, call_log, playbook_memory).
|
||
// The route binds them.
|
||
// Deliberately no /i flag — the name must be capitalized (English
// convention) and the event verbs are matched in lowercase. With /i,
// "Marcus running" parsed as the two-word name "Marcus Running", and
// the event regex then couldn't find "running late" because "running"
// had already been consumed by the name group.
const triageMatch = q.match(/^([A-Z][a-z]+(?:\s+[A-Z]\.?\s*)?(?:\s+[A-Z][a-z]+)?)\s+(running\s+late|late|no\s*show|no-show|sick|out\s+today|called\s+out|called\s+in|can'?t\s+make\s+it|won'?t\s+make\s+it)/);
|
||
if (triageMatch) {
|
||
const name = triageMatch[1].trim();
|
||
const event = triageMatch[2].toLowerCase().replace(/\s+/g, " ");
|
||
queries.push(`SQL: locate ${name}'s worker record`);
|
||
const profileR = await api("POST", "/query/sql", { sql: `SELECT name, role, city, state, zip, ROUND(CAST(reliability AS DOUBLE),2) rel, ROUND(CAST(availability AS DOUBLE),2) avail, ROUND(CAST(responsiveness AS DOUBLE),2) resp, archetype, skills, certifications FROM workers_500k WHERE name LIKE '%${name.replace(/'/g, "''")}%' ORDER BY CAST(reliability AS DOUBLE) DESC LIMIT 1` });
|
||
if (profileR.rows?.length) {
|
||
const w = profileR.rows[0];
|
||
// Pull attendance pattern from call_log if available — count
|
||
// recent calls + count of unanswered/late patterns. If the
|
||
// table doesn't exist or has nothing, we surface that
|
||
// honestly rather than fabricate.
|
||
queries.push(`SQL: ${w.name}'s recent contact pattern`);
|
||
const callR = await api("POST", "/query/sql", { sql: `SELECT COUNT(*) calls FROM call_log WHERE candidate_id IN (SELECT candidate_id FROM workers_500k WHERE name = '${w.name.replace(/'/g, "''")}')` }).catch(() => null);
|
||
const callCount = callR?.rows?.[0]?.calls ?? null;
|
||
|
||
// Backfills: same role + same geo, available now, ordered
|
||
// by responsiveness (a coordinator covering a no-show
|
||
// wants the candidate who actually answers their phone).
|
||
queries.push(`Backfill: ${w.role} in ${w.city}, ${w.state}, available, sorted by responsiveness`);
|
||
const backfillR = await api("POST", "/query/sql", { sql: `SELECT name, role, city, state, zip, ROUND(CAST(reliability AS DOUBLE),2) rel, ROUND(CAST(availability AS DOUBLE),2) avail, ROUND(CAST(responsiveness AS DOUBLE),2) resp, archetype, skills FROM workers_500k WHERE role = '${w.role.replace(/'/g, "''")}' AND city = '${(w.city||"").replace(/'/g, "''")}' AND state = '${(w.state||"").replace(/'/g, "''")}' AND name != '${w.name.replace(/'/g, "''")}' AND CAST(availability AS DOUBLE) > 0.6 ORDER BY CAST(responsiveness AS DOUBLE) DESC, CAST(reliability AS DOUBLE) DESC LIMIT 5` });
|
||
|
||
// Draft SMS the coordinator can send to the client. This
|
||
// is template-generated, not LLM — the coordinator must
|
||
// be able to send it instantly without re-reading. Names
|
||
// and roles are interpolated; the COORDINATOR sends.
|
||
const eventLabel = event.includes("late") ? "running late" : event.includes("show") ? "a no-show" : event.includes("sick") || event.includes("out") ? "out today" : "unable to make their shift";
|
||
const backfills = backfillR.rows || [];
|
||
const topBackfill = backfills[0]?.name;
|
||
const draftSms = topBackfill
|
||
? `Heads-up: ${w.name} (${w.role}) is ${eventLabel}. I'm dispatching ${topBackfill} from our local bench (${Math.round((backfills[0].rel||0)*100)}% reliability) to cover. Will confirm arrival within the hour.`
|
||
: `Heads-up: ${w.name} (${w.role}) is ${eventLabel}. I'm pulling our nearest available ${w.role} now and will confirm coverage shortly.`;
|
||
|
||
return ok({
|
||
type: "triage",
|
||
summary: `${w.name} — ${eventLabel}. ${backfills.length} local backfill${backfills.length === 1 ? "" : "s"} ready, draft SMS ready to send.`,
|
||
worker: { name: w.name, role: w.role, city: w.city, state: w.state, zip: w.zip, rel: w.rel, avail: w.avail, resp: w.resp, archetype: w.archetype, skills: w.skills, certifications: w.certifications, recent_calls: callCount },
|
||
event,
|
||
backfills,
|
||
draft_sms: draftSms,
|
||
queries_run: queries,
|
||
duration_ms: Date.now() - start,
|
||
});
|
||
}
|
||
return ok({ type: "triage_miss", summary: `Couldn't find a worker named "${name}" in the roster. Check the spelling or try last name only.`, queries_run: queries, duration_ms: Date.now() - start });
|
||
}
|
||
|
||
// Route 7: bare-name profile lookup. Coordinator types just a
|
||
// name (or "First Last") with no other intent — pull the
|
||
// profile, prior fills, and attendance pattern in one shot.
|
||
// Distinguished from smart_search by being SHORT (≤4 tokens),
|
||
// capitalized like a name, and not containing role/skill words.
|
||
const tokens = q.trim().split(/\s+/);
|
||
const looksLikeName = tokens.length >= 1 && tokens.length <= 4
|
||
&& tokens.every((t) => /^[A-Z][a-z'-]+\.?$/.test(t) || /^[A-Z]\.$/.test(t))
|
||
&& !/forklift|warehouse|electric|welder|assembl|maintain|production|operator|driver|tech|loader|packag|inventory|sanitation/i.test(q);
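// e.g. "Steven A. Allen" → 3 name-shaped tokens, no role words → profile
// lookup; "forklift operators in IL" trips the role-word check and falls
// through to the smart-search default below.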
if (looksLikeName) {
|
||
// Names have middle initials in workers_500k ("Steven A. Allen"),
|
||
// so a single LIKE '%First Last%' won't match. Split on
|
||
// whitespace, AND each token — lets "Marcus Rivera" match
|
||
// "Marcus L. Rivera" without enumerating initials.
|
||
const nameLike = tokens
|
||
.map((t) => `name LIKE '%${t.replace(/'/g, "''").replace(/\./g, "")}%'`)
|
||
.join(" AND ");
queries.push(`SQL: lookup name="${q}" via per-token LIKE`);
|
||
const r = await api("POST", "/query/sql", { sql: `SELECT name, role, city, state, zip, ROUND(CAST(reliability AS DOUBLE),2) rel, ROUND(CAST(availability AS DOUBLE),2) avail, ROUND(CAST(responsiveness AS DOUBLE),2) resp, archetype, skills, certifications FROM workers_500k WHERE ${nameLike} ORDER BY CAST(reliability AS DOUBLE) DESC LIMIT 5` });
|
||
if (r.rows?.length) {
|
||
return ok({
|
||
type: "profile",
|
||
summary: r.rows.length === 1 ? `${r.rows[0].name} — ${r.rows[0].role}, ${r.rows[0].city}, ${r.rows[0].state}` : `${r.rows.length} workers match "${q}"`,
|
||
profiles: r.rows,
|
||
queries_run: queries,
|
||
duration_ms: Date.now() - start,
|
||
});
|
||
}
|
||
return ok({ type: "profile_miss", summary: `No workers named "${q}" in the roster.`, queries_run: queries, duration_ms: Date.now() - start });
|
||
}
|
||
|
||
          // Route 8: temporal — "what came in last night", "new resumes
          // today", "last 24 hours". Surfaces recent ingest events from
          // the catalog (created_at on dataset objects) and ranks them
          // against open job_orders for "likely role match." Schema-
          // agnostic: any dataset that landed recently shows up.
          const temporalMatch = lower.match(/\b(last\s+night|today|this\s+morning|past\s+(\d+)\s+(?:hours?|days?)|last\s+(\d+)\s+(?:hours?|days?)|recent|new\s+(?:resumes?|candidates?|workers?|hires?|today)|came\s+in|arrived|just\s+(?:got|came))/i);
          if (temporalMatch) {
            // Decide window in hours
            let windowHours = 24;
            const pastN = lower.match(/\b(?:past|last)\s+(\d+)\s+(hours?|days?)/);
            if (pastN) {
              windowHours = parseInt(pastN[1], 10) * (pastN[2].startsWith("d") ? 24 : 1);
            } else if (/last\s+night|this\s+morning|today/i.test(lower)) {
              windowHours = 24;
            } else if (/recent/i.test(lower)) {
              windowHours = 72;
            }
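            // Worked examples of the window parse: "past 3 days" → 72h via pastN;
            // "last night" / "this morning" / "today" → 24h; a bare "recent" → 72h.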
            queries.push(`Catalog: datasets with created_at within last ${windowHours}h`);
            const ds = await api("GET", "/catalog/datasets") as any[];
            const cutoff = Date.now() - windowHours * 3600 * 1000;
            const recent = (Array.isArray(ds) ? ds : [])
              .map((d: any) => ({
                name: d.name,
                row_count: d.row_count || 0,
                bytes: (d.objects?.[0]?.size_bytes) || 0,
                updated_at: d.updated_at,
                ts: d.updated_at ? Date.parse(d.updated_at) : 0,
              }))
              .filter((d) => d.ts >= cutoff && d.row_count > 0)
              .sort((a, b) => b.ts - a.ts);

            // For each recent dataset, sample its first row's role-shape
            // text so the coordinator sees what's in it without reading
            // schemas. If it's a workers/resumes dataset, group by role.
            const samples: any[] = [];
            for (const d of recent.slice(0, 8)) {
              const sample = await api("POST", "/query/sql", { sql: `SELECT * FROM "${d.name.replace(/"/g, '""')}" LIMIT 1` }).catch(() => null);
              const cols = sample?.columns?.map((c: any) => c.name) || [];
              const looksLikeWorkers = cols.includes("role") && (cols.includes("name") || cols.includes("candidate_id"));
              let roleBreakdown: any[] = [];
              if (looksLikeWorkers) {
                const byRole = await api("POST", "/query/sql", { sql: `SELECT role, COUNT(*) cnt FROM "${d.name.replace(/"/g, '""')}" GROUP BY role ORDER BY cnt DESC LIMIT 5` }).catch(() => null);
                roleBreakdown = byRole?.rows || [];
              }
              samples.push({
                name: d.name,
                row_count: d.row_count,
                updated_at: d.updated_at,
                hours_ago: Math.round((Date.now() - d.ts) / 3600000),
                looks_like_workers: looksLikeWorkers,
                role_breakdown: roleBreakdown,
                preview: sample?.rows?.[0] || null,
              });
            }

            return ok({
              type: "ingest_log",
              summary: recent.length
                ? `${recent.length} dataset${recent.length === 1 ? "" : "s"} landed in the last ${windowHours}h. ${samples.filter((s) => s.looks_like_workers).reduce((sum, s) => sum + s.row_count, 0)} new worker rows across them.`
                : `Nothing new in the catalog in the last ${windowHours}h. (Dataset timestamps are based on catalog updated_at; if data was loaded directly to disk without going through /ingest/file, it won't show here.)`,
              window_hours: windowHours,
              datasets: samples,
              queries_run: queries,
              duration_ms: Date.now() - start,
            });
          }

          // Default: smart search — extract role, location, availability from natural language
          {
            const filters: string[] = ["CAST(reliability AS DOUBLE) >= 0.5"];
            const understood: string[] = [];

            // Structured input from the search-form dropdowns. When set,
            // these win over NL parsing — typing "forklift in IL" used to
            // misparse the preposition "in" as state IN (Indiana). Trust
            // explicit user selection over regex archaeology.
            const explicitState = String(b.state || "").trim().toUpperCase();
            const explicitRole = String(b.role || "").trim();

            // (G) Per-staffer context. When the UI sends a staffer_id,
            // playbook queries scope to that staffer's territory — their
            // recent fills, their geo's recurring patterns. The corpus is
            // the same for everyone; the relevance gradient is unique to
            // each staffer because each pulls a different shape from it.
            const staffer = lookupStaffer(String(b.staffer_id || "").trim());
            // If the staffer has a territory and the user hasn't already
            // pinned a state/city via dropdown or NL, default the search
            // to their territory. They can override by typing a different
            // city or selecting a different state.
            if (staffer && !explicitState) {
              filters.push(`state = '${staffer.territory.state}'`);
              understood.push(`as ${staffer.name}: ${staffer.territory.state}`);
            }

            // (B) Headcount parser — coordinator says "8 production
            // workers", "I need 12 forklift operators", "5 welders by
            // Friday". Match a leading or embedded count followed by
            // a worker-shape noun. Bound at 1..200 — anything outside is
            // probably not a headcount (zip codes, dates, addresses).
            let topK = 10;
            // Allow zero-to-two role words between the number and the
            // worker-noun: "8 workers" / "8 production workers" /
            // "8 forklift operators" all match. The role word is
            // optional so we don't lose the bare-number form.
            const countMatch = q.match(/\b(\d{1,3})\s+(?:\w+\s+){0,2}(?:workers?|operators?|drivers?|techs?|technicians?|welders?|electricians?|assemblers?|handlers?|loaders?|packagers?|associates?|leads?|people|hires?|staff)\b/i);
            if (countMatch) {
              const n = parseInt(countMatch[1], 10);
              if (n >= 1 && n <= 200) {
                topK = n;
                understood.push(`headcount: ${n}`);
              }
            }
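            // Worked examples: "I need 12 forklift operators by Friday" → topK = 12;
            // "300 workers" parses but 300 is outside 1..200, so the default of 10 stands.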

            // (A) Zip code → city/state lookup. A coordinator types a zip
            // because that's what the contract says. The previous parser
            // saw "60607" and treated it as a stray number; results came
            // back from any state. Map known metro zip prefixes here so
            // the geographic constraint actually fires.
            //
            // Each entry: zip-prefix → { city, state }. Prefix-match
            // covers a metro without enumerating every zip — e.g. "606"
            // catches Chicago zips 60600-60699.
            const zipPrefixMap: Array<[string, { city: string, state: string }]> = [
              // Chicago + near-suburb
              ["606", { city: "Chicago", state: "IL" }],
              ["607", { city: "Chicago", state: "IL" }],
              ["608", { city: "Chicago", state: "IL" }],
              // Indianapolis
              ["462", { city: "Indianapolis", state: "IN" }],
              ["461", { city: "Indianapolis", state: "IN" }],
              // Fort Wayne
              ["468", { city: "Fort Wayne", state: "IN" }],
              // Columbus OH
              ["432", { city: "Columbus", state: "OH" }],
              ["431", { city: "Columbus", state: "OH" }],
              // Cleveland
              ["441", { city: "Cleveland", state: "OH" }],
              // Cincinnati
              ["452", { city: "Cincinnati", state: "OH" }],
              ["451", { city: "Cincinnati", state: "OH" }],
              // Dayton
              ["454", { city: "Dayton", state: "OH" }],
              // Milwaukee
              ["532", { city: "Milwaukee", state: "WI" }],
              ["531", { city: "Milwaukee", state: "WI" }],
              // Madison
              ["537", { city: "Madison", state: "WI" }],
              // Detroit
              ["482", { city: "Detroit", state: "MI" }],
              ["481", { city: "Detroit", state: "MI" }],
              // Grand Rapids
              ["495", { city: "Grand Rapids", state: "MI" }],
              ["493", { city: "Grand Rapids", state: "MI" }],
              // Minneapolis / St. Paul
              ["554", { city: "Minneapolis", state: "MN" }],
              ["551", { city: "Minneapolis", state: "MN" }],
              // Des Moines
              ["503", { city: "Des Moines", state: "IA" }],
              // Kansas City MO
              ["641", { city: "Kansas City", state: "MO" }],
              // St. Louis
              ["631", { city: "St. Louis", state: "MO" }],
              // Nashville
              ["372", { city: "Nashville", state: "TN" }],
              // Memphis
              ["381", { city: "Memphis", state: "TN" }],
              // Knoxville
              ["379", { city: "Knoxville", state: "TN" }],
              // Louisville
              ["402", { city: "Louisville", state: "KY" }],
              // Lexington
              ["405", { city: "Lexington", state: "KY" }],
            ];
            const zipMatch = q.match(/\b(\d{5})\b/);
            let zipCity: { city: string, state: string } | null = null;
            if (zipMatch) {
              const z = zipMatch[1];
              const hit = zipPrefixMap.find(([prefix]) => z.startsWith(prefix));
              if (hit) {
                zipCity = hit[1];
                understood.push(`zip ${z} → ${hit[1].city}, ${hit[1].state}`);
              }
            }
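            // Worked examples: "forklift ops near 60607" → prefix "606" → Chicago, IL;
            // "welders in 46201" → prefix "462" → Indianapolis, IN. Zips outside the map
            // fall through to the city/state parsing below.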

            // Extract role keywords (skip if dropdown picked one)
            const roleKeywords: Record<string, string> = {
              "warehouse": "warehouse", "forklift": "forklift", "welder": "weld", "assembler": "assembl",
              "loader": "loader", "machine operator": "machine operator", "shipping": "shipping",
              "quality": "quality", "maintenance": "maintenance", "production": "production",
              "material handler": "material handler", "sanitation": "sanitation", "inventory": "inventory",
              "line lead": "line lead", "electrician": "electric", "packaging": "packaging",
              "tool and die": "tool", "logistics": "logistics", "safety": "safety", "cnc": "cnc",
            };
            if (explicitRole) {
              filters.push(`LOWER(role) LIKE '%${explicitRole.toLowerCase().replace(/'/g, "''")}%'`);
              understood.push(`role: ${explicitRole}`);
            } else {
              for (const [kw, sqlPart] of Object.entries(roleKeywords)) {
                if (lower.includes(kw)) { filters.push(`LOWER(role) LIKE '%${sqlPart}%'`); understood.push(`role: ${kw}`); break; }
              }
            }

            // Extract city
            // Zip code wins over city-name parsing — it's more specific
            // and the coordinator typed a number, not a casual mention.
            if (zipCity) {
              filters.push(`city = '${zipCity.city}'`);
              understood.push(`city: ${zipCity.city}`);
            } else {
              const cities = ["chicago","springfield","rockford","peoria","joliet","indianapolis","fort wayne",
                "evansville","south bend","columbus","cleveland","cincinnati","dayton","akron","toledo",
                "st. louis","st louis","kansas city","nashville","memphis","knoxville","louisville","lexington",
                "milwaukee","madison","detroit","grand rapids","lansing","des moines","minneapolis","terre haute",
                "bloomington","decatur","mattoon","galesburg","danville","champaign"];
              for (const city of cities) {
                if (lower.includes(city)) {
                  const sqlCity = city.split(' ').map(w => w[0].toUpperCase() + w.slice(1)).join(' ');
                  filters.push(`city = '${sqlCity}'`);
                  understood.push(`city: ${sqlCity}`);
                  break;
                }
              }
            }

            // Extract state — dropdown wins; otherwise NL parse, but
            // require either an explicit "in/from <STATE>" preposition
            // OR an UPPERCASE 2-letter code, never a bare lowercase
            // 2-letter token. Old regex matched "in" (preposition) as
            // state IN (Indiana) because the /i flag made the standalone
            // pattern case-insensitive — "forklift in IL" always returned
            // Indiana workers.
            const stateNames: Record<string, string> = {
              "illinois":"IL","indiana":"IN","ohio":"OH","missouri":"MO","tennessee":"TN",
              "kentucky":"KY","wisconsin":"WI","michigan":"MI","iowa":"IA","minnesota":"MN"
            };
            if (explicitState) {
              if (!understood.some(u => u.startsWith('city'))) {
                filters.push(`state = '${explicitState.replace(/'/g, "''")}'`);
                understood.push(`state: ${explicitState}`);
              }
            } else {
              const prepMatch = q.match(/\b(?:in|from)\s+(IL|IN|OH|MO|TN|KY|WI|MI|IA|MN)\b/i);
              const upperMatch = q.match(/\b(IL|IN|OH|MO|TN|KY|WI|MI|IA|MN)\b/); // no /i — must be uppercase
              const stateMatch = prepMatch || upperMatch;
              if (stateMatch && !understood.some(u => u.startsWith('city'))) {
                filters.push(`state = '${stateMatch[1].toUpperCase()}'`);
                understood.push(`state: ${stateMatch[1].toUpperCase()}`);
              } else {
                for (const [name, abbr] of Object.entries(stateNames)) {
                  if (lower.includes(name)) { filters.push(`state = '${abbr}'`); understood.push(`state: ${abbr}`); break; }
                }
              }
            }
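            // Worked examples: "forklift in IL" → prepMatch → state: IL; "welders in Ohio"
            // → stateNames → state: OH; "crew available in the morning" → no state match,
            // so the lowercase preposition "in" can no longer masquerade as Indiana.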

            // Extract availability
            if (/available|open|ready|today|now|immediate|asap|right away/i.test(lower)) {
              filters.push("CAST(availability AS DOUBLE) > 0.5");
              understood.push("available now");
            }

            // Extract reliability preference
            if (/reliable|dependable|best|top|trusted|proven/i.test(lower)) {
              filters[0] = "CAST(reliability AS DOUBLE) >= 0.8";
              understood.push("high reliability");
            }

            const filterStr = filters.join(" AND ");
            queries.push("Smart parse: " + (understood.length ? understood.join(", ") : "general search"));
            queries.push("SQL filter: " + filterStr);
            queries.push("Vector: semantic search for best skill match");

            // Also run a direct SQL query to get exact counts and zip codes.
            // LIMIT honors the parsed headcount (capped at 25 to keep the
            // grid renderable; the staffer can ask for more).
            const sqlFields = "name, role, city, state, zip, ROUND(CAST(reliability AS DOUBLE),2) rel, ROUND(CAST(availability AS DOUBLE),2) avail, skills, certifications, archetype";
            const sqlLimit = Math.min(Math.max(topK, 5), 25);
            const directSql = `SELECT ${sqlFields} FROM workers_500k WHERE ${filterStr} ORDER BY CAST(availability AS DOUBLE) DESC, CAST(reliability AS DOUBLE) DESC LIMIT ${sqlLimit}`;

            // Derive role+geo for the pattern query so the meta-index
            // surface lines up with what the user actually asked for.
            // (G) When a staffer is acting, default the geo to their
            // primary territory — their playbook view is shaped by
            // where they actually fill, not the global Chicago/IL prior.
            const roleForPatterns = understood.find(u => u.startsWith('role:'))?.split(': ')[1] || q;
            const cityForPatterns = understood.find(u => u.startsWith('city:'))?.split(': ')[1]
              || staffer?.territory.cities[0] || 'Chicago';
            const stateForPatterns = understood.find(u => u.startsWith('state:'))?.split(': ')[1]
              || staffer?.territory.state || 'IL';

            const [searchR, directR, patternR] = await Promise.all([
              api("POST", "/vectors/hybrid", {
                question: q, index_name: "workers_500k_v1", sql_filter: filterStr,
                filter_dataset: "ethereal_workers", id_column: "worker_id",
                // Honor the parsed headcount (capped at 25 to keep the
                // vector rerank from re-scoring more rows than render).
                top_k: Math.min(Math.max(topK, 5), 25), generate: false,
                // k=200 to catch compounding — direct measurement shows
                // boost reliably fires only when ~all memory is scanned
                // due to the narrow 0.55-0.67 cosine band in the 768d
                // nomic-embed-text space. Brute force at 200 entries
                // is sub-ms; no reason to underscan.
                use_playbook_memory: true, playbook_memory_k: 200,
              }),
              api("POST", "/query/sql", { sql: directSql }),
              api("POST", "/vectors/playbook_memory/patterns", {
                query: `${roleForPatterns} in ${cityForPatterns}, ${stateForPatterns}`,
                top_k_playbooks: 25, min_trait_frequency: 0.3,
              }).catch(() => ({})),
            ]);

            // Merge: use SQL results for structured data (zip, avail), vector for ranking
            const sqlWorkers = directR.rows || [];
            const vectorWorkers = (searchR.sources || []).map((s: any) => ({
              doc_id: s.doc_id, score: s.score, text: s.chunk_text,
              playbook_boost: s.playbook_boost || 0,
              playbook_citations: s.playbook_citations || [],
            }));

            return ok({
              type: "smart_search",
              summary: `Found ${searchR.sql_matches || 0} workers matching your criteria${understood.length ? ' (' + understood.join(', ') + ')' : ''}`,
              staffer: staffer ? { id: staffer.id, name: staffer.name, display: staffer.display, territory: staffer.territory } : null,
              understood,
              sql_results: sqlWorkers,
              vector_results: vectorWorkers,
              sql_matches: searchR.sql_matches,
              queries_run: queries,
              duration_ms: Date.now() - start,
              // Meta-index signal — what similar past fills had in common.
              // Non-empty when memory has ≥1 relevant playbook.
              discovered_pattern: (patternR as any)?.discovered_pattern,
              pattern_playbooks_matched: (patternR as any)?.matched_playbooks ?? 0,
            });
          }
        }

        activeTrace = null;
        return err("Unknown path. Available: / /health /search /sql /match /worker/:id /ask /log /playbooks /profile/:id /vram /context /verify /simulation/run /console /intelligence/brief /intelligence/chat", 404);
      } catch (e: any) {
        if (activeTrace) { scoreTrace(activeTrace, "error", 0, e.message); }
        activeTrace = null;
        return err(e.message || String(e), 500);
      } finally {
        // Flush traces async — don't block the response
        flushTraces().catch(() => {});
        activeTrace = null;
      }
    },
  });

  console.error(`Lakehouse Agent Gateway :${PORT} → ${BASE}`);
}

main().catch(console.error);

// ─── Week simulation engine ───

const ROLES = ["Forklift Operator","Machine Operator","Assembler","Loader","Quality Tech","Welder","Sanitation Worker","Shipping Clerk","Production Worker","Maintenance Tech"];
const STATES = ["IL","IN","OH","MO","TN","KY","WI","MI"];
const CITIES: Record<string, string[]> = {
  IL: ["Chicago","Springfield","Rockford","Peoria","Joliet"],
  IN: ["Indianapolis","Fort Wayne","Evansville","South Bend"],
  OH: ["Columbus","Cleveland","Cincinnati","Dayton"],
  MO: ["St. Louis","Kansas City","Springfield"],
  TN: ["Nashville","Memphis"], KY: ["Louisville","Lexington"],
  WI: ["Milwaukee","Madison"], MI: ["Detroit","Grand Rapids"],
};
const CLIENT_PREFIXES = ["Midwest","Great Lakes","Prairie","Heartland","Summit","Valley","Central","Lakeside","Tri-State","Heritage","National","Premier","Metro","Capitol","Crossroads","Keystone","Riverfront","Gateway","Pinnacle","Cornerstone"];
const CLIENT_SUFFIXES = ["Logistics","Manufacturing","Assembly","Foods","Steel","Packaging","Health","Plastics","Energy","Solutions","Distribution","Services","Industries","Supply","Warehousing","Materials","Products","Corp","Group","Enterprises"];
function makeClient(): string { return pick(CLIENT_PREFIXES) + " " + pick(CLIENT_SUFFIXES); }
const STARTS = ["5:00 AM","6:00 AM","6:30 AM","7:00 AM","7:30 AM","8:00 AM"];

// Diverse scenarios — each tells a different story about WHY this contract exists
const SCENARIOS = [
  // URGENT — real emergencies that need immediate action
  { priority: "urgent", weight: 8, note: "Worker walked off the job at 3 PM yesterday — client needs replacement by morning",
    situation: "walkoff", action: "Replacement needed ASAP — previous worker quit mid-shift" },
  { priority: "urgent", weight: 5, note: "Client emailed at 11 PM — their regular crew has COVID exposure, entire team quarantined",
    situation: "quarantine", action: "Full crew replacement — health emergency at job site" },
  { priority: "urgent", weight: 5, note: "2 no-shows this morning — client is short-staffed on the floor right now",
    situation: "noshow", action: "Immediate backfill — client waiting on the phone" },

  // HIGH — important but not crisis
  { priority: "high", weight: 10, note: "New contract starting Monday — client wants to meet workers this week",
    situation: "new_client", action: "New client onboarding — first impression matters" },
  { priority: "high", weight: 8, note: "Client expanding to 2nd shift — need additional crew by next week",
    situation: "expansion", action: "Growth opportunity — client adding a shift" },
  { priority: "high", weight: 6, note: "Worker's OSHA certification expires Friday — need certified replacement lined up",
    situation: "cert_expiry", action: "Cert compliance — current worker can't continue without renewal" },
  { priority: "high", weight: 5, note: "Client requested specific workers back from last month's project",
    situation: "client_request", action: "Client relationship — they asked for specific people" },

  // MEDIUM — standard day-to-day operations
  { priority: "medium", weight: 15, note: "Ongoing weekly fill — same client, same role, reliable pipeline",
    situation: "recurring", action: "Recurring contract — steady work" },
  { priority: "medium", weight: 12, note: "Seasonal uptick — warehouse volume increasing ahead of holidays",
    situation: "seasonal", action: "Seasonal planning — volume ramping up" },
  { priority: "medium", weight: 10, note: "Backfill for worker on approved medical leave — returns in 3 weeks",
    situation: "medical_leave", action: "Temporary coverage — worker returning soon" },
  { priority: "medium", weight: 8, note: "Client testing new role — wants to try 2 workers for a week before committing",
    situation: "trial", action: "Trial placement — client evaluating the role" },
  { priority: "medium", weight: 6, note: "Cross-training opportunity — client wants workers who can learn a new skill",
    situation: "cross_train", action: "Development opportunity — workers can learn new skills" },

  // LOW — planning ahead
  { priority: "low", weight: 10, note: "Future fill — project starts in 2 weeks, gathering candidates now",
    situation: "future", action: "Pipeline building — no rush, quality over speed" },
  { priority: "low", weight: 8, note: "Client exploring staffing options — not committed yet, just want to see who's available",
    situation: "exploratory", action: "Exploratory — client shopping, impress them with quality" },
  { priority: "low", weight: 5, note: "Internal transfer — moving a worker from one site to another, need replacement at original",
    situation: "transfer", action: "Planned transition — smooth handoff between sites" },
];

function pick<T>(arr: T[]): T { return arr[Math.floor(Math.random() * arr.length)]; }

// ─── Client-blacklist persistence (feature #2) ──────────────────────────
// Simple JSON file under mcp-server/data/. Synchronous writes are fine
// at the expected rate (a handful of blacklist adds per day).
const BLACKLIST_PATH = `${import.meta.dir}/data/client_blacklists.json`;

interface BlacklistEntry {
  worker_id: string;
  name: string;
  reason: string;
  added_at: string;
}

async function loadAllBlacklists(): Promise<Record<string, BlacklistEntry[]>> {
  try {
    const f = Bun.file(BLACKLIST_PATH);
    if (!(await f.exists())) return {};
    return await f.json() as Record<string, BlacklistEntry[]>;
  } catch { return {}; }
}
async function saveAllBlacklists(all: Record<string, BlacklistEntry[]>): Promise<void> {
  await Bun.write(BLACKLIST_PATH, JSON.stringify(all, null, 2));
}
async function loadClientBlacklist(client: string): Promise<BlacklistEntry[]> {
  const all = await loadAllBlacklists();
  return all[client] || [];
}
async function addToClientBlacklist(client: string, entry: BlacklistEntry): Promise<BlacklistEntry[]> {
  const all = await loadAllBlacklists();
  const list = all[client] || [];
  // De-dupe: same worker_id replaces prior entry with fresher reason.
  const filtered = list.filter(e => e.worker_id !== entry.worker_id);
  filtered.push(entry);
  all[client] = filtered;
  await saveAllBlacklists(all);
  return filtered;
}
async function removeFromClientBlacklist(client: string, worker_id: string): Promise<{ removed: boolean; total: number }> {
  const all = await loadAllBlacklists();
  const list = all[client] || [];
  const filtered = list.filter(e => e.worker_id !== worker_id);
  const removed = filtered.length < list.length;
  all[client] = filtered;
  await saveAllBlacklists(all);
  return { removed, total: filtered.length };
}
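// Illustrative usage (client name and worker id are hypothetical):
//   await addToClientBlacklist("Midwest Logistics", {
//     worker_id: "W-0042", name: "Pat Doe", reason: "client request", added_at: new Date().toISOString(),
//   });
//   await removeFromClientBlacklist("Midwest Logistics", "W-0042"); // → { removed: true, total: 0 }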

// ─── Push daemon (alerts) ───────────────────────────────────────────────
// Background interval that detects notification-worthy events, assembles
// a digest, and dispatches to configured channels. Converts the app from
// "dashboard you visit" to "system that finds you" — essential for the
// phone-first shop that won't remember to open a URL.

const ALERTS_CFG_PATH = `${import.meta.dir}/data/notification_config.json`;
const ALERTS_STATE_PATH = `${import.meta.dir}/data/notification_state.json`;
const ALERTS_LOG_PATH = `${import.meta.dir}/data/notifications.jsonl`;

interface AlertsConfig {
  enabled: boolean;
  interval_minutes: number;
  webhook_url?: string;
  webhook_label?: string;
  deadline_warn_days: number;
}
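// Illustrative data/notification_config.json (field names follow AlertsConfig above;
// the webhook URL is a placeholder, not a real endpoint):
//   { "enabled": true, "interval_minutes": 15, "deadline_warn_days": 7,
//     "webhook_url": "https://hooks.example.com/lakehouse-alerts", "webhook_label": "ops-room" }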
interface AlertsState {
  last_run_at?: string;
  last_forecast_by_role?: Record<string, { risk: string; coverage_pct: number; earliest_staffing_deadline: string }>;
  last_playbook_entries?: number;
  last_digest?: any;
}

async function loadAlertsConfig(): Promise<AlertsConfig> {
  const f = Bun.file(ALERTS_CFG_PATH);
  if (!(await f.exists())) {
    return { enabled: true, interval_minutes: 15, deadline_warn_days: 7 };
  }
  try { return await f.json() as AlertsConfig; }
  catch { return { enabled: true, interval_minutes: 15, deadline_warn_days: 7 }; }
}
async function saveAlertsConfig(c: AlertsConfig): Promise<void> {
  await Bun.write(ALERTS_CFG_PATH, JSON.stringify(c, null, 2));
}
async function loadAlertsState(): Promise<AlertsState> {
  const f = Bun.file(ALERTS_STATE_PATH);
  if (!(await f.exists())) return {};
  try { return await f.json() as AlertsState; } catch { return {}; }
}
async function saveAlertsState(s: AlertsState): Promise<void> {
  await Bun.write(ALERTS_STATE_PATH, JSON.stringify(s, null, 2));
}

// Build a digest by diffing current state against last-observed state.
// Returns null if there's nothing worth sending.
async function buildDigest(): Promise<any | null> {
  const cfg = await loadAlertsConfig();
  const state = await loadAlertsState();

  // Pull current snapshots in parallel. /intelligence/staffing_forecast
  // is a BUN route (our localhost), not on the Rust gateway — reach it
  // via in-process fetch. /vectors/playbook_memory/stats is on the
  // gateway and gets there via api().
  const bunPort = process.env.PORT || "3700";
  const [forecast, memStats] = await Promise.all([
    fetch(`http://localhost:${bunPort}/intelligence/staffing_forecast`, {
      method: "POST", headers: { "Content-Type": "application/json" }, body: "{}"
    }).then(r => r.json()).catch(() => null as any),
    api("GET", "/vectors/playbook_memory/stats").catch(() => null as any),
  ]);

  const events: any[] = [];

  // Event: role risk status changed (new critical/tight)
  const currentByRole: Record<string, any> = {};
  const priorByRole = state.last_forecast_by_role || {};
  if (forecast && Array.isArray(forecast.forecast)) {
    for (const f of forecast.forecast) {
      currentByRole[f.role] = {
        risk: f.risk,
        coverage_pct: f.coverage_pct,
        earliest_staffing_deadline: f.earliest_staffing_deadline,
      };
      const prior = priorByRole[f.role];
      const rank: Record<string, number> = { ok: 0, watch: 1, tight: 2, critical: 3 };
      if (!prior || (rank[f.risk] ?? 0) > (rank[prior.risk] ?? 0)) {
        // Risk got worse (or new role we haven't seen)
        if (f.risk === "critical" || f.risk === "tight") {
          events.push({
            kind: "risk_escalation",
            role: f.role,
            risk: f.risk,
            coverage_pct: f.coverage_pct,
            demand: f.demand_workers,
            available: f.bench_available,
            prior_risk: prior?.risk ?? null,
          });
        }
      }
      // Event: staffing deadline within N days that wasn't there before
      const d = f.days_to_deadline;
      if (d !== undefined && d >= 0 && d <= cfg.deadline_warn_days) {
        const priorD = prior?.earliest_staffing_deadline;
        if (priorD !== f.earliest_staffing_deadline) {
          events.push({
            kind: "deadline_approaching",
            role: f.role,
            days_to_deadline: d,
            date: f.earliest_staffing_deadline,
            demand: f.demand_workers,
          });
        }
      }
    }
  }

  // Event: playbook memory grew significantly since last check
  const nowEntries = memStats?.entries ?? 0;
  const priorEntries = state.last_playbook_entries ?? 0;
  const grewBy = nowEntries - priorEntries;
  if (grewBy >= 5) {
    events.push({
      kind: "memory_growth",
      new_entries: grewBy,
      total_entries: nowEntries,
      total_endorsed_names: memStats?.total_names_endorsed ?? 0,
    });
  }

  // Only return a digest if there's something to say. First-ever run is
  // a special case: surface the snapshot as a "welcome" digest.
  const isFirstRun = !state.last_run_at;
  if (events.length === 0 && !isFirstRun) return null;

  const digest = {
    generated_at: new Date().toISOString(),
    is_first_run: isFirstRun,
    events,
    snapshot: {
      forecast_roles: Object.keys(currentByRole).length,
      critical: forecast?.critical_roles ?? 0,
      tight: forecast?.tight_roles ?? 0,
      playbook_entries: nowEntries,
      permits_30d: forecast?.permit_count ?? 0,
      construction_pipeline_usd: forecast?.total_cost ?? 0,
    },
  };

  // Persist the updated state for next diff
  await saveAlertsState({
    last_run_at: digest.generated_at,
    last_forecast_by_role: currentByRole,
    last_playbook_entries: nowEntries,
    last_digest: digest,
  });

  return digest;
}

function formatDigestText(d: any): string {
  const lines: string[] = [];
  lines.push(`LAKEHOUSE DIGEST — ${d.generated_at.slice(0, 16).replace("T", " ")}`);
  lines.push("");
  if (d.is_first_run) {
    lines.push(`[initial snapshot] · ${d.snapshot.forecast_roles} roles tracked · `
      + `${d.snapshot.playbook_entries} playbooks in memory · `
      + `${d.snapshot.permits_30d} permits last 30d`);
    lines.push("");
  }
  const risk = d.events.filter((e: any) => e.kind === "risk_escalation");
  if (risk.length) {
    lines.push(`${risk.length} role${risk.length !== 1 ? "s" : ""} escalated to ${risk.map((r: any) => r.risk).filter((v: string, i: number, a: string[]) => a.indexOf(v) === i).join("/")}:`);
    for (const e of risk.slice(0, 5)) {
      lines.push(` • ${e.role} — coverage ${e.coverage_pct}% (${e.available}/${e.demand})${e.prior_risk ? ` · was ${e.prior_risk}` : " · new"}`);
    }
    lines.push("");
  }
  const dead = d.events.filter((e: any) => e.kind === "deadline_approaching");
  if (dead.length) {
    lines.push(`${dead.length} staffing deadline${dead.length !== 1 ? "s" : ""} within window:`);
    for (const e of dead.slice(0, 5)) {
      lines.push(` • ${e.role} — ${e.days_to_deadline}d to ${e.date} · demand ${e.demand}`);
    }
    lines.push("");
  }
  const mem = d.events.filter((e: any) => e.kind === "memory_growth");
  for (const e of mem) {
    lines.push(`+${e.new_entries} new playbooks (total ${e.total_entries}, ${e.total_endorsed_names} endorsed names)`);
  }
  lines.push(`snapshot: ${d.snapshot.critical} critical · ${d.snapshot.tight} tight · `
    + `$${(d.snapshot.construction_pipeline_usd || 0).toLocaleString("en-US", { maximumFractionDigits: 0 })} pipeline`);
  return lines.join("\n");
}
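// Illustrative output (every number here is hypothetical):
//   LAKEHOUSE DIGEST — 2026-04-20 14:30
//
//   1 role escalated to critical:
//    • Forklift Operator — coverage 62% (5/8) · was tight
//
//   snapshot: 1 critical · 2 tight · $4,200,000 pipeline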

async function dispatchDigest(d: any, cfg: AlertsConfig): Promise<{ channels: string[]; errors: string[] }> {
  const channels: string[] = [];
  const errors: string[] = [];
  const text = formatDigestText(d);

  // Channel 1: console
  console.log(`[alerts] ${text.split("\n").join(" | ")}`);
  channels.push("console");

  // Channel 2: JSONL file (always-on audit)
  try {
    await Bun.write(ALERTS_LOG_PATH,
      (await Bun.file(ALERTS_LOG_PATH).exists() ? await Bun.file(ALERTS_LOG_PATH).text() : "")
      + JSON.stringify({ at: d.generated_at, text, digest: d }) + "\n"
    );
    channels.push("file");
  } catch (e: any) { errors.push(`file: ${e.message}`); }

  // Channel 3: webhook (opt-in)
  if (cfg.webhook_url) {
    try {
      const r = await fetch(cfg.webhook_url, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ text, digest: d }),
      });
      if (r.ok) channels.push("webhook");
      else errors.push(`webhook ${r.status}: ${(await r.text()).slice(0, 200)}`);
    } catch (e: any) { errors.push(`webhook: ${e.message}`); }
  }

  return { channels, errors };
}

// Background daemon — kicked off once on module init. Guard via a
// globalThis sentinel so the startAlertsDaemon() call from near the
// top of the file (before this block evaluates) doesn't hit a temporal
// dead zone on a let/const binding.
async function startAlertsDaemon() {
  const g = globalThis as any;
  if (g.__lakehouse_alerts_armed) return;
  g.__lakehouse_alerts_armed = true;
  const cfg = await loadAlertsConfig();
  if (!cfg.enabled) {
    console.log("[alerts] daemon disabled via config");
    return;
  }
  const ms = Math.max(60, cfg.interval_minutes * 60) * 1000;
  console.log(`[alerts] daemon armed · interval ${cfg.interval_minutes}min · webhook ${cfg.webhook_url ? "configured" : "disabled"}`);
  // Fire once shortly after startup, then on interval.
  setTimeout(runAlertsOnce, 10_000);
  setInterval(runAlertsOnce, ms);
}
async function runAlertsOnce() {
  try {
    const cfg = await loadAlertsConfig();
    if (!cfg.enabled) return;
    const d = await buildDigest();
    if (!d) return;
    await dispatchDigest(d, cfg);
  } catch (e: any) {
    console.error(`[alerts] cycle error: ${e.message}`);
  }
}

// ─── Sample CSV generator ───────────────────────────────────────────────
// Fresh randomized staffing roster per request. Prevents the "upload
// same file twice and it's a no-op" problem from the static sample,
// and makes the dashboard numbers visibly update after onboarding.

const SAMPLE_FIRST_NAMES = [
  "Sarah","Michael","Maria","David","Jennifer","Robert","Amanda","Carlos",
  "Kim","James","Priya","Thomas","Lisa","Brandon","Emily","Marcus","Anita",
  "Dmitri","Rachel","Samuel","Jordan","Natalia","Henry","Ava","Tyler",
  "Hannah","Luis","Aisha","Victor","Monica","Derek","Yuki","Fatima","Kwame",
  "Isabel","Rafael","Elena","Hiroshi","Nadia","Oscar","Sofia","Anders",
  "Leila","Jamal","Chioma","Pavel","Bianca","Tariq","Inez","Reuben","Mira",
];
const SAMPLE_LAST_NAMES = [
  "Johnson","Chen","Rodriguez","Park","Lopez","Williams","Taylor","Mendoza",
  "Nguyen","O'Brien","Patel","Anderson","Nakamura","Moore","Zhang","Brooks",
  "Volkov","Kim","Thompson","Martinez","Soto","Robinson","Clark","Hayes",
  "Reyes","Brown","Wright","Diaz","Powell","Green","Castillo","Iwu",
  "Kowalski","Lindström","Oyelaran","Saitō","Abebe","Mehta","Blanchard",
];
const SAMPLE_ROLES = [
  "Forklift Operator","Welder","Warehouse Associate","Machine Operator",
  "Loader","Maintenance Tech","Quality Tech","Electrician","Line Lead",
  "Material Handler","Production Worker","Assembler","Shipping Clerk",
];
const SAMPLE_CITY_STATE: Array<[string, string]> = [
  ["Chicago","IL"],["Springfield","IL"],["Rockford","IL"],["Peoria","IL"],
  ["Indianapolis","IN"],["Fort Wayne","IN"],["Evansville","IN"],["South Bend","IN"],
  ["Columbus","OH"],["Cleveland","OH"],["Cincinnati","OH"],["Toledo","OH"],
  ["St. Louis","MO"],["Kansas City","MO"],["Springfield","MO"],
  ["Nashville","TN"],["Memphis","TN"],["Knoxville","TN"],
  ["Louisville","KY"],["Lexington","KY"],
  ["Milwaukee","WI"],["Madison","WI"],["Green Bay","WI"],
  ["Detroit","MI"],["Grand Rapids","MI"],["Lansing","MI"],
];
const SAMPLE_SKILL_POOLS: Record<string, string[]> = {
  "Forklift Operator": ["pallet jack","hazmat","loading dock","overhead crane","cold storage","shipping","team lead"],
  "Welder": ["TIG","MIG","pipe welding","blueprint reading","grinder","confined space"],
  "Warehouse Associate": ["inventory","RF scanner","pick-to-light","Excel","packaging","team lead"],
  "Machine Operator": ["CNC","SPC","gauge R&R","lean manufacturing","conveyor ops","first article"],
  "Loader": ["loading dock","team lead","cold storage","first aid","bilingual"],
  "Maintenance Tech": ["electrical","PLC","hydraulics","CMMS","LOTO","troubleshooting"],
  "Quality Tech": ["ISO 9001","calibration","root cause analysis","SPC","Six Sigma"],
  "Electrician": ["conduit","motor controls","troubleshooting","PLC","NEC"],
  "Line Lead": ["team lead","training","SPC","scheduling"],
  "Material Handler": ["RF scanner","pallet jack","receiving","packaging"],
  "Production Worker": ["line work","first article","labeling","packaging","quality inspection"],
  "Assembler": ["assembly","gauge R&R","line lead","first article"],
  "Shipping Clerk": ["shipping","receiving","RF scanner","bilingual"],
};
const SAMPLE_CERT_POOL = ["OSHA-10","OSHA-30","Forklift","Hazmat","First Aid","LOTO","Confined Space","AWS D1.1","ServSafe","Six Sigma Green"];
const SAMPLE_ARCHETYPES = ["reliable","specialist","leader","communicator","flexible"];

// NOTE: pick<T>() is already declared above with the week-simulation helpers;
// redeclaring it here would be a duplicate function implementation, so the
// sample generator simply reuses that one.
function pickN<T>(arr: T[], n: number): T[] {
  const copy = arr.slice();
  const out: T[] = [];
  for (let i = 0; i < n && copy.length > 0; i++) {
    out.push(copy.splice(Math.floor(Math.random() * copy.length), 1)[0]);
  }
  return out;
}
function csvEscape(s: string): string {
  if (s.indexOf(",") >= 0 || s.indexOf('"') >= 0 || s.indexOf("\n") >= 0) {
    return `"${s.replace(/"/g, '""')}"`;
  }
  return s;
}

function generateSampleRosterCSV(): string {
  const count = 120 + Math.floor(Math.random() * 61); // 120-180
  const ts = Date.now();
  const lines: string[] = [
    "worker_id,name,role,city,state,email,phone,skills,certifications,availability,reliability,archetype",
  ];
  for (let i = 0; i < count; i++) {
    const first = pick(SAMPLE_FIRST_NAMES);
    const last = pick(SAMPLE_LAST_NAMES);
    const name = `${first} ${last}`;
    const role = pick(SAMPLE_ROLES);
    const [city, state] = pick(SAMPLE_CITY_STATE);
    const handle = `${first}.${last}`.toLowerCase().replace(/[^a-z\.]/g, "");
    const email = `${handle}${Math.floor(Math.random() * 1000)}@example.com`;
    const area = ["312","773","630","708","331","815","217","219","260","614","216","513","419","314","816","615","901","502","414","608","313","616"][Math.floor(Math.random() * 22)];
    const phone = `(${area}) 555-${String(1000 + Math.floor(Math.random() * 9000))}`;
    const skillPool = SAMPLE_SKILL_POOLS[role] || ["general"];
    const skills = pickN(skillPool, 2 + Math.floor(Math.random() * 3)).join("|");
    const certs = pickN(SAMPLE_CERT_POOL, 1 + Math.floor(Math.random() * 3)).join("|");
    const availability = (0.3 + Math.random() * 0.69).toFixed(2);
    const reliability = (0.55 + Math.random() * 0.44).toFixed(2);
    const archetype = pick(SAMPLE_ARCHETYPES);
    lines.push([
      `W-${ts}-${String(i).padStart(4, "0")}`,
      csvEscape(name),
      csvEscape(role),
      csvEscape(city),
      state,
      email,
      phone,
      csvEscape(skills),
      csvEscape(certs),
      availability,
      reliability,
      archetype,
    ].join(","));
  }
  return lines.join("\n") + "\n";
}
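// Illustrative output row (every value is randomized per run):
//   W-1745100000000-0007,Maria Chen,Welder,Peoria,IL,maria.chen412@example.com,(312) 555-4821,TIG|MIG|grinder,OSHA-10|Forklift,0.72,0.88,specialist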

// ─── Rate/margin awareness ──────────────────────────────────────────────
// Derive implied pay and bill rates per worker / per contract without
// schema changes. Numbers are industry heuristics — a real deployment
// would replace these with the client's actual ATS pay_rate column and
// contract bill_rate. The shape stays the same; only the source changes.

const ROLE_BASE_PAY_RATE: Record<string, number> = {
  "Electrician": 28,
  "Welder": 26,
  "Machine Operator": 24,
  "Maintenance Tech": 26,
  "Forklift Operator": 20,
  "Loader": 17,
  "Warehouse Associate": 17,
  "Material Handler": 18,
  "Production Worker": 18,
  "Quality Tech": 23,
  "Line Lead": 22,
  "Assembler": 18,
  "Shipping Clerk": 19,
};
const DEFAULT_BASE_PAY = 19;
// Staffing firm typically marks up pay to bill by 35-45% to cover
// overhead, insurance, and margin. Using 40% as the midpoint.
const BILL_MARKUP = 1.4;

function impliedPayRate(w: { role?: string | null; reliability?: number | string | null; archetype?: string | null }): number {
  const role = w.role || "";
  const base = ROLE_BASE_PAY_RATE[role] ?? DEFAULT_BASE_PAY;
  const rel = typeof w.reliability === "string" ? parseFloat(w.reliability) : (w.reliability ?? 0.5);
  const relBump = (isFinite(rel) ? rel : 0.5) * 4;
  const arch = (w.archetype || "").toLowerCase();
  const archBump = arch === "specialist" ? 4 : arch === "leader" ? 3 : arch === "reliable" ? 1 : 0;
  return Math.round((base + relBump + archBump) * 100) / 100;
}

function impliedBillRate(role: string | null | undefined): number {
  const base = ROLE_BASE_PAY_RATE[role || ""] ?? DEFAULT_BASE_PAY;
  // Contract bill rate = base pay × markup. This is what a staffing firm
  // would typically quote for this role — the worker's rate has to be
  // below this to keep margin.
  return Math.round((base * BILL_MARKUP) * 100) / 100;
}
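// Worked example of the heuristics above (hypothetical worker): a Welder with
// reliability 0.9 and archetype "specialist" → implied pay 26 + (0.9 * 4) + 4 = $33.60/hr;
// implied bill for the Welder role → 26 * 1.4 = $36.40/hr.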

// Parse a worker's role / reliability / archetype from a vector chunk
// shaped like "Name — Role in City, ST. Skills: ... . Certs: ... .
// Archetype: reliable. Reliability: 0.93, Availability: 0.73"
function parseWorkerChunk(chunk: string): { role?: string; reliability?: number; archetype?: string } {
  if (!chunk) return {};
  const out: any = {};
  const roleMatch = chunk.match(/—\s*([^\.]+?)\s+in\s+/);
  if (roleMatch) out.role = roleMatch[1].trim();
  const relMatch = chunk.match(/Reliability:\s*([\d\.]+)/i);
  if (relMatch) out.reliability = parseFloat(relMatch[1]);
  const archMatch = chunk.match(/Archetype:\s*([A-Za-z]+)/i);
  if (archMatch) out.archetype = archMatch[1];
  return out;
}
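// e.g. (hypothetical chunk) "Dana Reyes — Welder in Peoria, IL. Skills: TIG, MIG.
// Archetype: specialist. Reliability: 0.91, Availability: 0.62"
// → { role: "Welder", reliability: 0.91, archetype: "specialist" }.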

// Attach implied_pay_rate to each hybrid source in place, using either
// the row's native fields (from sql_results) or parsed from chunk_text.
function enrichWithRates(sources: any[]): void {
  for (const s of sources || []) {
    const parsed = parseWorkerChunk(s.chunk_text || "");
    const w = {
      role: s.role ?? parsed.role,
      reliability: s.reliability ?? s.rel ?? parsed.reliability,
      archetype: s.archetype ?? s.arch ?? parsed.archetype,
    };
    s.implied_pay_rate = impliedPayRate(w);
  }
}

// Seed playbook_memory from a filled contract so the next hybrid query
// ranks against it. Used by both runWeekSimulation (per-day) and the /log
// endpoint (per manual logging). Fail-soft — seeding is best-effort.
async function seedPlaybookFromContract(c: any) {
  const names = (c.matches || []).slice(0, 5)
    .map((m: any) => m.name || m.doc_id)
    .filter((n: string) => n && !n.startsWith("W500-"));
  if (!names.length) return;
  const op = `fill: ${c.role} x${c.headcount} in ${c.city}, ${c.state}`;
  try {
    await api("POST", "/vectors/playbook_memory/seed", {
      operation: op,
      approach: `${c.situation || c.priority || "fill"} → hybrid search`,
      context: `client=${c.client || ""} start=${c.start || ""}`,
      endorsed_names: names,
      append: true,
    });
  } catch {}
}
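// e.g. a filled contract { role: "Welder", headcount: 3, city: "Peoria", state: "IL",
// situation: "walkoff", matches: [{ name: "Dana Reyes" }, { doc_id: "W500-000123" }] }
// seeds operation "fill: Welder x3 in Peoria, IL" with endorsed_names ["Dana Reyes"] —
// unresolved W500-* doc ids are filtered out before seeding. (Values hypothetical.)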

async function runWeekSimulation() {
  const days = ["Monday","Tuesday","Wednesday","Thursday","Friday"];
  const staffers = ["Sarah (Lead)","Mike (Senior)","Kim (Junior)"];
  const results: any[] = [];
  let totalFilled = 0, totalNeeded = 0, emergencies = 0, handoffs = 0, playbookEntries = 0;

  for (let d = 0; d < days.length; d++) {
    const dayLabel = days[d];
    const numContracts = 4 + Math.floor(Math.random() * 5); // 4-8 per day
    const contracts: any[] = [];
    const staffer = staffers[d % staffers.length];
    const handoffTo = staffers[(d + 1) % staffers.length];

    for (let c = 0; c < numContracts; c++) {
      const state = pick(STATES);
      const city = pick(CITIES[state] || [state]);
      const role = pick(ROLES);
      // Weighted scenario selection
      const totalWeight = SCENARIOS.reduce((s, sc) => s + sc.weight, 0);
      let r = Math.random() * totalWeight;
      let scenario = SCENARIOS[0];
      for (const sc of SCENARIOS) { r -= sc.weight; if (r <= 0) { scenario = sc; break; } }
      const priority = scenario.priority;
      const headcount = priority === "urgent" ? 3 + Math.floor(Math.random() * 4) :
                        priority === "high" ? 2 + Math.floor(Math.random() * 3) :
                        priority === "medium" ? 2 + Math.floor(Math.random() * 3) :
                        1 + Math.floor(Math.random() * 2);
      const minRel = priority === "urgent" ? 0.6 : priority === "high" ? 0.75 : 0.8;
      const cid = `W${d+1}-${String(c+1).padStart(3,"0")}`;

      if (priority === "urgent") emergencies++;
      totalNeeded += headcount;

      // Run hybrid search — Phase 19: boost on so past playbooks shape ranking
      let filled = 0;
      let matches: any[] = [];
      try {
        const filt = `role = '${role}' AND state = '${state}' AND reliability >= ${minRel}`;
        const r = await api("POST", "/vectors/hybrid", {
          question: `Find ${role} workers in ${city}, ${state} for ${scenario.situation}`,
          index_name: "workers_500k_v1",
          sql_filter: filt,
          filter_dataset: "ethereal_workers",
          id_column: "worker_id",
          top_k: headcount + 2,
          generate: false,
          use_playbook_memory: true,
        });
        matches = (r.sources || []).slice(0, headcount).map((s: any) => ({
          doc_id: s.doc_id,
          name: s.chunk_text?.split("—")[0]?.trim() || s.doc_id,
          score: s.score,
          chunk_text: s.chunk_text || "",
          playbook_boost: s.playbook_boost || 0,
          playbook_citations: s.playbook_citations || [],
        }));
        filled = matches.length;
      } catch {}
      totalFilled += Math.min(filled, headcount);

      contracts.push({
        id: cid, client: makeClient(), role, state, city,
        headcount, filled: Math.min(filled, headcount), priority,
        start: pick(STARTS), notes: scenario.note, situation: scenario.situation,
        action: scenario.action, matches,
        staffer, handoff_to: d < 4 ? handoffTo : null,
      });
    }

    // End of day: seed playbook_memory with TODAY's filled contracts so
    // tomorrow's hybrid search ranks against them. This is the in-week
    // feedback loop — without this, day 5 doesn't benefit from day 1.
    for (const c of contracts) {
      if (c.matches && c.matches.length) {
        await seedPlaybookFromContract(c).catch(() => {});
      }
    }

    if (d < 4) {
      handoffs++;
      try {
        await api("POST", "/api/ingest/file?name=successful_playbooks", null); // just trigger
      } catch {}
    }
    playbookEntries++;

    results.push({
      label: dayLabel,
      staffer,
      handoff_to: d < 4 ? handoffTo : null,
      contracts,
      filled: contracts.reduce((s: number, c: any) => s + c.filled, 0),
      needed: contracts.reduce((s: number, c: any) => s + c.headcount, 0),
    });
  }

  const summary = {
    total_contracts: results.reduce((s, d) => s + d.contracts.length, 0),
    total_needed: totalNeeded,
    total_filled: totalFilled,
    fill_pct: Math.round(totalFilled / Math.max(totalNeeded, 1) * 100),
    emergencies,
    handoffs,
    playbook_entries: playbookEntries,
  };

  // BUG FIX 2026-04-20: previously this POSTed a multi-row CSV to
  // /ingest/file?name=successful_playbooks at end of every simulation.
  // That endpoint REPLACES the dataset's object list — so each
  // /simulation/run wiped the prior simulation's rows. The SQL
  // successful_playbooks table was never accumulating; it always reflected
  // only the most-recent simulation batch.
  //
  // Per-day per-contract seeding via /vectors/playbook_memory/seed
  // (added Pass 1, runs inside the day loop above) is the path that
  // actually accumulates feedback. The SQL successful_playbooks table is
  // intentionally not written by /simulation/run anymore until a proper
  // append surface exists.

  return { days: results, summary };
}

// Kick off the push/alerts daemon once per process. Placed at the END of
// the module so all const/let declarations in the alerts block (paths,
// helpers, etc.) have evaluated before the daemon reads them. Calling
// from earlier in the file would hit a temporal dead zone on these
// bindings.
startAlertsDaemon().catch(e => console.error(`[alerts] startup error: ${e.message}`));