Honesty fixes — no hard-coded counts, dynamic sample CSV
- generateSampleRosterCSV(): generates 120–180 randomized rows per call with timestamp-prefixed IDs, so re-uploads are never deduplicated (replaces the static 25-row sample).
- /system/summary: reports true counts via SQL COUNT(*) and surfaces manifest_drift (audit caught candidates: manifest said 100K, actual 1K).
- search.html: loadSystemSummary() hydrates live counts; removed hard-coded "500K" strings.
- MCP tool description: "candidates (100K)" → "candidates (1K)"; added "workers_500k (500K)".
This commit is contained in:
parent
af3856b103
commit
0ff091c173
@ -134,7 +134,7 @@ details .body{padding-top:10px;font-size:12px;color:#8b949e}
|
||||
<div class="chapter">
|
||||
<div class="num">Chapter 4</div>
|
||||
<h2>Watch the system rank candidates in real time</h2>
|
||||
<div class="lede">This takes the most recent Chicago permit, derives the staffing need, pulls ranked candidates from the 500K bench, and shows you why each one ranked. Everything below loaded in about 3 seconds against the live system.</div>
|
||||
<div class="lede">This takes the most recent Chicago permit, derives the staffing need, pulls ranked candidates from the bench, and shows you why each one ranked. Everything below loaded in about 3 seconds against the live system.</div>
|
||||
<div id="ch4-demo"><div class="loading">Running demo query…</div></div>
|
||||
</div>
|
||||
|
||||
|
||||
@ -86,7 +86,7 @@ server.tool(
|
||||
|
||||
server.tool(
|
||||
"query_sql",
|
||||
"Run SQL against any lakehouse dataset. Tables: ethereal_workers (10K), candidates (100K), timesheets (1M), call_log (800K), email_log (500K), placements (50K), job_orders (15K), clients (2K).",
|
||||
"Run SQL against any lakehouse dataset. Tables: ethereal_workers (10K), candidates (1K), workers_500k (500K), timesheets (1M), call_log (800K), email_log (500K), placements (50K), job_orders (15K), clients (2K).",
|
||||
{ sql: z.string().describe("SQL query") },
|
||||
async ({ sql }) => {
|
||||
const r = await api("POST", "/query/sql", { sql });
|
||||
@ -749,12 +749,30 @@ async function main() {
|
||||
});
|
||||
}
|
||||
|
||||
// Sample files (downloadable + fetchable from the onboard wizard)
|
||||
// Sample CSV — generated fresh on every request so content-hash
|
||||
// dedup on the ingest side always sees a new payload (two uploads
|
||||
// in a row would otherwise be a no-op). Each generation has
|
||||
// unique worker_ids (timestamp-prefixed), randomized names + roles
|
||||
// + geos from realistic pools, and a random size (~120-180 rows)
|
||||
// so the demo looks different every time and numbers actually
|
||||
// update visibly in the dashboard after onboarding.
|
||||
if (url.pathname.startsWith("/samples/")) {
|
||||
const name = url.pathname.slice("/samples/".length);
|
||||
if (!/^[a-zA-Z0-9_\-\.]+\.csv$/.test(name)) {
|
||||
return err("invalid sample filename", 400);
|
||||
}
|
||||
if (name === "staffing_roster_sample.csv") {
|
||||
const csv = generateSampleRosterCSV();
|
||||
return new Response(csv, {
|
||||
headers: {
|
||||
...cors,
|
||||
"Content-Type": "text/csv",
|
||||
"Content-Disposition": `attachment; filename="${name}"`,
|
||||
"Cache-Control": "no-store",
|
||||
},
|
||||
});
|
||||
}
|
||||
// Other sample filenames fall through to the static dir
|
||||
const path = `${import.meta.dir}/samples/${name}`;
|
||||
const file = Bun.file(path);
|
||||
if (!(await file.exists())) return err("sample not found", 404);
|
||||
@ -764,6 +782,57 @@ async function main() {
|
||||
});
|
||||
}
|
||||
|
||||
// System-wide scale summary — truthful numbers for the UI.
// Pulls row counts via SQL (COUNT(*) from parquet footers) for
// the key datasets rather than trusting catalog manifests, which
// can go stale when data changes without re-registering. The
// workers_500k manifest is correct (500K); candidates manifest
// lied (said 100K, actual 1K) — the audit caught it.
// Everything else uses manifest row_count since it's O(1).
if (url.pathname === "/system/summary") {
  // Fetch catalog, vector indexes, and the two live COUNT(*) probes in
  // parallel. Every call is fail-soft: a failed fetch degrades to an
  // empty list / null result instead of failing the whole summary.
  const [ds, indexes, workersCount, candsCount] = await Promise.all([
    api("GET", "/catalog/datasets").catch(() => [] as any),
    api("GET", "/vectors/indexes").catch(() => [] as any),
    api("POST", "/query/sql", { sql: "SELECT COUNT(*) AS c FROM workers_500k" })
      .catch(() => null as any),
    api("POST", "/query/sql", { sql: "SELECT COUNT(*) AS c FROM candidates" })
      .catch(() => null as any),
  ]);
  const datasets = Array.isArray(ds) ? ds : [];
  const idxs = Array.isArray(indexes) ? indexes : [];
  // NOTE(review): if a COUNT(*) probe fails, these fall back to 0 and the
  // drift audit below will report "manifest vs 0" — consider distinguishing
  // a failed query from a genuinely empty table.
  const workers = Number(workersCount?.rows?.[0]?.c ?? 0);
  const candidates = Number(candsCount?.rows?.[0]?.c ?? 0);
  // Sum manifest row_counts EXCLUDING workers_500k + candidates,
  // then add the truthful SQL counts. This gives a total that
  // reflects live state for the two most-quoted tables.
  const otherManifest = datasets
    .filter((d: any) => d?.name !== "workers_500k" && d?.name !== "candidates")
    .reduce((s: number, d: any) => s + (d?.row_count || 0), 0);
  const totalRows = otherManifest + workers + candidates;
  const totalChunks = idxs.reduce((s: number, i: any) => s + (i?.chunk_count || 0), 0);
  // Manifest drift audit — surface any cases where manifest
  // disagrees with SQL for the two spot-checked tables so the UI
  // can note it if ever meaningful.
  const drift: any[] = [];
  const workersManifest = datasets.find((d: any) => d?.name === "workers_500k")?.row_count;
  const candidatesManifest = datasets.find((d: any) => d?.name === "candidates")?.row_count;
  if (workersManifest !== undefined && workersManifest !== workers) {
    drift.push({ dataset: "workers_500k", manifest: workersManifest, actual: workers });
  }
  if (candidatesManifest !== undefined && candidatesManifest !== candidates) {
    drift.push({ dataset: "candidates", manifest: candidatesManifest, actual: candidates });
  }
  return ok({
    datasets: datasets.length,
    total_rows: totalRows,
    total_chunks: totalChunks,
    workers_500k_rows: workers,
    candidates_rows: candidates,
    indexes: idxs.length,
    manifest_drift: drift,
  });
}
|
||||
|
||||
// Proof JSON API (same data, no HTML)
|
||||
if (url.pathname === "/proof.json") {
|
||||
const ds = await api("GET", "/catalog/datasets") as any[];
|
||||
@ -1806,6 +1875,115 @@ async function runAlertsOnce() {
|
||||
// Seed playbook_memory from a filled contract so the next hybrid query
|
||||
// ranks against it. Used by both runWeekSimulation (per-day) and the /log
|
||||
// endpoint (per manual logging). Fail-soft — seeding is best-effort.
|
||||
// ─── Sample CSV generator ───────────────────────────────────────────────
// Fresh randomized staffing roster per request. Prevents the "upload
// same file twice and it's a no-op" problem from the static sample,
// and makes the dashboard numbers visibly update after onboarding.

// Name pools deliberately include apostrophes and diacritics
// (O'Brien, Lindström, Saitō) so generated rosters exercise the
// email-handle sanitizer and CSV escaping in generateSampleRosterCSV.
const SAMPLE_FIRST_NAMES = [
  "Sarah","Michael","Maria","David","Jennifer","Robert","Amanda","Carlos",
  "Kim","James","Priya","Thomas","Lisa","Brandon","Emily","Marcus","Anita",
  "Dmitri","Rachel","Samuel","Jordan","Natalia","Henry","Ava","Tyler",
  "Hannah","Luis","Aisha","Victor","Monica","Derek","Yuki","Fatima","Kwame",
  "Isabel","Rafael","Elena","Hiroshi","Nadia","Oscar","Sofia","Anders",
  "Leila","Jamal","Chioma","Pavel","Bianca","Tariq","Inez","Reuben","Mira",
];
const SAMPLE_LAST_NAMES = [
  "Johnson","Chen","Rodriguez","Park","Lopez","Williams","Taylor","Mendoza",
  "Nguyen","O'Brien","Patel","Anderson","Nakamura","Moore","Zhang","Brooks",
  "Volkov","Kim","Thompson","Martinez","Soto","Robinson","Clark","Hayes",
  "Reyes","Brown","Wright","Diaz","Powell","Green","Castillo","Iwu",
  "Kowalski","Lindström","Oyelaran","Saitō","Abebe","Mehta","Blanchard",
];
// Light-industrial roles; each role keys into SAMPLE_SKILL_POOLS below.
const SAMPLE_ROLES = [
  "Forklift Operator","Welder","Warehouse Associate","Machine Operator",
  "Loader","Maintenance Tech","Quality Tech","Electrician","Line Lead",
  "Material Handler","Production Worker","Assembler","Shipping Clerk",
];
// Midwest / mid-south metros as [city, state] tuples.
const SAMPLE_CITY_STATE: Array<[string, string]> = [
  ["Chicago","IL"],["Springfield","IL"],["Rockford","IL"],["Peoria","IL"],
  ["Indianapolis","IN"],["Fort Wayne","IN"],["Evansville","IN"],["South Bend","IN"],
  ["Columbus","OH"],["Cleveland","OH"],["Cincinnati","OH"],["Toledo","OH"],
  ["St. Louis","MO"],["Kansas City","MO"],["Springfield","MO"],
  ["Nashville","TN"],["Memphis","TN"],["Knoxville","TN"],
  ["Louisville","KY"],["Lexington","KY"],
  ["Milwaukee","WI"],["Madison","WI"],["Green Bay","WI"],
  ["Detroit","MI"],["Grand Rapids","MI"],["Lansing","MI"],
];
// Role → plausible skills. generateSampleRosterCSV draws 2-4 entries per
// row and falls back to ["general"] for any role missing from this map.
const SAMPLE_SKILL_POOLS: Record<string, string[]> = {
  "Forklift Operator": ["pallet jack","hazmat","loading dock","overhead crane","cold storage","shipping","team lead"],
  "Welder": ["TIG","MIG","pipe welding","blueprint reading","grinder","confined space"],
  "Warehouse Associate": ["inventory","RF scanner","pick-to-light","Excel","packaging","team lead"],
  "Machine Operator": ["CNC","SPC","gauge R&R","lean manufacturing","conveyor ops","first article"],
  "Loader": ["loading dock","team lead","cold storage","first aid","bilingual"],
  "Maintenance Tech": ["electrical","PLC","hydraulics","CMMS","LOTO","troubleshooting"],
  "Quality Tech": ["ISO 9001","calibration","root cause analysis","SPC","Six Sigma"],
  "Electrician": ["conduit","motor controls","troubleshooting","PLC","NEC"],
  "Line Lead": ["team lead","training","SPC","scheduling"],
  "Material Handler": ["RF scanner","pallet jack","receiving","packaging"],
  "Production Worker": ["line work","first article","labeling","packaging","quality inspection"],
  "Assembler": ["assembly","gauge R&R","line lead","first article"],
  "Shipping Clerk": ["shipping","receiving","RF scanner","bilingual"],
};
// Certifications: 1-3 drawn per generated row.
const SAMPLE_CERT_POOL = ["OSHA-10","OSHA-30","Forklift","Hazmat","First Aid","LOTO","Confined Space","AWS D1.1","ServSafe","Six Sigma Green"];
const SAMPLE_ARCHETYPES = ["reliable","specialist","leader","communicator","flexible"];
||||
|
||||
// Uniform random choice from `arr`. Yields undefined (despite the T return
// type) for an empty array — callers here only pass non-empty pools.
function pick<T>(arr: T[]): T {
  const idx = Math.floor(Math.random() * arr.length);
  return arr[idx];
}
|
||||
function pickN<T>(arr: T[], n: number): T[] {
|
||||
const copy = arr.slice();
|
||||
const out: T[] = [];
|
||||
for (let i = 0; i < n && copy.length > 0; i++) {
|
||||
out.push(copy.splice(Math.floor(Math.random() * copy.length), 1)[0]);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
function csvEscape(s: string): string {
|
||||
if (s.indexOf(",") >= 0 || s.indexOf('"') >= 0 || s.indexOf("\n") >= 0) {
|
||||
return `"${s.replace(/"/g, '""')}"`;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
function generateSampleRosterCSV(): string {
|
||||
const count = 120 + Math.floor(Math.random() * 61); // 120-180
|
||||
const ts = Date.now();
|
||||
const lines: string[] = [
|
||||
"worker_id,name,role,city,state,email,phone,skills,certifications,availability,reliability,archetype",
|
||||
];
|
||||
for (let i = 0; i < count; i++) {
|
||||
const first = pick(SAMPLE_FIRST_NAMES);
|
||||
const last = pick(SAMPLE_LAST_NAMES);
|
||||
const name = `${first} ${last}`;
|
||||
const role = pick(SAMPLE_ROLES);
|
||||
const [city, state] = pick(SAMPLE_CITY_STATE);
|
||||
const handle = `${first}.${last}`.toLowerCase().replace(/[^a-z\.]/g, "");
|
||||
const email = `${handle}${Math.floor(Math.random() * 1000)}@example.com`;
|
||||
const area = ["312","773","630","708","331","815","217","219","260","614","216","513","419","314","816","615","901","502","414","608","313","616"][Math.floor(Math.random() * 22)];
|
||||
const phone = `(${area}) 555-${String(1000 + Math.floor(Math.random() * 9000))}`;
|
||||
const skillPool = SAMPLE_SKILL_POOLS[role] || ["general"];
|
||||
const skills = pickN(skillPool, 2 + Math.floor(Math.random() * 3)).join("|");
|
||||
const certs = pickN(SAMPLE_CERT_POOL, 1 + Math.floor(Math.random() * 3)).join("|");
|
||||
const availability = (0.3 + Math.random() * 0.69).toFixed(2);
|
||||
const reliability = (0.55 + Math.random() * 0.44).toFixed(2);
|
||||
const archetype = pick(SAMPLE_ARCHETYPES);
|
||||
lines.push([
|
||||
`W-${ts}-${String(i).padStart(4, "0")}`,
|
||||
csvEscape(name),
|
||||
csvEscape(role),
|
||||
csvEscape(city),
|
||||
state,
|
||||
email,
|
||||
phone,
|
||||
csvEscape(skills),
|
||||
csvEscape(certs),
|
||||
availability,
|
||||
reliability,
|
||||
archetype,
|
||||
].join(","));
|
||||
}
|
||||
return lines.join("\n") + "\n";
|
||||
}
|
||||
|
||||
// ─── Rate/margin awareness ──────────────────────────────────────────────
|
||||
// Derive implied pay and bill rates per worker / per contract without
|
||||
// schema changes. Numbers are industry heuristics — a real deployment
|
||||
|
||||
@ -127,7 +127,7 @@ body{font-family:'Inter',-apple-system,system-ui,'Segoe UI',sans-serif;backgroun
|
||||
<div class="section" id="live-contracts-section">
|
||||
<div class="section-header">
|
||||
<span class="section-title">Live Contracts — Chicago Permits → Proposed Fills</span>
|
||||
<span class="section-meta">Real public permit data + our 500K worker bench + past playbook patterns</span>
|
||||
<span class="section-meta" id="live-contracts-meta">Real public permit data + worker bench + past playbook patterns</span>
|
||||
</div>
|
||||
<div id="live-contracts"><div class="ld">Loading live contracts...</div></div>
|
||||
</div>
|
||||
@ -151,7 +151,7 @@ body{font-family:'Inter',-apple-system,system-ui,'Segoe UI',sans-serif;backgroun
|
||||
<div class="section">
|
||||
<div class="section-header">
|
||||
<span class="section-title">Worker Search</span>
|
||||
<span class="section-meta">Natural language · 500K profiles</span>
|
||||
<span class="section-meta" id="worker-search-meta">Natural language search</span>
|
||||
</div>
|
||||
<details class="sa" open><summary>Search all workers</summary><div class="inner">
|
||||
<input type="text" id="sq" placeholder="Try: reliable forklift operator available in Nashville" onkeydown="if(event.key==='Enter')doSearch()">
|
||||
@ -160,14 +160,41 @@ body{font-family:'Inter',-apple-system,system-ui,'Segoe UI',sans-serif;backgroun
|
||||
<button class="sbtn" onclick="doSearch()">Find Workers</button><div id="sresults"></div></div></details>
|
||||
</div>
|
||||
|
||||
<div class="ft">Staffing Co-Pilot · Hybrid SQL + Vector Search · 500K embedded profiles · <a href="console">Console</a> · <a href="proof">Architecture</a></div>
|
||||
<div class="ft" id="footer">Staffing Co-Pilot · Hybrid SQL + Vector Search · loading scale… · <a href="console">Console</a> · <a href="proof">Architecture</a></div>
|
||||
</div>
|
||||
<script>
|
||||
var P=location.pathname.indexOf('/lakehouse')>=0?'/lakehouse':'';
|
||||
var A=location.origin+P;
|
||||
var AC=['#1a2744','#1a3a2a','#2a1a3a','#3a2a1a','#1a3a3a','#2a2a1a'];
|
||||
var lastQuery='';
|
||||
window.addEventListener('load',function(){loadDay();loadStaffingForecast();loadLiveContracts();loadMarket();loadLearning()});
|
||||
window.addEventListener('load',function(){loadSystemSummary();loadDay();loadStaffingForecast();loadLiveContracts();loadMarket();loadLearning()});
|
||||
|
||||
// Hydrate the page's scale figures from /system/summary so no counts are
// hard-coded in the markup. Best-effort: any failure leaves the static
// placeholder text untouched.
function loadSystemSummary(){
  api('/system/summary',{}).then(function(summary){
    if(!summary) return;
    var fmt=function(v){return (v||0).toLocaleString();};
    var totalRows=fmt(summary.total_rows);
    var workers=fmt(summary.workers_500k_rows);
    var chunks=fmt(summary.total_chunks);
    var datasetCount=summary.datasets||0;
    var contractsMeta=document.getElementById('live-contracts-meta');
    if(contractsMeta) contractsMeta.textContent='Real public permit data + '+workers+' worker bench + past playbook patterns';
    var searchMeta=document.getElementById('worker-search-meta');
    if(searchMeta) searchMeta.textContent='Natural language · '+workers+' profiles across '+datasetCount+' datasets';
    var foot=document.getElementById('footer');
    if(foot){
      // Rebuild footer node-by-node: text · Console link · separator · Architecture link.
      foot.textContent='';
      foot.appendChild(document.createTextNode('Staffing Co-Pilot · Hybrid SQL + Vector Search · '+totalRows+' rows across '+datasetCount+' datasets · '+chunks+' vector chunks · '));
      var consoleLink=document.createElement('a');
      consoleLink.href='console';
      consoleLink.textContent='Console';
      foot.appendChild(consoleLink);
      foot.appendChild(document.createTextNode(' · '));
      var archLink=document.createElement('a');
      archLink.href='proof';
      archLink.textContent='Architecture';
      foot.appendChild(archLink);
    }
    // Also update the collapsible search box label if not yet populated
    // (the pattern only matches the pre-hydration text, so this fires once).
    var label=document.querySelector('.sa summary');
    if(label&&/Search all\s*\d*\s*workers/.test(label.textContent)){
      label.textContent='Search all '+workers+' workers';
    }
  }).catch(function(){/* non-fatal */});
}
|
||||
|
||||
function loadStaffingForecast(){
|
||||
api('/intelligence/staffing_forecast',{}).then(function(r){
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user