From 23eb04a145f33afe22606eaa77d43708210a6ae6 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 20 Apr 2026 18:13:56 -0500 Subject: [PATCH] =?UTF-8?q?Onboarding=20wizard=20=E2=80=94=20ingest=20any?= =?UTF-8?q?=20staffing=20CSV=20in=203=20steps?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New /onboard page. Client-facing wizard for getting real data into the system without engineering help. Flow: 1. Drop a CSV (or click 'Use the sample as my data' — ships a 25-row realistic staffing roster under /samples/staffing_roster_sample.csv) 2. Browser parses client-side. Columns auto-typed (text/int/decimal/date). PII flagged by name hint AND content regex (emails, phones). First rows previewed. Read-only — nothing written yet. 3. Name the dataset (lowercase letters, digits, and underscores; must start with a letter). Commit. 4. Post-commit: dataset is live. Shows 4 next steps the operator can take (SQL query, vector index, dashboard search, playbook training). Backend: - /onboard serves onboard.html - /samples/*.csv serves CSV files from mcp-server/samples/ with filename validation (only names matching ^[a-zA-Z0-9_\-\.]+\.csv$ — no path separators, which prevents path traversal) - /onboard/ingest forwards multipart/form-data to gateway /ingest/file preserving the boundary. The generic /api/* passthrough breaks multipart because it reads as text and forwards as JSON; this route uses arrayBuffer + original Content-Type. Verified end-to-end: upload sample roster (25 rows, 12 columns) → parse in browser → show columns + PII flags + preview → commit → gateway writes Parquet, registers in catalog → immediately queryable: SELECT * FROM onboard_demo2 LIMIT 3 → Sarah Johnson, Forklift Operator, Chicago, IL, 0.92 Round-trip <1 second. Nav updated on all pages to link Onboard. Shipped with a sample CSV so the full flow is demonstrable without real client data. When a real client shows up, same path — they upload their CSV. No engineering ticket, no code change, no schema pre-definition.
Security: sample filename regex prevents path traversal. CSV parse is client-side pure JS (no DOM injection). Commit uses existing /ingest/file validation (schema fingerprint, PII server-side, content-hash dedup). --- mcp-server/console.html | 1 + mcp-server/index.ts | 48 ++ mcp-server/onboard.html | 412 ++++++++++++++++++ mcp-server/proof.html | 1 + mcp-server/samples/staffing_roster_sample.csv | 26 ++ mcp-server/search.html | 1 + mcp-server/spec.html | 1 + 7 files changed, 490 insertions(+) create mode 100644 mcp-server/onboard.html create mode 100644 mcp-server/samples/staffing_roster_sample.csv diff --git a/mcp-server/console.html b/mcp-server/console.html index 2039283..6bef979 100644 --- a/mcp-server/console.html +++ b/mcp-server/console.html @@ -97,6 +97,7 @@ details .body{padding-top:10px;font-size:12px;color:#8b949e} Walkthrough Architecture Spec + Onboard
Reading live state…
diff --git a/mcp-server/index.ts b/mcp-server/index.ts index f1c09aa..b17d681 100644 --- a/mcp-server/index.ts +++ b/mcp-server/index.ts @@ -650,6 +650,54 @@ async function main() { }); } + // Onboard — client-facing ingest wizard. Upload any CSV, preview + // columns + PII + sample rows, commit via /ingest/file. Works + // with a shipped sample roster so anyone can trial the flow + // without real client data. + if (url.pathname === "/onboard") { + return new Response(Bun.file(import.meta.dir + "/onboard.html"), { + headers: { ...cors, "Content-Type": "text/html" }, + }); + } + + // Onboard ingest — forwards multipart/form-data correctly to + // the Rust gateway /ingest/file. The generic /api/* passthrough + // can't handle multipart because it reads as text and forwards + // as JSON, losing the boundary. This route preserves the body + // and Content-Type. + if (url.pathname === "/onboard/ingest" && req.method === "POST") { + const name = url.searchParams.get("name"); + if (!name || !/^[a-z][a-z0-9_]*$/.test(name)) { + return err("dataset name required (lowercase+underscores)", 400); + } + const contentType = req.headers.get("content-type") || ""; + const upstream = await fetch(`${BASE}/ingest/file?name=${encodeURIComponent(name)}`, { + method: "POST", + headers: { "Content-Type": contentType }, + body: await req.arrayBuffer(), + }); + const body = await upstream.text(); + return new Response(body, { + status: upstream.status, + headers: { ...cors, "Content-Type": upstream.headers.get("content-type") || "application/json" }, + }); + } + + // Sample files (downloadable + fetchable from the onboard wizard) + if (url.pathname.startsWith("/samples/")) { + const name = url.pathname.slice("/samples/".length); + if (!/^[a-zA-Z0-9_\-\.]+\.csv$/.test(name)) { + return err("invalid sample filename", 400); + } + const path = `${import.meta.dir}/samples/${name}`; + const file = Bun.file(path); + if (!(await file.exists())) return err("sample not found", 404); + return new 
Response(file, { + headers: { ...cors, "Content-Type": "text/csv", + "Content-Disposition": `attachment; filename="${name}"` }, + }); + } + // Proof JSON API (same data, no HTML) if (url.pathname === "/proof.json") { const ds = await api("GET", "/catalog/datasets") as any[]; diff --git a/mcp-server/onboard.html b/mcp-server/onboard.html new file mode 100644 index 0000000..60105bf --- /dev/null +++ b/mcp-server/onboard.html @@ -0,0 +1,412 @@ + + + +Lakehouse — Connect Your Data + + + +
+

Lakehouse — Connect Your Data

+ +
30 minutes from CSV to live search
+
+ +
+ +
+

Ingest any staffing CSV in three steps

+

+ Upload your ATS export, your worker roster, or any CSV with a name column. + The wizard auto-detects columns, flags PII, previews the first rows, then ingests + as a queryable Parquet dataset. Everything that follows — hybrid search, + playbook ranking, pattern discovery — works against your data automatically. +

+
+ + +
+

Pick a file

+
Drag a CSV in, pick from disk, or use the sample roster to see the flow without any real data.
+ +
+ Download sample roster (25 workers) + +
+
+ + + + + + + + + + +
+ + + + + diff --git a/mcp-server/proof.html b/mcp-server/proof.html index 4b44168..87a4797 100644 --- a/mcp-server/proof.html +++ b/mcp-server/proof.html @@ -83,6 +83,7 @@ pre{background:#161b22;border:1px solid #171d27;border-radius:8px;padding:14px 1 Walkthrough Architecture Spec + Onboard
Running live tests…
diff --git a/mcp-server/samples/staffing_roster_sample.csv b/mcp-server/samples/staffing_roster_sample.csv new file mode 100644 index 0000000..05c8a73 --- /dev/null +++ b/mcp-server/samples/staffing_roster_sample.csv @@ -0,0 +1,26 @@ +worker_id,name,role,city,state,email,phone,skills,certifications,availability,reliability,archetype +W-1001,Sarah Johnson,Forklift Operator,Chicago,IL,sarah.johnson@example.com,(312) 555-0101,pallet jack|hazmat|loading dock,OSHA-10|Forklift,0.92,0.88,reliable +W-1002,Michael Chen,Welder,Toledo,OH,m.chen@example.com,(419) 555-0234,TIG|MIG|blueprint reading|grinder,OSHA-10|AWS D1.1,0.71,0.94,specialist +W-1003,Maria Rodriguez,Warehouse Associate,Chicago,IL,maria.r@example.com,(312) 555-0311,inventory|RF scanner|pick-to-light,OSHA-10,0.85,0.80,reliable +W-1004,David Park,Machine Operator,Detroit,MI,dpark@example.com,(313) 555-0412,CNC|gauge R&R|lean manufacturing,OSHA-10|First Aid,0.60,0.91,specialist +W-1005,Jennifer Lopez,Loader,Chicago,IL,j.lopez@example.com,(312) 555-0501,loading dock|team lead|cold storage,OSHA-10|Forklift,0.95,0.76,communicator +W-1006,Robert Williams,Maintenance Tech,Milwaukee,WI,rwilliams@example.com,(414) 555-0612,electrical|PLC|hydraulics|CMMS,OSHA-10|LOTO,0.45,0.93,specialist +W-1007,Amanda Taylor,Quality Tech,Toledo,OH,a.taylor@example.com,(419) 555-0701,ISO 9001|calibration|root cause analysis,OSHA-10|Six Sigma Green,0.80,0.87,leader +W-1008,Carlos Mendoza,Electrician,Chicago,IL,c.mendoza@example.com,(312) 555-0808,conduit|motor controls|troubleshooting,OSHA-30|NEC,0.88,0.96,specialist +W-1009,Kim Nguyen,Forklift Operator,Detroit,MI,k.nguyen@example.com,(313) 555-0915,pallet jack|shipping|team lead,OSHA-10|Forklift,0.73,0.82,flexible +W-1010,James O'Brien,Welder,Milwaukee,WI,jobrien@example.com,(414) 555-1021,TIG|pipe welding|confined space,OSHA-10|AWS D1.1,0.90,0.79,reliable +W-1011,Priya Patel,Production Worker,Chicago,IL,priya.p@example.com,(312) 555-1118,line work|first 
article|labeling,OSHA-10,0.66,0.85,reliable +W-1012,Thomas Anderson,Material Handler,Toledo,OH,tanderson@example.com,(419) 555-1225,RF scanner|pallet jack|receiving,OSHA-10|Forklift,0.82,0.77,flexible +W-1013,Lisa Nakamura,Warehouse Associate,Detroit,MI,l.nakamura@example.com,(313) 555-1312,inventory|Excel|packaging,OSHA-10,0.77,0.89,communicator +W-1014,Brandon Moore,Machine Operator,Milwaukee,WI,b.moore@example.com,(414) 555-1405,CNC|SPC|conveyor ops,OSHA-10,0.56,0.94,specialist +W-1015,Emily Zhang,Assembler,Chicago,IL,emily.z@example.com,(312) 555-1502,assembly|gauge R&R|line lead,OSHA-10|Six Sigma Yellow,0.91,0.81,leader +W-1016,Marcus Johnson,Forklift Operator,Toledo,OH,mjohnson@example.com,(419) 555-1609,pallet jack|hazmat|overhead crane,OSHA-10|Forklift|Hazmat,0.68,0.88,reliable +W-1017,Anita Brooks,Line Lead,Detroit,MI,a.brooks@example.com,(313) 555-1715,team lead|training|SPC,OSHA-30|Six Sigma Green,0.52,0.97,leader +W-1018,Dmitri Volkov,Welder,Chicago,IL,d.volkov@example.com,(312) 555-1820,TIG|MIG|blueprint reading,OSHA-10|AWS D1.1|Confined Space,0.84,0.83,specialist +W-1019,Rachel Kim,Loader,Milwaukee,WI,r.kim@example.com,(414) 555-1911,loading dock|first aid|bilingual,OSHA-10|First Aid,0.79,0.84,communicator +W-1020,Samuel Park,Maintenance Tech,Toledo,OH,spark@example.com,(419) 555-2012,electrical|PLC|troubleshooting|CMMS,OSHA-10|LOTO,0.70,0.92,specialist +W-1021,Jordan Williams,Quality Tech,Detroit,MI,j.williams@example.com,(313) 555-2117,ISO 9001|calibration|SPC,OSHA-10|Six Sigma Green,0.83,0.86,leader +W-1022,Natalia Soto,Warehouse Associate,Chicago,IL,n.soto@example.com,(312) 555-2221,RF scanner|inventory|team lead,OSHA-10,0.89,0.90,communicator +W-1023,Henry Chen,Machine Operator,Milwaukee,WI,h.chen@example.com,(414) 555-2318,CNC|conveyor ops|root cause,OSHA-10,0.64,0.93,specialist +W-1024,Ava Martinez,Forklift Operator,Toledo,OH,a.martinez@example.com,(419) 555-2411,pallet jack|bilingual|cold storage,OSHA-10|Forklift,0.87,0.80,flexible 
+W-1025,Tyler Rodriguez,Production Worker,Chicago,IL,t.rodriguez@example.com,(312) 555-2515,line work|packaging|quality inspection,OSHA-10,0.74,0.86,reliable diff --git a/mcp-server/search.html b/mcp-server/search.html index 9578ed7..77812da 100644 --- a/mcp-server/search.html +++ b/mcp-server/search.html @@ -106,6 +106,7 @@ body{font-family:'Inter',-apple-system,system-ui,'Segoe UI',sans-serif;backgroun Walkthrough Architecture Spec + Onboard
Loading...
diff --git a/mcp-server/spec.html b/mcp-server/spec.html index 35a71b9..0a5d83d 100644 --- a/mcp-server/spec.html +++ b/mcp-server/spec.html @@ -80,6 +80,7 @@ table.plain tr:hover td{background:#0d1117} Walkthrough Architecture Spec + Onboard
v1 · 2026-04-20