diff --git a/mcp-server/index.ts b/mcp-server/index.ts index 437e794..873eaae 100644 --- a/mcp-server/index.ts +++ b/mcp-server/index.ts @@ -789,6 +789,26 @@ async function main() { // workers_500k manifest is correct (500K); candidates manifest // lied (said 100K, actual 1K) — the audit caught it. // Everything else uses manifest row_count since it's O(1). + // Phase 24 refinement — unified memory query endpoint. Accepts + // any input (natural language, structured JSON, mixed) via + // POST body {input: }. Normalizer handles the shape. + // Returns a single bundle with every memory surface relevant: + // playbook workers, KB recommendation, neighbor signatures, + // prior lessons, top staffers, discovered patterns. + if (url.pathname === "/memory/query" && req.method === "POST") { + try { + const body: any = await req.json(); + const { queryMemory } = await import("../tests/multi-agent/memory_query.ts"); + const result = await queryMemory(body.input ?? body); + return ok(result); + } catch (e) { + return new Response(JSON.stringify({ error: (e as Error).message }), { + status: 500, + headers: { "content-type": "application/json" }, + }); + } + } + if (url.pathname === "/system/summary") { const [ds, indexes, workersCount, candsCount] = await Promise.all([ api("GET", "/catalog/datasets").catch(() => [] as any), diff --git a/tests/multi-agent/memory_query.ts b/tests/multi-agent/memory_query.ts new file mode 100644 index 0000000..be203d2 --- /dev/null +++ b/tests/multi-agent/memory_query.ts @@ -0,0 +1,239 @@ +// Unified memory query — one surface that takes any input, normalizes +// it, and returns every memory signal the system has: playbook workers, +// KB pathway recommendations, prior lessons, staffer competence stats, +// and cross-staffer discovered patterns. This is the "seamlessly with +// whatever input" answer J framed it as. 
//
// Why a unified gateway instead of separate calls: the memory surfaces
// are semantically related (playbook_memory workers reference runs the
// KB tracks; prior_lessons reference staffers the stats rank). Callers
// shouldn't have to know the topology.

import { readFile, readdir } from "node:fs/promises";
import { join } from "node:path";
import { normalizeInput, type NormalizedInput } from "./normalize.ts";
import { findNeighbors, loadRecommendation, loadStafferStats, computeSignature, type StafferStats, type PathwayRecommendation } from "./kb.ts";
import { GATEWAY, SIDECAR } from "./agent.ts";

export interface MemoryQueryResult {
  input: NormalizedInput;
  playbook_workers: Array<{
    doc_id: string;
    name: string;
    score: number;
    playbook_boost: number;
    playbook_citations: string[];
  }>;
  pathway_recommendation: PathwayRecommendation | null;
  neighbor_signatures: Array<{
    sig_hash: string;
    events_digest: string;
    similarity: number;
    weighted_score: number;
    best_staffer_id: string | null;
  }>;
  prior_lessons: Array<{
    date: string;
    client: string;
    cities: string;
    lesson: string;
  }>;
  top_staffers: StafferStats[];
  discovered_patterns: {
    sig_hash: string;
    top_workers: Array<{ name: string; endorsements: number }>;
  } | null;
  latency_ms: {
    normalize: number;
    playbook_search: number;
    kb_neighbors: number;
    staffer_stats: number;
    prior_lessons: number;
    total: number;
  };
}

// Runs `work` and reports how long that one piece of work took to settle.
// Used so each latency_ms field reflects its own source instead of the
// wall-clock time of the whole fan-out.
async function timed<T>(work: () => Promise<T>): Promise<{ value: T; ms: number }> {
  const started = Date.now();
  const value = await work();
  return { value, ms: Date.now() - started };
}

// Doubles single quotes so a value can sit inside a '...' SQL literal.
function sqlEscape(text: string): string {
  return text.replace(/'/g, "''");
}

// Hybrid vector search against the workers index with playbook memory
// enabled. Best-effort: a non-OK response or any thrown error yields [].
async function searchPlaybookWorkers(
  role: string,
  city: string,
  state: string,
): Promise<MemoryQueryResult["playbook_workers"]> {
  try {
    const resp = await fetch(`${GATEWAY}/vectors/hybrid`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
        index_name: "workers_500k_v1",
        sql_filter: `role = '${sqlEscape(role)}' AND city = '${sqlEscape(city)}' AND state = '${sqlEscape(state)}' AND CAST(availability AS DOUBLE) > 0.5`,
        question: `${role} ${city} ${state}`,
        top_k: 10,
        generate: false,
        use_playbook_memory: true,
        playbook_memory_k: 100,
      }),
    });
    const data: any = resp.ok ? await resp.json() : { sources: [] };
    return (data.sources ?? []).map((s: any) => ({
      doc_id: s.doc_id,
      // chunk_text is split on an em dash and the first segment is used as the name.
      name: s.chunk_text?.split("—")[0]?.trim() ?? "?",
      score: s.score ?? 0,
      playbook_boost: s.playbook_boost ?? 0,
      playbook_citations: s.playbook_citations ?? [],
    }));
  } catch {
    return [];
  }
}

// Main entry. Normalizes the input, then fans out to every memory
// source in parallel. Each source is best-effort: a down dependency
// just leaves its field empty rather than breaking the query.
//
// latency_ms semantics: `normalize` is the normalizer alone; every other
// per-source field is the time that source took to settle (≈0 when the
// source was skipped for lack of role/city/state); `kb_neighbors` covers
// the recommendation + neighbor lookups together; `total` is wall-clock
// for the whole call, including the discovered-patterns query.
export async function queryMemory(raw: unknown): Promise<MemoryQueryResult> {
  const t0 = Date.now();

  // Normalize input first — every downstream call keys off its fields.
  const tNorm = Date.now();
  const input = await normalizeInput(raw);
  const normalize_ms = Date.now() - tNorm;

  const { role, city, state } = input;

  // Synthesize a minimal scenario spec from normalized input for sig
  // computation + KB lookup. Only built when role, city and state are all known.
  const pseudoSpec = (role && city && state) ? {
    client: input.client ?? "(unknown)",
    events: [{
      kind: input.intent === "rescue" ? "misplacement" : "baseline_fill",
      role,
      count: input.count ?? 1,
      city,
      state,
    }],
  } : null;

  // Fire everything that CAN fire in parallel, timing each source on its own.
  const playbookTimed = timed<MemoryQueryResult["playbook_workers"]>(() =>
    (role && city && state) ? searchPlaybookWorkers(role, city, state) : Promise.resolve([]));

  const kbTimed = timed(() => Promise.all([
    pseudoSpec ? loadRecommendation(pseudoSpec).catch(() => null) : Promise.resolve(null),
    pseudoSpec ? findNeighbors(pseudoSpec, 5).catch(() => []) : Promise.resolve([]),
  ]));

  const lessonsTimed = timed<MemoryQueryResult["prior_lessons"]>(() =>
    (city && state) ? loadRelevantLessons(city, state).catch(() => []) : Promise.resolve([]));

  const staffersTimed = timed(() => loadStafferStats().catch(() => []));

  // Most frequently named workers across recent matching playbooks. Runs
  // alongside the other sources; any failure leaves the field null.
  const patternsPromise = (async (): Promise<MemoryQueryResult["discovered_patterns"]> => {
    if (!pseudoSpec) return null;
    const sig_hash = computeSignature(pseudoSpec);
    const top_workers = await collectTopEndorsedWorkers(sig_hash, role, city, state);
    return top_workers.length > 0 ? { sig_hash, top_workers } : null;
  })().catch(() => null);

  const [playbook, kb, lessons, staff, discovered_patterns] =
    await Promise.all([playbookTimed, kbTimed, lessonsTimed, staffersTimed, patternsPromise]);
  const [pathway_recommendation, neighbors_raw] = kb.value;

  // Derived: top 5 staffers by competence among those with at least one run.
  const top_staffers = staff.value
    .filter(s => s.total_runs > 0)
    .sort((a, b) => b.competence_score - a.competence_score)
    .slice(0, 5);

  return {
    input,
    playbook_workers: playbook.value,
    pathway_recommendation,
    neighbor_signatures: neighbors_raw.map(n => ({
      sig_hash: n.sig.sig_hash,
      events_digest: n.sig.events_digest,
      similarity: n.similarity,
      weighted_score: n.weighted_score,
      best_staffer_id: n.best_staffer_id,
    })),
    prior_lessons: lessons.value,
    top_staffers,
    discovered_patterns,
    latency_ms: {
      normalize: normalize_ms,
      playbook_search: playbook.ms,
      kb_neighbors: kb.ms,
      staffer_stats: staff.ms,
      prior_lessons: lessons.ms,
      total: Date.now() - t0,
    },
  };
}

// Scans data/_playbook_lessons/*.json and returns up to three lessons
// (newest `date` first) whose comma-separated `cities` list contains
// `city` or whose `states` list contains `state`. Unreadable or
// malformed files are skipped; any other failure yields [].
async function loadRelevantLessons(city: string, state: string | null): Promise<MemoryQueryResult["prior_lessons"]> {
  // Split a comma-separated field and trim each entry, so
  // "Nashville, Memphis" matches "Memphis" as well as "Nashville".
  const splitList = (value: unknown): string[] =>
    (typeof value === "string" ? value : "")
      .split(",")
      .map(part => part.trim())
      .filter(part => part.length > 0);

  try {
    const dir = join("data", "_playbook_lessons");
    const files = await readdir(dir).catch(() => [] as string[]);
    const out: MemoryQueryResult["prior_lessons"] = [];
    for (const f of files) {
      if (!f.endsWith(".json")) continue;
      try {
        const rec = JSON.parse(await readFile(join(dir, f), "utf8"));
        const cityHit = splitList(rec.cities).includes(city);
        const stateHit = state !== null && state !== "" && splitList(rec.states).includes(state);
        if (cityHit || stateHit) {
          out.push({
            date: rec.date,
            client: rec.client,
            cities: rec.cities,
            lesson: rec.lesson.slice(0, 500), // throws on a non-string lesson → record skipped
          });
        }
      } catch { /* skip malformed */ }
    }
    out.sort((a, b) => (b.date ?? "").localeCompare(a.date ?? ""));
    return out.slice(0, 3);
  } catch {
    return [];
  }
}

// Counts how often each worker name appears in the `result` column of
// the 50 most recent successful_playbooks_live rows whose `operation`
// matches this role/city/state, and returns the five most frequent.
// `sig_hash` is accepted for the caller's convenience but not used in
// the query. Best-effort: any failure yields [].
async function collectTopEndorsedWorkers(
  sig_hash: string,
  role: string | null,
  city: string | null,
  state: string | null,
): Promise<Array<{ name: string; endorsements: number }>> {
  if (!role || !city || !state) return [];
  try {
    const resp = await fetch(`${GATEWAY}/query/sql`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
        sql: `SELECT operation, result FROM successful_playbooks_live
              WHERE operation LIKE 'fill: ${sqlEscape(role)} %'
              AND operation LIKE '%in ${sqlEscape(city)}, ${sqlEscape(state)}%'
              ORDER BY timestamp DESC LIMIT 50`,
      }),
    });
    if (!resp.ok) return [];
    const data: any = await resp.json();
    // successful_playbooks_live.result has the shape
    //   "N/N filled → Name A, Name B, Name C"
    // or historical "Name A | Name B | Name C". Handle both.
    const counts = new Map<string, number>();
    for (const row of data.rows ?? []) {
      const raw = String(row.result ?? "");
      const afterArrow = raw.includes("→") ? (raw.split("→")[1] ?? "") : raw;
      const names = afterArrow
        .split(/[,|]/)
        .map((n: string) => n.trim())
        .filter((n: string) => n && !/^\d+\/\d+/.test(n) && !n.toLowerCase().startsWith("filled"));
      for (const n of names) counts.set(n, (counts.get(n) ?? 0) + 1);
    }
    return [...counts.entries()]
      .sort((a, b) => b[1] - a[1])
      .slice(0, 5)
      .map(([name, endorsements]) => ({ name, endorsements }));
  } catch {
    return [];
  }
}

// diff --git a/tests/multi-agent/normalize.test.ts b/tests/multi-agent/normalize.test.ts new file mode 100644 index 0000000..b43bd31 --- /dev/null +++ b/tests/multi-agent/normalize.test.ts @@ -0,0 +1,77 @@
import { test, expect } from "bun:test";
import { normalizeInputSync, normalizeInput } from "./normalize.ts";

test("structured FillEvent → fast path", () => {
  const n = normalizeInputSync({
    kind: "baseline_fill", role: "Welder", city: "Nashville",
    state: "TN", count: 4,
  });
  expect(n.extraction_method).toBe("structured");
  expect(n.role).toBe("Welder");
  expect(n.city).toBe("Nashville");
  expect(n.state).toBe("TN");
  expect(n.count).toBe(4);
  expect(n.confidence).toBe("high");
  expect(n.missing_fields.length).toBe(0);
});

test("misplacement kind → rescue intent", () => {
  const n = normalizeInputSync({ kind: "misplacement", role: "Welder", city: "Nashville", state: "TN", count: 1 });
  expect(n.intent).toBe("rescue");
});

test("structured with full state name normalizes to abbrev", () => {
  const n = normalizeInputSync({ role: "Welder", city: "Nashville", state: "Tennessee" });
  expect(n.state).toBe("TN");
});

test("natural-language regex path extracts count + role + city + state", () => {
  const n = normalizeInputSync("I need 3 Welders in Nashville, TN for next week");
  expect(n.role).toBe("Welder");
  expect(n.city).toBe("Nashville");
  expect(n.state).toBe("TN");
  expect(n.count).toBe(3);
  expect(n.intent).toBe("fill");
  expect(n.extraction_method).toBe("regex");
});

test("natural language with plural role + full state name", () => {
  const n = normalizeInputSync("fill 5 Forklift Operators in Chicago, Illinois");
  expect(n.role).toBe("Forklift Operator");
  expect(n.city).toBe("Chicago");
  expect(n.state).toBe("IL");
  expect(n.count).toBe(5);
});

test("rescue intent keyword beats fill", () => {
  const n = normalizeInputSync("we need to rescue the Nashville welder fill, 2 workers");
  expect(n.intent).toBe("rescue");
  expect(n.count).toBe(2);
  expect(n.role).toBe("Welder");
});

test("partial input reports missing fields with confidence=low or medium", () => {
  const n = normalizeInputSync("need welders");
  expect(n.role).toBe("Welder");
  expect(n.city).toBeNull();
  expect(n.state).toBeNull();
  expect(n.count).toBeNull();
  expect(n.missing_fields).toContain("city");
  expect(n.missing_fields).toContain("state");
  expect(n.confidence).toBe("low");
});

test("empty object does NOT go through structured path (no role or city)", () => {
  const n = normalizeInputSync({});
  expect(n.extraction_method).toBe("regex");
});

test("async normalizeInput on clean structured input matches sync", async () => {
  const input = { role: "Welder", city: "Nashville", state: "TN", count: 3 };
  const sync = normalizeInputSync(input);
  const async_ = await normalizeInput(input);
  expect(async_.role).toBe(sync.role);
  expect(async_.city).toBe(sync.city);
  expect(async_.state).toBe(sync.state);
  expect(async_.count).toBe(sync.count);
});

// diff --git a/tests/multi-agent/normalize.ts b/tests/multi-agent/normalize.ts new file mode 100644 index 0000000..92aaccd --- /dev/null +++ b/tests/multi-agent/normalize.ts @@ -0,0 +1,296 @@
// Input normalizer — accepts whatever shape a caller provides (clean
// structured spec, partial JSON, natural language, a mix) and produces
// a canonical shape the rest of the pipeline understands. This is the
// missing piece that lets KB + playbook_memory + overseer respond
// "seamlessly with whatever input they're given" as J framed it.
//
// The pipeline already works when input is `{role, city, state, count}`;
// what breaks today is "I need three welders in Nashville Monday" —
// the SQL-filter extractors return None, the boost pipeline sees no
// role/geo signal, and retrieval silently degrades to raw cosine.
//
// Strategy: fast path for structured input, LLM fallback only for
// natural language. We don't want to burn an LLM call on "role=Welder
// city=Nashville state=TN count=3" — that's wasteful.

import { generateContinuable } from "./agent.ts";

type Intent = "fill" | "lookup" | "rescue" | "rebuild" | "unknown";
type Confidence = "high" | "medium" | "low";
type ExtractionMethod = "structured" | "regex" | "llm" | "mixed";

/** Canonical request shape produced by the normalizer. */
export interface NormalizedInput {
  role: string | null;
  city: string | null;
  /** Two-letter abbreviation. */
  state: string | null;
  count: number | null;
  client: string | null;
  /** ISO date, or null. */
  deadline: string | null;
  intent: Intent;
  /** What the caller originally sent. */
  raw_text: string;
  confidence: Confidence;
  extraction_method: ExtractionMethod;
  /** Fields the normalizer could not determine. */
  missing_fields: string[];
}

// US state abbreviations for regex city,state extraction.
const STATE_ABBRS = new Set([
  "AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID","IL","IN","IA",
  "KS","KY","LA","ME","MD","MA","MI","MN","MS","MO","MT","NE","NV","NH","NJ",
  "NM","NY","NC","ND","OH","OK","OR","PA","RI","SC","SD","TN","TX","UT","VT",
  "VA","WA","WV","WI","WY","DC",
]);

// Lower-cased full state name → two-letter abbreviation. Covers every
// abbreviation in STATE_ABBRS, including DC, so a full name and its
// abbreviation normalize to the same value.
const STATE_NAMES_TO_ABBR: Record<string, string> = {
  alabama: "AL", alaska: "AK", arizona: "AZ", arkansas: "AR", california: "CA",
  colorado: "CO", connecticut: "CT", delaware: "DE", florida: "FL", georgia: "GA",
  hawaii: "HI", idaho: "ID", illinois: "IL", indiana: "IN", iowa: "IA",
  kansas: "KS", kentucky: "KY", louisiana: "LA", maine: "ME", maryland: "MD",
  massachusetts: "MA", michigan: "MI", minnesota: "MN", mississippi: "MS", missouri: "MO",
  montana: "MT", nebraska: "NE", nevada: "NV", "new hampshire": "NH", "new jersey": "NJ",
  "new mexico": "NM", "new york": "NY", "north carolina": "NC", "north dakota": "ND",
  ohio: "OH", oklahoma: "OK", oregon: "OR", pennsylvania: "PA", "rhode island": "RI",
  "south carolina": "SC", "south dakota": "SD", tennessee: "TN", texas: "TX", utah: "UT",
  vermont: "VT", virginia: "VA", washington: "WA", "west virginia": "WV", wisconsin: "WI",
  wyoming: "WY", "district of columbia": "DC",
};

// Canonical staffing roles — the normalizer recognizes these regardless
// of casing / plurality. Extend as domains grow.
// NOTE: this array is shared module state; sort a copy rather than
// sorting it in place.
const KNOWN_ROLES = [
  "Warehouse Associate", "Machine Operator", "Forklift Operator", "Loader",
  "Material Handler", "Assembler", "Quality Tech", "Picker", "Packer",
  "Shipping Clerk", "Receiving Clerk", "Welder", "CNC Operator",
  "Maintenance Tech", "Electrician", "Tool & Die Maker", "Safety Coordinator",
  "Logistics Coordinator", "Packaging Operator", "Sanitation Worker",
  "Line Lead",
];

// Intent keyword hints. Check in order — "rescue" beats generic "fill"
// for "rescue the Nashville welder fill" kind of input.
const INTENT_HINTS: Array<{ re: RegExp; intent: NormalizedInput["intent"] }> = [
  // Alternatives are grouped so the leading \b applies to every keyword,
  // not just the first one; otherwise "New Hampshire" matches "hire".
  // Keywords are prefixes on purpose ("remediat" covers remediate/remediation).
  { re: /\b(?:rescue|pivot|retry|remediat)/i, intent: "rescue" },
  { re: /\b(?:rebuild|reindex|re-?embed)/i, intent: "rebuild" },
  { re: /\b(?:lookup|find|show me|who is|what is)/i, intent: "lookup" },
  { re: /\b(?:fill|hire|staff|need|schedule|book)/i, intent: "fill" },
];

// Fast structured path — the input is already a FillEvent-shaped JSON
// or a close cousin. Returns null to fall through to regex/LLM.
// `state` is normalized to a two-letter abbreviation, and missing_fields
// is computed from the NORMALIZED state so an unrecognised state string
// is reported as missing.
function fromStructured(raw: any): NormalizedInput | null {
  if (!raw || typeof raw !== "object") return null;
  const role = typeof raw.role === "string" ? raw.role : null;
  const city = typeof raw.city === "string" ? raw.city : null;
  const state = normalizeState(typeof raw.state === "string" ? raw.state : null);
  const count = typeof raw.count === "number" ? raw.count : null;
  // Only treat as structured if AT LEAST role OR city is present; a
  // bare {kind: "baseline_fill"} shouldn't go through this path.
  if (!role && !city) return null;
  const isRescue = raw.kind === "misplacement" || /rescue/i.test(raw.kind ?? "");
  return {
    role,
    city,
    state,
    count,
    client: typeof raw.client === "string" ? raw.client : null,
    deadline: typeof raw.deadline === "string" ? raw.deadline : null,
    intent: isRescue ? "rescue" : "fill",
    raw_text: JSON.stringify(raw),
    confidence: "high",
    extraction_method: "structured",
    missing_fields: missingFields(role, city, state, count),
  };
}

// Maps a two-letter abbreviation or full state name (any casing,
// surrounding whitespace ignored) to the upper-case abbreviation.
// Returns null for anything else, including non-string values such as
// a number or object coming back from the LLM.
function normalizeState(s: unknown): string | null {
  if (typeof s !== "string") return null;
  const trimmed = s.trim();
  if (!trimmed) return null;
  const upper = trimmed.toUpperCase();
  if (STATE_ABBRS.has(upper)) return upper;
  const key = trimmed.toLowerCase();
  // Own-property check: a plain object lookup would otherwise resolve
  // inherited keys such as "constructor" to a truthy non-string value.
  return Object.prototype.hasOwnProperty.call(STATE_NAMES_TO_ABBR, key)
    ? STATE_NAMES_TO_ABBR[key] ?? null
    : null;
}

// Lists which of role/city/state/count are absent. role, city and state
// count as missing when falsy; count only when null or undefined.
function missingFields(role: unknown, city: unknown, state: unknown, count: unknown): string[] {
  const out: string[] = [];
  if (!role) out.push("role");
  if (!city) out.push("city");
  if (!state) out.push("state");
  if (count === null || count === undefined) out.push("count");
  return out;
}

// Finds "{City}, XX" / "in {City} XX" / "{City}, FullStateName" in text.
// City capture is 1-3 capitalized words — anything longer is usually
// the surrounding phrase ("Forklift Operators in Chicago"). The
// "in {city}" anchor is preferred when present.
function extractCityState(text: string): { city: string | null; state: string | null } {
  const cityPat = `([A-Z][a-zA-Z.'-]+(?:\\s+[A-Z][a-zA-Z.'-]+){0,2})`;

  // First preference: "in {City}, XX" or "in {City} XX"
  const inAbbrev = text.match(new RegExp(`\\bin\\s+${cityPat},?\\s+([A-Z]{2})\\b`));
  if (inAbbrev && STATE_ABBRS.has(inAbbrev[2])) {
    return { city: inAbbrev[1].trim(), state: inAbbrev[2] };
  }

  // Bare "{City}, XX" (no "in" anchor)
  const bareAbbrev = text.match(new RegExp(`${cityPat},\\s+([A-Z]{2})\\b`));
  if (bareAbbrev && STATE_ABBRS.has(bareAbbrev[2])) {
    return { city: bareAbbrev[1].trim(), state: bareAbbrev[2] };
  }

  // "{City}, FullName" variant. Can't use the case-insensitive flag
  // because it would let cityPat's [A-Z] match lowercase letters.
  // Instead try both the lower-case and Title-case spellings.
  for (const [full, abbr] of Object.entries(STATE_NAMES_TO_ABBR)) {
    const fullTitle = full.split(" ").map(w => w.charAt(0).toUpperCase() + w.slice(1)).join(" ");
    const variants = [full, fullTitle];
    for (const variant of variants) {
      const m = text.match(new RegExp(`\\bin\\s+${cityPat},?\\s+${variant}\\b`));
      if (m) return { city: m[1].trim(), state: abbr };
    }
    // Bare "{City}, FullName" (no "in")
    for (const variant of variants) {
      const m = text.match(new RegExp(`${cityPat},\\s+${variant}\\b`));
      if (m) return { city: m[1].trim(), state: abbr };
    }
  }
  return { city: null, state: null };
}

// Regex path — handles structured-ish text like "3 welders in Nashville TN".
// Returns partial extraction; LLM path fills the rest if this leaves
// too many missing fields.
function fromRegex(text: string): NormalizedInput {
  const lower = text.toLowerCase();

  // Count: "3 welders" or "need 5" or "fill 8 safety coordinators"
  let count: number | null = null;
  const countMatch = text.match(/\b(?:need|fill|book|hire|schedule|want)\s+(\d+)\b/i)
    ?? text.match(/\b(\d+)\s+(?:x\s+)?[A-Za-z]/);
  if (countMatch) count = parseInt(countMatch[1], 10);

  // Role: longest known role that appears in the text (case-insensitive).
  // Substring matching already covers plurals formed by appending "s"
  // ("welders" contains "welder"). Sort a COPY — KNOWN_ROLES is shared
  // module state and must not be reordered as a side effect.
  const rolesLongestFirst = [...KNOWN_ROLES].sort((a, b) => b.length - a.length);
  const role = rolesLongestFirst.find(known => lower.includes(known.toLowerCase())) ?? null;

  const { city, state } = extractCityState(text);

  // Intent: first matching hint wins.
  let intent: NormalizedInput["intent"] = "unknown";
  for (const h of INTENT_HINTS) {
    if (h.re.test(text)) { intent = h.intent; break; }
  }

  const missing = missingFields(role, city, state, count);
  return {
    role, city, state, count,
    client: null,
    deadline: null,
    intent,
    raw_text: text,
    confidence: missing.length === 0 ? "high" : missing.length <= 2 ? "medium" : "low",
    extraction_method: "regex",
    missing_fields: missing,
  };
}

// LLM path — when regex leaves too many fields missing, ask a small
// local model to extract what's present. Uses `think:false` and a
// strict JSON prompt so the call stays cheap. Each field the model
// leaves empty is filled from `fallback` (the regex result);
// missing_fields and confidence describe the MERGED result. Any error
// (call failure, no JSON object in the reply, unparsable JSON) returns
// `fallback` unchanged.
async function fromLLM(text: string, fallback: NormalizedInput): Promise<NormalizedInput> {
  const prompt = `Extract the staffing request shape from this text. Respond with ONLY a JSON object, no prose.

Text: ${text}

Required shape:
{
  "role": "string | null (one of: Welder, Forklift Operator, Warehouse Associate, Machine Operator, Loader, Material Handler, Assembler, Quality Tech, Picker, Packer, Shipping Clerk, Receiving Clerk, CNC Operator, Maintenance Tech, Electrician, Tool & Die Maker, Safety Coordinator, Logistics Coordinator, Packaging Operator, Sanitation Worker, Line Lead, or null if unclear)",
  "city": "string | null (US city name, properly capitalized)",
  "state": "string | null (two-letter abbrev like TN, IL)",
  "count": "number | null (integer number of workers needed)",
  "client": "string | null (company/client name if mentioned)",
  "deadline": "string | null (ISO date YYYY-MM-DD if a specific date is mentioned, else null)",
  "intent": "string (one of: fill, lookup, rescue, rebuild, unknown)"
}

Return the JSON object now:`;

  // The model sometimes emits the literal string "null"; treat it as absent.
  const textOrNull = (v: unknown): string | null =>
    typeof v === "string" && v !== "null" ? v : null;

  try {
    const raw = await generateContinuable("qwen3:latest", prompt, {
      max_tokens: 400,
      shape: "json",
      think: false,
      max_continuations: 2,
    });
    const match = raw.match(/\{[\s\S]*\}/);
    if (!match) return fallback;
    const parsed = JSON.parse(match[0]);

    const llmCount: number | null = typeof parsed.count === "number" ? parsed.count
      : typeof parsed.count === "string" ? parseInt(parsed.count, 10) || null
      : null;
    const llmIntent: NormalizedInput["intent"] =
      ["fill", "lookup", "rescue", "rebuild", "unknown"].includes(parsed.intent)
        ? parsed.intent as NormalizedInput["intent"]
        : "unknown";

    // Merge first, THEN decide what is still missing.
    const role = textOrNull(parsed.role) ?? fallback.role;
    const city = textOrNull(parsed.city) ?? fallback.city;
    const state = normalizeState(parsed.state) ?? fallback.state;
    const count = llmCount ?? fallback.count;
    const missing = missingFields(role, city, state, count);

    return {
      role,
      city,
      state,
      count,
      client: textOrNull(parsed.client) ?? fallback.client,
      deadline: textOrNull(parsed.deadline) ?? fallback.deadline,
      intent: llmIntent !== "unknown" ? llmIntent : fallback.intent,
      raw_text: text,
      confidence: missing.length === 0 ? "high" : missing.length <= 2 ? "medium" : "low",
      extraction_method: "llm",
      missing_fields: missing,
    };
  } catch {
    return fallback;
  }
}

// Text form of arbitrary input for the regex/LLM paths: strings pass
// through, objects are JSON-serialized, everything else goes via String().
function inputToText(raw: unknown): string {
  if (typeof raw === "string") return raw;
  if (raw && typeof raw === "object") return JSON.stringify(raw);
  return String(raw);
}

// Top-level normalizer. Auto-detects input shape:
//   - Object with role/city → fast structured path
//   - String with enough regex signal → regex path
//   - Low-signal string → LLM path with regex seed
export async function normalizeInput(raw: unknown): Promise<NormalizedInput> {
  // Structured path
  if (raw && typeof raw === "object") {
    const s = fromStructured(raw);
    if (s) return s;
  }
  // String path
  const text = inputToText(raw);
  const reg = fromRegex(text);
  // If regex got at least role + city, trust it.
  if (reg.role && reg.city) {
    return reg;
  }
  // Otherwise try LLM fallback, using regex result as seed.
  return await fromLLM(text, reg);
}

// Synchronous-only variant — returns whatever regex/structured can get
// without an LLM call. Useful when caller can't await (e.g. a prompt
// template).
export function normalizeInputSync(raw: unknown): NormalizedInput {
  if (raw && typeof raw === "object") {
    const s = fromStructured(raw);
    if (s) return s;
  }
  return fromRegex(inputToText(raw));
}