J asked directly: "did we implement our memory findings so that our
knowledge base and our configuration playbook [work] seamlessly with
whatever input they're given?" Honest answer tonight was "one of five
findings shipped, normalizer is the blocker." This closes that gap.
NORMALIZER (tests/multi-agent/normalize.ts):
Accepts structured JSON, natural language, or mixed. Returns canonical
NormalizedInput { role, city, state, count, client, deadline, intent,
confidence, extraction_method, missing_fields } for any downstream
consumer.
Three-tier path:
1. Structured fast-path — already-shaped input skips LLM
2. Regex path — "need 3 welders in Nashville, TN" parses without LLM.
City/state parser tightened to 1-3 capitalized words + "in {city}"
anchor preference + case-exact full-state-name variants to prevent
"Forklift Operators in Chicago" being captured as the city name
3. LLM fallback — qwen3 local with think:false + 400 max_tokens for
inputs the regex can't handle
Unit tests (tests/multi-agent/normalize.test.ts): 9/9 pass. Covers
structured fast-path, misplacement→rescue intent, state-name→abbrev
conversion, regex extraction from natural language, plural role +
full state name edge case, rescue intent keyword precedence, partial
input reporting missing fields, empty object fallthrough, async/sync
parity on clean inputs.
UNIFIED MEMORY QUERY (tests/multi-agent/memory_query.ts):
One function, five parallel fan-outs, one bundle returned:
- playbook_workers — hybrid_search via gateway with use_playbook_memory
- pathway_recommendation — KB recommender for this sig
- neighbor_signatures — K-NN sigs weighted by staffer competence
- prior_lessons — T3 overseer lessons filtered by city/state
- top_staffers — competence-sorted leaderboard
- discovered_patterns — top workers endorsed across past playbooks
for this (role, city, state)
- latency_ms — per-source + total
Every branch is best-effort: one source down doesn't break the bundle.
HTTP ENDPOINT (mcp-server/index.ts):
POST /memory/query with body {input: <anything>} → MemoryQueryResult
Returns the same shape the TS function does. Typed with types.ts for
future UI consumption.
VERIFIED:
curl POST /memory/query with structured {role,city,state,count}
→ extraction_method=structured, 10 playbook workers, top score 0.878
curl POST /memory/query with "I need 3 welders in Nashville, TN"
→ extraction_method=regex (no LLM call), 319ms total, 8 endorsements
for Lauren Gomez auto-discovered as top Nashville Welder
Honest remaining gaps (documented for next phase):
- Mem0 ADD/UPDATE/DELETE/NOOP — we still only ADD + mark_failed
- Zep validity windows — playbook entries have timestamps but no
retirement semantic
- Letta working-memory / hot cache — every query scans all 1560
playbook entries
- Memory profiles / scoped queries — global pool, no per-staffer
private subsets
2 of 5 findings now shipped (multi-strategy retrieval in Rust, input
normalization + unified query in TS). The remaining 3 are architectural
additions queued as Phase 25 items — validity windows first since it's
the most load-bearing for long-running systems.
297 lines
13 KiB
TypeScript
297 lines
13 KiB
TypeScript
// Input normalizer — accepts whatever shape a caller provides (clean
// structured spec, partial JSON, natural language, a mix) and produces
// a canonical shape the rest of the pipeline understands. This is the
// missing piece that lets KB + playbook_memory + overseer respond
// "seamlessly with whatever input they're given" as J framed it.
//
// The pipeline already works when input is `{role, city, state, count}`;
// what breaks today is "I need three welders in Nashville Monday" —
// the SQL-filter extractors return None, the boost pipeline sees no
// role/geo signal, and retrieval silently degrades to raw cosine.
//
// Strategy: fast path for structured input, a regex pass for
// structured-ish text, and an LLM fallback only for natural language
// the regex cannot parse. We don't want to burn an LLM call on
// "role=Welder city=Nashville state=TN count=3" — that's wasteful.
|
|
|
|
import { generateContinuable } from "./agent.ts";
|
|
|
|
/**
 * Canonical request shape produced by the normalizer, whatever form the
 * caller's input originally took.
 */
export interface NormalizedInput {
  /** Staffing role, or null when it could not be determined. */
  role: string | null;
  /** City name, or null when it could not be determined. */
  city: string | null;
  /** Two-letter state abbreviation, or null. */
  state: string | null;
  /** Number of workers requested, or null. */
  count: number | null;
  /** Client / company name, or null. */
  client: string | null;
  /** ISO date, or null. */
  deadline: string | null;
  /** What the caller is asking the pipeline to do. */
  intent: "fill" | "lookup" | "rescue" | "rebuild" | "unknown";
  /** What the caller originally sent. */
  raw_text: string;
  /** How much of the request the normalizer managed to extract. */
  confidence: "high" | "medium" | "low";
  /** Which extraction path produced this result. */
  extraction_method: "structured" | "regex" | "llm" | "mixed";
  /** Names of the fields the normalizer could not determine. */
  missing_fields: string[];
}
|
|
|
|
// Two-letter codes for the 50 US states plus DC. The regex city/state
// extraction uses this set to validate a captured abbreviation.
const STATE_ABBRS = new Set<string>(
  [
    "AL AK AZ AR CA CO CT DE FL GA HI ID IL IN IA",
    "KS KY LA ME MD MA MI MN MS MO MT NE NV NH NJ",
    "NM NY NC ND OH OK OR PA RI SC SD TN TX UT VT",
    "VA WA WV WI WY DC",
  ]
    .join(" ")
    .split(" "),
);
|
|
|
|
// Lower-cased full state name → two-letter abbreviation. Key order is
// kept exactly as originally written because the regex path iterates
// this map in insertion order.
const STATE_NAMES_TO_ABBR: Record<string, string> = {
  alabama: "AL",
  alaska: "AK",
  arizona: "AZ",
  arkansas: "AR",
  california: "CA",
  colorado: "CO",
  connecticut: "CT",
  delaware: "DE",
  florida: "FL",
  georgia: "GA",
  hawaii: "HI",
  idaho: "ID",
  illinois: "IL",
  indiana: "IN",
  iowa: "IA",
  kansas: "KS",
  kentucky: "KY",
  louisiana: "LA",
  maine: "ME",
  maryland: "MD",
  massachusetts: "MA",
  michigan: "MI",
  minnesota: "MN",
  mississippi: "MS",
  missouri: "MO",
  montana: "MT",
  nebraska: "NE",
  nevada: "NV",
  "new hampshire": "NH",
  "new jersey": "NJ",
  "new mexico": "NM",
  "new york": "NY",
  "north carolina": "NC",
  "north dakota": "ND",
  ohio: "OH",
  oklahoma: "OK",
  oregon: "OR",
  pennsylvania: "PA",
  "rhode island": "RI",
  "south carolina": "SC",
  "south dakota": "SD",
  tennessee: "TN",
  texas: "TX",
  utah: "UT",
  vermont: "VT",
  virginia: "VA",
  washington: "WA",
  "west virginia": "WV",
  wisconsin: "WI",
  wyoming: "WY",
};
|
|
|
|
// Canonical staffing roles. The normalizer recognizes these regardless
// of casing / plurality; add new entries here as domains grow.
const KNOWN_ROLES = [
  "Warehouse Associate",
  "Machine Operator",
  "Forklift Operator",
  "Loader",
  "Material Handler",
  "Assembler",
  "Quality Tech",
  "Picker",
  "Packer",
  "Shipping Clerk",
  "Receiving Clerk",
  "Welder",
  "CNC Operator",
  "Maintenance Tech",
  "Electrician",
  "Tool & Die Maker",
  "Safety Coordinator",
  "Logistics Coordinator",
  "Packaging Operator",
  "Sanitation Worker",
  "Line Lead",
];
|
|
|
|
// Intent keyword hints. Checked in order — "rescue" beats generic "fill"
// for "rescue the Nashville welder fill" kind of input.
//
// Every alternative sits inside a non-capturing group so the leading \b
// applies to all of them, not just the first. Without the group, "hire"
// matched inside "New Hampshire" and produced a spurious "fill" intent.
// Keywords stay prefix matches (no trailing \b) so stems such as
// "remediat" and "staff" still cover "remediation" and "staffing".
const INTENT_HINTS: Array<{ re: RegExp; intent: NormalizedInput["intent"] }> = [
  { re: /\b(?:rescue|pivot|retry|remediat)/i, intent: "rescue" },
  { re: /\b(?:rebuild|reindex|re-?embed)/i, intent: "rebuild" },
  { re: /\b(?:lookup|find|show me|who is|what is)/i, intent: "lookup" },
  { re: /\b(?:fill|hire|staff|need|schedule|book)/i, intent: "fill" },
];
|
|
|
|
// Fast structured path — the input is already a FillEvent-shaped JSON
// or a close cousin. Returns null to fall through to regex/LLM.
//
// Fields are accepted only when they have the expected primitive type;
// anything else is treated as absent. `missing_fields` is computed from
// the *normalized* state, so an unrecognized state string (which
// normalizes to null) is reported as missing rather than silently lost.
// Confidence is always "high" on this path.
function fromStructured(raw: any): NormalizedInput | null {
  if (!raw || typeof raw !== "object") return null;

  const role = typeof raw.role === "string" ? raw.role : null;
  const city = typeof raw.city === "string" ? raw.city : null;
  // Only treat as structured if AT LEAST role OR city is present; a
  // bare {kind: "baseline_fill"} shouldn't go through this path.
  if (!role && !city) return null;

  const state = normalizeState(typeof raw.state === "string" ? raw.state : null);
  // NaN / Infinity are typeof "number" but are not usable head counts.
  const count =
    typeof raw.count === "number" && Number.isFinite(raw.count) ? raw.count : null;
  const isRescue = raw.kind === "misplacement" || /rescue/i.test(raw.kind ?? "");

  return {
    role,
    city,
    state,
    count,
    client: typeof raw.client === "string" ? raw.client : null,
    deadline: typeof raw.deadline === "string" ? raw.deadline : null,
    intent: isRescue ? "rescue" : "fill",
    raw_text: JSON.stringify(raw),
    confidence: "high",
    extraction_method: "structured",
    missing_fields: missingFields(role, city, state, count),
  };
}
|
|
|
|
// Converts a state given as a two-letter code or a full name (any
// casing, surrounding whitespace ignored) to its upper-case two-letter
// abbreviation. Returns null for anything unrecognized.
//
// The typeof guard matters because callers pass values taken from
// parsed JSON, which may not be strings at runtime. The own-property
// check stops names such as "constructor" from resolving to inherited
// Object.prototype members.
function normalizeState(s: string | null | undefined): string | null {
  if (typeof s !== "string") return null;
  const trimmed = s.trim();
  if (trimmed === "") return null;

  const upper = trimmed.toUpperCase();
  if (STATE_ABBRS.has(upper)) return upper;

  // Collapse internal runs of whitespace so "New   York" still resolves.
  const key = trimmed.toLowerCase().replace(/\s+/g, " ");
  return Object.prototype.hasOwnProperty.call(STATE_NAMES_TO_ABBR, key)
    ? STATE_NAMES_TO_ABBR[key]
    : null;
}
|
|
|
|
// Lists which of the four core fields are absent, always in the order
// role, city, state, count. role/city/state count as absent when falsy;
// count only when it is null or undefined, so a count of 0 is present.
function missingFields(role: any, city: any, state: any, count: any): string[] {
  const checks: Array<[string, boolean]> = [
    ["role", !role],
    ["city", !city],
    ["state", !state],
    ["count", count == null],
  ];
  return checks.filter(([, absent]) => absent).map(([name]) => name);
}
|
|
|
|
// Regex path — handles structured-ish text like "3 welders in Nashville TN".
// Returns a partial extraction; the LLM path fills the rest if this
// leaves too many fields missing. `client` and `deadline` are never
// extracted here and are always null.
function fromRegex(text: string): NormalizedInput {
  const lower = text.toLowerCase();

  // Count: "need 5" / "fill 8 safety coordinators" first, then a bare
  // "3 welders" / "3 x welders".
  const countMatch =
    text.match(/\b(?:need|fill|book|hire|schedule|want)\s+(\d+)\b/i) ??
    text.match(/\b(\d+)\s+(?:x\s+)?[A-Za-z]/);
  const count = countMatch ? parseInt(countMatch[1], 10) : null;

  // Role: the longest known role whose lower-cased name occurs in the
  // text. This is a substring match, so plural forms ("welders") are
  // covered without special handling. Sort a COPY — sorting KNOWN_ROLES
  // itself would reorder the shared module-level array on every call.
  const role =
    [...KNOWN_ROLES]
      .sort((a, b) => b.length - a.length)
      .find((known) => lower.includes(known.toLowerCase())) ?? null;

  const { city, state } = extractCityState(text);

  // Intent: first hint whose pattern matches wins.
  const intent: NormalizedInput["intent"] =
    INTENT_HINTS.find((h) => h.re.test(text))?.intent ?? "unknown";

  const missing = missingFields(role, city, state, count);
  return {
    role,
    city,
    state,
    count,
    client: null,
    deadline: null,
    intent,
    raw_text: text,
    confidence: missing.length === 0 ? "high" : missing.length <= 2 ? "medium" : "low",
    extraction_method: "regex",
    missing_fields: missing,
  };
}

// Finds "City, ST" / "City ST" / "City, Full State Name" in free text.
// City capture is 1-3 capitalized words — anything longer is usually the
// surrounding phrase ("Forklift Operators in Chicago"). An "in {City}"
// anchor (either "in" or "In") is preferred over a bare "{City}, ..."
// match, and abbreviations are tried before full state names.
function extractCityState(text: string): { city: string | null; state: string | null } {
  const cityPat = `([A-Z][a-zA-Z.'-]+(?:\\s+[A-Z][a-zA-Z.'-]+){0,2})`;
  // Ordered by preference. The bare form requires the comma; the
  // anchored form tolerates its absence. No case-insensitive flag is
  // used because it would let cityPat's [A-Z] match lowercase letters.
  const anchors = [`\\b[Ii]n\\s+${cityPat},?\\s+`, `${cityPat},\\s+`];

  for (const anchor of anchors) {
    const m = text.match(new RegExp(`${anchor}([A-Z]{2})\\b`));
    if (m && STATE_ABBRS.has(m[2])) return { city: m[1].trim(), state: m[2] };
  }

  // Longest names first, so "West Virginia" is tried before "Virginia";
  // otherwise "in Charleston West Virginia" parses as city
  // "Charleston West" in VA. The sort runs on the entries copy and is
  // stable, so equal-length names keep their original relative order.
  const names = Object.entries(STATE_NAMES_TO_ABBR).sort(
    ([a], [b]) => b.length - a.length,
  );
  for (const anchor of anchors) {
    for (const [full, abbr] of names) {
      const title = full
        .split(" ")
        .map((w) => w[0].toUpperCase() + w.slice(1))
        .join(" ");
      // Try both the exact lower-case and the Title-case spelling.
      for (const variant of [full, title]) {
        const m = text.match(new RegExp(`${anchor}${variant}\\b`));
        if (m) return { city: m[1].trim(), state: abbr };
      }
    }
  }
  return { city: null, state: null };
}
|
|
|
|
// LLM path — when regex leaves too many fields missing, ask a small
// local model to extract what's present. Uses `think:false` and a
// strict JSON prompt so the call stays cheap.
//
// `fallback` is the regex result. It is returned unchanged when the
// model output contains no JSON object or anything throws, and it
// supplies any individual field the model left empty. `missing_fields`
// and `confidence` describe the MERGED result, so a field recovered
// from the fallback is not reported as missing.
async function fromLLM(text: string, fallback: NormalizedInput): Promise<NormalizedInput> {
  const prompt = `Extract the staffing request shape from this text. Respond with ONLY a JSON object, no prose.

Text: ${text}

Required shape:
{
  "role": "string | null (one of: Welder, Forklift Operator, Warehouse Associate, Machine Operator, Loader, Material Handler, Assembler, Quality Tech, Picker, Packer, Shipping Clerk, Receiving Clerk, CNC Operator, Maintenance Tech, Electrician, Tool & Die Maker, Safety Coordinator, Logistics Coordinator, Packaging Operator, Sanitation Worker, Line Lead, or null if unclear)",
  "city": "string | null (US city name, properly capitalized)",
  "state": "string | null (two-letter abbrev like TN, IL)",
  "count": "number | null (integer number of workers needed)",
  "client": "string | null (company/client name if mentioned)",
  "deadline": "string | null (ISO date YYYY-MM-DD if a specific date is mentioned, else null)",
  "intent": "string (one of: fill, lookup, rescue, rebuild, unknown)"
}

Return the JSON object now:`;

  // Models sometimes emit the literal string "null" or an empty string
  // instead of JSON null; treat both (and any non-string) as absent so
  // they never override a good fallback value.
  const text_or_null = (v: unknown): string | null => {
    if (typeof v !== "string") return null;
    const t = v.trim();
    return t === "" || t === "null" ? null : t;
  };

  try {
    const raw = await generateContinuable("qwen3:latest", prompt, {
      max_tokens: 400,
      shape: "json",
      think: false,
      max_continuations: 2,
    });
    // Take the outermost {...} span in case the model wrapped the JSON.
    const match = raw.match(/\{[\s\S]*\}/);
    if (!match) return fallback;
    const parsed = JSON.parse(match[0]);

    const llmCount: number | null =
      typeof parsed.count === "number" ? parsed.count
      : typeof parsed.count === "string" ? parseInt(parsed.count, 10) || null
      : null;
    const llmIntent: NormalizedInput["intent"] =
      ["fill", "lookup", "rescue", "rebuild", "unknown"].includes(parsed.intent)
        ? (parsed.intent as NormalizedInput["intent"])
        : "unknown";

    // Merge first, then grade the merged result.
    const role = text_or_null(parsed.role) ?? fallback.role;
    const city = text_or_null(parsed.city) ?? fallback.city;
    // Only pass a real string on: a numeric or object "state" must not
    // throw and take the rest of the extraction down with it.
    const state = normalizeState(text_or_null(parsed.state)) ?? fallback.state;
    const count = llmCount ?? fallback.count;
    const missing = missingFields(role, city, state, count);

    return {
      role,
      city,
      state,
      count,
      client: text_or_null(parsed.client) ?? fallback.client,
      deadline: text_or_null(parsed.deadline) ?? fallback.deadline,
      intent: llmIntent !== "unknown" ? llmIntent : fallback.intent,
      raw_text: text,
      confidence: missing.length === 0 ? "high" : missing.length <= 2 ? "medium" : "low",
      extraction_method: "llm",
      missing_fields: missing,
    };
  } catch {
    // Deliberate best-effort: any model or parse failure degrades to the
    // regex result rather than failing the request.
    return fallback;
  }
}
|
|
|
|
/**
 * Top-level normalizer. Auto-detects the input shape:
 *   - Object with role/city         → fast structured path
 *   - String with role + city found → regex path
 *   - Anything else with content    → LLM path, seeded with the regex result
 *
 * Objects that fail the structured path are JSON-stringified and run
 * through the text paths; other non-string values go through `String()`.
 * Blank, null or undefined input returns the regex result directly —
 * there is nothing for a model to extract, so no LLM call is made.
 *
 * @param raw Whatever the caller sent.
 * @returns The canonical NormalizedInput for that input.
 */
export async function normalizeInput(raw: unknown): Promise<NormalizedInput> {
  const isObject = typeof raw === "object" && raw !== null;

  // Structured path
  if (isObject) {
    const structured = fromStructured(raw);
    if (structured) return structured;
  }

  // String path
  const text =
    typeof raw === "string" ? raw : isObject ? JSON.stringify(raw) : String(raw);
  const reg = fromRegex(text);

  // If regex got at least role + city, trust it.
  if (reg.role && reg.city) return reg;

  // Nothing to extract from: skip the model. `raw == null` is checked
  // separately because String(null) / String(undefined) are non-blank.
  if (raw == null || text.trim() === "") return reg;

  // Otherwise try LLM fallback, using regex result as seed.
  return await fromLLM(text, reg);
}
|
|
|
|
/**
 * Synchronous-only variant — returns whatever the structured or regex
 * paths can extract, never calling the LLM. Useful when the caller
 * can't await (e.g. a prompt template).
 *
 * @param raw Whatever the caller sent.
 * @returns The structured result when that path accepts the input,
 *   otherwise the regex result for the input's text form.
 */
export function normalizeInputSync(raw: unknown): NormalizedInput {
  const isObject = typeof raw === "object" && raw !== null;

  const structured = isObject ? fromStructured(raw) : null;
  if (structured) return structured;

  // Text form: strings as-is, objects as JSON, everything else via String().
  let text: string;
  if (typeof raw === "string") {
    text = raw;
  } else if (isObject) {
    text = JSON.stringify(raw);
  } else {
    text = String(raw);
  }
  return fromRegex(text);
}
|