T3 overview tier — mid-day checkpoints + cross-day lesson
Hot path (T1/T2) stays mistral + qwen2.5. The new T3 tier runs a
thinking model SPARINGLY — after every misplacement, every N-th event
(default N=3), and once post-scenario for the cross-day lesson.
- agent.ts: generateCloud() for Ollama Cloud (gpt-oss:120b, etc.). Uses
the same /api/generate shape; thinking field is discarded.
- scenario.ts: runOverviewCheckpoint + runCrossDayLesson. Outputs land
in checkpoints.jsonl and lesson.md. Lesson also seeds playbook_memory
under operation "cross-day-lesson-{date}" — future runs pick it up
through the existing similarity boost.
- Env knobs: LH_OVERVIEW_CLOUD=1 routes T3 to cloud, LH_OVERVIEW_MODEL
overrides (default gpt-oss:20b local, gpt-oss:120b cloud),
LH_T3_CHECKPOINT_EVERY controls cadence, LH_T3_DISABLE=1 turns it off.
Why this shape: prior feedback_phase19_seed_text.md warned that verbose
seeds dilute the embedding and silently kill the boost. T3's rich prose
goes to lesson.md; the embedded "approach" + "context" stay terse.
Verified end-to-end: local 20b checkpoint 10.9s, lesson 4.0s; cloud
120b lesson 3.7s. Cloud output is both faster AND more specific than
local (sequenced, tactical, logging advice included).
This commit is contained in:
parent
0ff091c173
commit
e4ae5b646e
@ -9,6 +9,12 @@
|
||||
// Local service endpoints for the hot path.
export const GATEWAY = "http://localhost:3100";
export const SIDECAR = "http://localhost:3200";

// Ollama Cloud — used for the T3 overview tier when LH_OVERVIEW_CLOUD=1.
// Same /api/generate surface as local Ollama; just needs the bearer key.
// Default base and key are read from env so secrets never land in git.
// An empty OLLAMA_CLOUD_KEY makes generateCloud() throw instead of sending
// an unauthenticated request.
export const OLLAMA_CLOUD_URL = process.env.OLLAMA_CLOUD_URL ?? "https://ollama.com";
export const OLLAMA_CLOUD_KEY = process.env.OLLAMA_CLOUD_KEY ?? "";

// --- Shared types ---

// Which side of the two-model loop a call is acting as.
export type Role = "executor" | "reviewer";
|
||||
@ -111,6 +117,48 @@ export async function generate(model: string, prompt: string, opts: {
|
||||
return text;
|
||||
}
|
||||
|
||||
// Cloud generate — hits Ollama Cloud directly with the bearer key. Same
|
||||
// /api/generate shape as local Ollama; `thinking` field (for gpt-oss:Nb)
|
||||
// is discarded, only `response` is returned. Caller should budget
|
||||
// num_predict ≥ 400 so thinking-model reasoning has room before the
|
||||
// visible response starts.
|
||||
export async function generateCloud(model: string, prompt: string, opts: {
|
||||
max_tokens?: number;
|
||||
temperature?: number;
|
||||
system?: string;
|
||||
} = {}): Promise<string> {
|
||||
if (!OLLAMA_CLOUD_KEY) {
|
||||
throw new Error("OLLAMA_CLOUD_KEY not set; cannot reach Ollama Cloud");
|
||||
}
|
||||
const body: Record<string, any> = {
|
||||
model,
|
||||
prompt,
|
||||
stream: false,
|
||||
options: {
|
||||
temperature: opts.temperature ?? 0.3,
|
||||
num_predict: Math.max(opts.max_tokens ?? 800, 400),
|
||||
},
|
||||
};
|
||||
if (opts.system) body.system = opts.system;
|
||||
const resp = await fetch(`${OLLAMA_CLOUD_URL}/api/generate`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Authorization": `Bearer ${OLLAMA_CLOUD_KEY}`,
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
body: JSON.stringify(body),
|
||||
});
|
||||
if (!resp.ok) {
|
||||
throw new Error(`Ollama Cloud ${resp.status}: ${await resp.text().catch(() => "?")}`);
|
||||
}
|
||||
const data: any = await resp.json();
|
||||
const text = data.response ?? "";
|
||||
if (!text) {
|
||||
throw new Error(`Ollama Cloud returned empty response for ${model}: ${JSON.stringify(data).slice(0, 200)}`);
|
||||
}
|
||||
return text;
|
||||
}
|
||||
|
||||
// --- Prompt construction ---
|
||||
|
||||
const TOOL_CATALOG = `
|
||||
|
||||
@ -28,6 +28,7 @@ import {
|
||||
hybridSearch,
|
||||
sqlQuery,
|
||||
generate,
|
||||
generateCloud,
|
||||
parseAction,
|
||||
executorPrompt,
|
||||
reviewerPrompt,
|
||||
@ -46,6 +47,24 @@ import { join } from "node:path";
|
||||
// Hot-path (T1/T2) models — always local.
const EXECUTOR_MODEL = "mistral:latest";
const REVIEWER_MODEL = "qwen2.5:latest";
const DRAFT_MODEL = "qwen2.5:latest"; // artifact generation; short outputs

// T3 overview tier. Called sparingly — NOT per tool call. Two insertion
// points: (B) mid-scenario checkpoint after every misplacement event and
// every N events, and (A) cross-day lesson after all events complete.
// gpt-oss:20b is a thinking model: it spends tokens in a hidden reasoning
// block before emitting `response`. Budget accordingly — never under 400.
const OVERVIEW_CLOUD = process.env.LH_OVERVIEW_CLOUD === "1"; // route T3 to Ollama Cloud
const OVERVIEW_MODEL = process.env.LH_OVERVIEW_MODEL ?? (OVERVIEW_CLOUD ? "gpt-oss:120b" : "gpt-oss:20b"); // env override wins; otherwise 120b cloud / 20b local
const T3_CHECKPOINT_EVERY = Number(process.env.LH_T3_CHECKPOINT_EVERY ?? 3); // checkpoint cadence; a non-numeric or 0 value disables the every-N trigger (guarded by > 0 at the call site)
const T3_DISABLED = process.env.LH_T3_DISABLE === "1"; // kill switch for the whole T3 tier
|
||||
|
||||
// Dispatcher: route T3 calls to local sidecar or Ollama Cloud depending
|
||||
// on the LH_OVERVIEW_CLOUD flag. Hot-path T1/T2 always stay local.
|
||||
async function overviewGenerate(prompt: string, opts: { temperature?: number; max_tokens?: number } = {}): Promise<string> {
|
||||
if (OVERVIEW_CLOUD) return generateCloud(OVERVIEW_MODEL, prompt, opts);
|
||||
return generate(OVERVIEW_MODEL, prompt, opts);
|
||||
}
|
||||
|
||||
const MAX_TURNS = 14; // hard cap on agent turns per event
const MAX_CONSECUTIVE_DRIFTS = 3; // presumably bails after this many drifting turns in a row — consuming loop not visible in this chunk; confirm
const WORKERS_INDEX = "workers_500k_v1"; // search index name for the worker pool
|
||||
@ -723,13 +742,122 @@ async function resolveWorkerIds(fills: Fill[], event: FillEvent): Promise<Fill[]
|
||||
return resolved;
|
||||
}
|
||||
|
||||
// =================== T3 overview tier ===================
// Called sparingly so reasoning overhead stays amortized.
// (B) Checkpoint — after every misplacement AND every N-th event.
// (A) Cross-day lesson — once at end of scenario.
// Results land in `checkpoints.jsonl` and `lesson.md`, and the lesson
// seeds playbook_memory under operation "cross-day-lesson-{date}" so
// future scenarios can surface it on similar setups.

// One mid-day checkpoint record, serialized as a line of checkpoints.jsonl.
interface OverviewCheckpoint {
  after_event: string; // event.at label
  event_kind: EventKind;
  ok: boolean; // true only when both RISK and HINT parsed out of the model reply
  model: string; // which T3 model produced this checkpoint (OVERVIEW_MODEL at call time)
  duration_secs: number; // wall-clock seconds for the T3 call, including failures
  hint: string; // T3's "what to do differently next time"
  risk: string; // T3's named risk flag
}
|
||||
|
||||
async function runOverviewCheckpoint(
|
||||
event: FillEvent,
|
||||
result: EventResult,
|
||||
prior: EventResult[],
|
||||
): Promise<OverviewCheckpoint | null> {
|
||||
if (T3_DISABLED) return null;
|
||||
const start = Date.now();
|
||||
|
||||
const priorSummary = prior.slice(-3).map(p =>
|
||||
`- ${p.event.at} ${p.event.kind} ${p.event.role}×${p.event.count} → ${p.ok ? p.fills.length + "/" + p.event.count + " filled" : "FAIL"}; pool=${p.pool_size ?? "?"}; cites=${p.playbook_citations?.length ?? 0}`
|
||||
).join("\n");
|
||||
|
||||
const thisOne = `This event: ${event.at} ${event.kind} ${event.role}×${event.count} in ${event.city}, ${event.state}. `
|
||||
+ `Outcome: ${result.ok ? "filled " + result.fills.length + "/" + event.count : "FAILED: " + (result.error ?? "unknown")}. `
|
||||
+ `Pool size: ${result.pool_size ?? "n/a"}. Turns: ${result.turns}. Playbook citations: ${result.playbook_citations?.length ?? 0}. `
|
||||
+ `Gap signals: ${result.gap_signals.join("; ") || "none"}.`;
|
||||
|
||||
const prompt = `You are the overview reviewer for a staffing coordinator agent system. A mid-day checkpoint has been triggered.
|
||||
|
||||
Recent events (most recent last):
|
||||
${priorSummary || "(no prior events)"}
|
||||
|
||||
${thisOne}
|
||||
|
||||
Your job: emit ONE risk flag (≤6 words) and ONE actionable hint (≤25 words) for the NEXT event. Be concrete: name the role, city, or worker class if relevant. Do not restate what happened. Think step by step, then output strictly as:
|
||||
|
||||
RISK: <flag>
|
||||
HINT: <hint>`;
|
||||
|
||||
let text = "";
|
||||
try {
|
||||
text = await overviewGenerate(prompt, { temperature: 0.2, max_tokens: 600 });
|
||||
} catch (e) {
|
||||
return {
|
||||
after_event: event.at,
|
||||
event_kind: event.kind,
|
||||
ok: false,
|
||||
model: OVERVIEW_MODEL,
|
||||
duration_secs: (Date.now() - start) / 1000,
|
||||
hint: "(T3 unavailable)",
|
||||
risk: (e as Error).message.slice(0, 80),
|
||||
};
|
||||
}
|
||||
|
||||
const riskMatch = text.match(/RISK:\s*(.+)/i);
|
||||
const hintMatch = text.match(/HINT:\s*(.+)/i);
|
||||
return {
|
||||
after_event: event.at,
|
||||
event_kind: event.kind,
|
||||
ok: Boolean(riskMatch && hintMatch),
|
||||
model: OVERVIEW_MODEL,
|
||||
duration_secs: (Date.now() - start) / 1000,
|
||||
risk: (riskMatch?.[1] ?? "(unparsed)").trim().slice(0, 120),
|
||||
hint: (hintMatch?.[1] ?? text).trim().slice(0, 400),
|
||||
};
|
||||
}
|
||||
|
||||
async function runCrossDayLesson(ctx: ScenarioContext, checkpoints: OverviewCheckpoint[]): Promise<string | null> {
|
||||
if (T3_DISABLED) return null;
|
||||
|
||||
const eventDigest = ctx.results.map(r =>
|
||||
`- ${r.event.at} ${r.event.kind} ${r.event.role}×${r.event.count} ${r.event.city},${r.event.state} → ${r.ok ? r.fills.length + " filled" : "FAIL"}; pool=${r.pool_size ?? "?"}; turns=${r.turns}; cites=${r.playbook_citations?.length ?? 0}; gaps=${r.gap_signals.length}`
|
||||
).join("\n");
|
||||
|
||||
const checkpointDigest = checkpoints.length > 0
|
||||
? checkpoints.map(c => `- after ${c.after_event} (${c.event_kind}): risk="${c.risk}" hint="${c.hint}"`).join("\n")
|
||||
: "(no mid-day checkpoints)";
|
||||
|
||||
const prompt = `You are the end-of-day lesson writer for a staffing coordinator agent system. The day is done. Distill it.
|
||||
|
||||
Client: ${ctx.spec.client} Date: ${ctx.spec.date}
|
||||
|
||||
Events that ran:
|
||||
${eventDigest}
|
||||
|
||||
Mid-day checkpoints:
|
||||
${checkpointDigest}
|
||||
|
||||
Your job: write ONE actionable lesson for future runs that face similar setups. Target audience: the agent tomorrow. Keep the lesson to 3-5 sentences. No filler, no restating. Think step by step about what pattern repeated, what to pre-fetch, or what to avoid — then write the lesson as plain prose.
|
||||
|
||||
LESSON:`;
|
||||
|
||||
try {
|
||||
const text = await overviewGenerate(prompt, { temperature: 0.2, max_tokens: 900 });
|
||||
const m = text.match(/LESSON:\s*([\s\S]+)/i);
|
||||
return (m ? m[1] : text).trim();
|
||||
} catch (e) {
|
||||
return `(T3 lesson unavailable: ${(e as Error).message})`;
|
||||
}
|
||||
}
|
||||
|
||||
// =================== EOD gap report ===================
|
||||
|
||||
async function writeRetrospective(ctx: ScenarioContext): Promise<void> {
|
||||
const lines: string[] = [];
|
||||
lines.push(`# Scenario retrospective — ${ctx.spec.client}, ${ctx.spec.date}`);
|
||||
lines.push("");
|
||||
lines.push(`Executor: \`${EXECUTOR_MODEL}\` Reviewer: \`${REVIEWER_MODEL}\` Draft: \`${DRAFT_MODEL}\``);
|
||||
lines.push(`Executor: \`${EXECUTOR_MODEL}\` Reviewer: \`${REVIEWER_MODEL}\` Draft: \`${DRAFT_MODEL}\` Overview(T3): \`${T3_DISABLED ? "disabled" : OVERVIEW_MODEL}\``);
|
||||
lines.push("");
|
||||
|
||||
// --- Per-event summary ---
|
||||
@ -882,11 +1010,16 @@ async function main() {
|
||||
await writeFile(join(out_dir, "sms.md"), `# SMS drafts — ${spec.client}, ${spec.date}\n`);
|
||||
await writeFile(join(out_dir, "emails.md"), `# Client emails — ${spec.client}, ${spec.date}\n`);
|
||||
await writeFile(join(out_dir, "dispatch.jsonl"), "");
|
||||
await writeFile(join(out_dir, "checkpoints.jsonl"), "");
|
||||
|
||||
const checkpoints: OverviewCheckpoint[] = [];
|
||||
|
||||
console.log(`▶ scenario: ${spec.client}, ${spec.date}, ${spec.events.length} events`);
|
||||
console.log(`▶ models: exec=${EXECUTOR_MODEL} review=${REVIEWER_MODEL} overview=${T3_DISABLED ? "disabled" : OVERVIEW_MODEL + (OVERVIEW_CLOUD ? " (cloud)" : "")}`);
|
||||
console.log(`▶ out: ${out_dir}\n`);
|
||||
|
||||
for (const event of spec.events) {
|
||||
for (let i = 0; i < spec.events.length; i++) {
|
||||
const event = spec.events[i];
|
||||
// Expand misplacement-style exclusions from the current roster: it
|
||||
// wants to replace a worker from a prior event, so grab everyone
|
||||
// booked at that at-label and add as exclusions.
|
||||
@ -916,6 +1049,19 @@ async function main() {
|
||||
ctx.gap_signals.push({ event: event.at, category: category.trim(), detail: rest.join(":").trim() });
|
||||
}
|
||||
|
||||
// Option B — T3 checkpoint after every misplacement, and every N-th event.
|
||||
const isLast = i === spec.events.length - 1;
|
||||
const nthHit = T3_CHECKPOINT_EVERY > 0 && ((i + 1) % T3_CHECKPOINT_EVERY === 0);
|
||||
const shouldCheckpoint = !T3_DISABLED && (event.kind === "misplacement" || nthHit || isLast);
|
||||
if (shouldCheckpoint) {
|
||||
const cp = await runOverviewCheckpoint(event, result, ctx.results.slice(0, -1));
|
||||
if (cp) {
|
||||
checkpoints.push(cp);
|
||||
await appendFile(join(out_dir, "checkpoints.jsonl"), JSON.stringify(cp) + "\n");
|
||||
console.log(` T3 checkpoint (${cp.duration_secs.toFixed(1)}s): risk="${cp.risk}" hint="${cp.hint.slice(0, 80)}${cp.hint.length > 80 ? "…" : ""}"`);
|
||||
}
|
||||
}
|
||||
|
||||
// Small breather to not hammer Ollama on back-to-back runs.
|
||||
await new Promise(r => setTimeout(r, 500));
|
||||
}
|
||||
@ -924,6 +1070,48 @@ async function main() {
|
||||
await writeFile(join(out_dir, "roster.json"), JSON.stringify(ctx.roster, null, 2));
|
||||
await writeFile(join(out_dir, "results.json"), JSON.stringify(ctx.results, null, 2));
|
||||
|
||||
// Option A — T3 cross-day lesson. One final call distills the whole run.
|
||||
// Saved to lesson.md and also seeded into playbook_memory so tomorrow's
|
||||
// agent can retrieve it on similar setups.
|
||||
if (!T3_DISABLED) {
|
||||
console.log(`\n▶ T3 cross-day lesson via ${OVERVIEW_MODEL}…`);
|
||||
const tLesson = Date.now();
|
||||
const lesson = await runCrossDayLesson(ctx, checkpoints);
|
||||
const lessonSecs = ((Date.now() - tLesson) / 1000).toFixed(1);
|
||||
if (lesson) {
|
||||
await writeFile(
|
||||
join(out_dir, "lesson.md"),
|
||||
`# Cross-day lesson — ${ctx.spec.client}, ${ctx.spec.date}\n\n`
|
||||
+ `_Generated by \`${OVERVIEW_MODEL}\` in ${lessonSecs}s. `
|
||||
+ `Based on ${ctx.results.length} events + ${checkpoints.length} mid-day checkpoints._\n\n`
|
||||
+ lesson + "\n"
|
||||
);
|
||||
console.log(`✓ lesson (${lessonSecs}s) → ${join(out_dir, "lesson.md")}`);
|
||||
|
||||
// Seed the lesson into playbook_memory for future retrieval. Keep
|
||||
// the embedded `approach` + `context` terse per feedback_phase19_seed_text.md;
|
||||
// the rich prose lives in lesson.md and a separate `rationale` field.
|
||||
try {
|
||||
const kinds = [...new Set(ctx.spec.events.map(e => e.kind))].join("+");
|
||||
const cities = [...new Set(ctx.spec.events.map(e => e.city))].slice(0, 3).join(",");
|
||||
await fetch(`${GATEWAY}/vectors/playbook_memory/seed`, {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
operation: `cross-day-lesson-${ctx.spec.date}`,
|
||||
approach: `${kinds} day in ${cities}`,
|
||||
context: `${ctx.spec.client} ${ctx.spec.date}`,
|
||||
rationale: lesson.slice(0, 2000),
|
||||
endorsed_names: [],
|
||||
append: true,
|
||||
}),
|
||||
});
|
||||
} catch (e) {
|
||||
console.log(` (lesson seed skipped: ${(e as Error).message})`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
await writeRetrospective(ctx);
|
||||
|
||||
const okCount = ctx.results.filter(r => r.ok).length;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user