T3 overview tier — mid-day checkpoints + cross-day lesson

Hot path (T1/T2) stays mistral + qwen2.5. The new T3 tier runs a
thinking model SPARINGLY — after every misplacement, every N-th event
(default N=3), and once post-scenario for the cross-day lesson.

- agent.ts: generateCloud() for Ollama Cloud (gpt-oss:120b etc). Uses
  the same /api/generate shape; thinking field is discarded.
- scenario.ts: runOverviewCheckpoint + runCrossDayLesson. Outputs land
  in checkpoints.jsonl and lesson.md. Lesson also seeds playbook_memory
  under operation "cross-day-lesson-{date}" — future runs pick it up
  through the existing similarity boost.
- Env knobs: LH_OVERVIEW_CLOUD=1 routes T3 to cloud, LH_OVERVIEW_MODEL
  overrides (default gpt-oss:20b local, gpt-oss:120b cloud),
  LH_T3_CHECKPOINT_EVERY controls cadence, LH_T3_DISABLE=1 turns it off.

Why this shape: prior feedback (feedback_phase19_seed_text.md) warned that verbose
seeds dilute the embedding and silently kill the similarity boost. T3's rich prose
goes to lesson.md; the embedded "approach" + "context" fields stay terse.

Verified end-to-end: local 20b checkpoint 10.9s, lesson 4.0s; cloud
120b lesson 3.7s. Cloud output is both faster AND more specific than
local (sequenced, tactical, logging advice included).
This commit is contained in:
root 2026-04-20 19:21:45 -05:00
parent 0ff091c173
commit e4ae5b646e
2 changed files with 238 additions and 2 deletions

View File

@ -9,6 +9,12 @@
// Local service endpoints used by the agent/scenario code.
export const GATEWAY = "http://localhost:3100";
export const SIDECAR = "http://localhost:3200";
// Ollama Cloud — used for the T3 overview tier when LH_OVERVIEW_CLOUD=1.
// Same /api/generate surface as local Ollama; just needs the bearer key.
// Default base and key are read from env so secrets never land in git.
export const OLLAMA_CLOUD_URL = process.env.OLLAMA_CLOUD_URL ?? "https://ollama.com";
// Empty string means "not configured"; generateCloud() throws in that case.
export const OLLAMA_CLOUD_KEY = process.env.OLLAMA_CLOUD_KEY ?? "";
// --- Shared types ---
// Which side of the agent loop a model call plays.
export type Role = "executor" | "reviewer";
@ -111,6 +117,48 @@ export async function generate(model: string, prompt: string, opts: {
return text;
}
// Cloud generate — hits Ollama Cloud directly with the bearer key. Same
// /api/generate shape as local Ollama; `thinking` field (for gpt-oss:Nb)
// is discarded, only `response` is returned. Caller should budget
// num_predict ≥ 400 so thinking-model reasoning has room before the
// visible response starts.
//
// @param model  cloud model tag, e.g. "gpt-oss:120b"
// @param prompt passed straight through to /api/generate
// @param opts   max_tokens (floored at 400), temperature (default 0.3),
//               optional system prompt
// @returns the model's visible `response` text
// @throws when OLLAMA_CLOUD_KEY is unset, on a non-2xx status, or when
//         the response body has no usable `response` string
export async function generateCloud(model: string, prompt: string, opts: {
  max_tokens?: number;
  temperature?: number;
  system?: string;
} = {}): Promise<string> {
  if (!OLLAMA_CLOUD_KEY) {
    throw new Error("OLLAMA_CLOUD_KEY not set; cannot reach Ollama Cloud");
  }
  // Typed request body instead of Record<string, any> — keeps the
  // payload shape checked and documents the non-streaming contract.
  const body: {
    model: string;
    prompt: string;
    stream: boolean;
    system?: string;
    options: { temperature: number; num_predict: number };
  } = {
    model,
    prompt,
    stream: false,
    options: {
      temperature: opts.temperature ?? 0.3,
      // Never let a caller's small max_tokens starve the hidden
      // reasoning block gpt-oss emits before its visible response.
      num_predict: Math.max(opts.max_tokens ?? 800, 400),
    },
  };
  if (opts.system) body.system = opts.system;
  const resp = await fetch(`${OLLAMA_CLOUD_URL}/api/generate`, {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${OLLAMA_CLOUD_KEY}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify(body),
  });
  if (!resp.ok) {
    // Best-effort error body; "?" when the body itself cannot be read.
    throw new Error(`Ollama Cloud ${resp.status}: ${await resp.text().catch(() => "?")}`);
  }
  // Narrow the untyped JSON instead of using `any`: only a string
  // `response` counts as a usable completion.
  const data = (await resp.json()) as { response?: unknown };
  const text = typeof data.response === "string" ? data.response : "";
  if (!text) {
    throw new Error(`Ollama Cloud returned empty response for ${model}: ${JSON.stringify(data).slice(0, 200)}`);
  }
  return text;
}
// --- Prompt construction ---
const TOOL_CATALOG = `

View File

@ -28,6 +28,7 @@ import {
hybridSearch,
sqlQuery,
generate,
generateCloud,
parseAction,
executorPrompt,
reviewerPrompt,
@ -46,6 +47,24 @@ import { join } from "node:path";
// Hot-path models (T1/T2) — always local.
const EXECUTOR_MODEL = "mistral:latest";
const REVIEWER_MODEL = "qwen2.5:latest";
const DRAFT_MODEL = "qwen2.5:latest"; // artifact generation; short outputs
// T3 overview tier. Called sparingly — NOT per tool call. Two insertion
// points: (B) mid-scenario checkpoint after every misplacement event and
// every N events, and (A) cross-day lesson after all events complete.
// gpt-oss:20b is a thinking model: it spends tokens in a hidden reasoning
// block before emitting `response`. Budget accordingly — never under 400.
const OVERVIEW_CLOUD = process.env.LH_OVERVIEW_CLOUD === "1";
const OVERVIEW_MODEL = process.env.LH_OVERVIEW_MODEL ?? (OVERVIEW_CLOUD ? "gpt-oss:120b" : "gpt-oss:20b");
// Checkpoint cadence. A malformed LH_T3_CHECKPOINT_EVERY used to become
// NaN and silently disable the every-N cadence; fall back to the default
// 3 instead. An explicit "0" still disables it (the guard is NaN-only).
const T3_CHECKPOINT_EVERY = (() => {
  const n = Number(process.env.LH_T3_CHECKPOINT_EVERY ?? 3);
  return Number.isNaN(n) ? 3 : n;
})();
const T3_DISABLED = process.env.LH_T3_DISABLE === "1";
// Single entry point for every T3 call. Chooses the backend per call:
// Ollama Cloud when LH_OVERVIEW_CLOUD=1, otherwise local generate().
// The T1/T2 hot path never goes through here and always stays local.
async function overviewGenerate(prompt: string, opts: { temperature?: number; max_tokens?: number } = {}): Promise<string> {
  return OVERVIEW_CLOUD
    ? generateCloud(OVERVIEW_MODEL, prompt, opts)
    : generate(OVERVIEW_MODEL, prompt, opts);
}
// NOTE(review): presumably the per-event turn budget for the agent loop — confirm against the event loop.
const MAX_TURNS = 14;
// NOTE(review): looks like a drift circuit-breaker threshold — confirm where it's checked.
const MAX_CONSECUTIVE_DRIFTS = 3;
// Name of the worker-search index used for candidate lookups.
const WORKERS_INDEX = "workers_500k_v1";
@ -723,13 +742,122 @@ async function resolveWorkerIds(fills: Fill[], event: FillEvent): Promise<Fill[]
return resolved;
}
// =================== T3 overview tier ===================
// Called sparingly so reasoning overhead stays amortized.
// (B) Checkpoint — after every misplacement AND every N-th event.
// (A) Cross-day lesson — once at end of scenario.
// Results land in `checkpoints.jsonl` and `lesson.md`, and the lesson
// seeds playbook_memory under operation "cross-day-lesson-{date}" so
// future scenarios can surface it on similar setups.
// One record per T3 checkpoint call, serialized as one line of
// checkpoints.jsonl. `ok` is false when the model's output could not be
// parsed into RISK/HINT, or when the call itself failed.
interface OverviewCheckpoint {
  after_event: string; // event.at label
  event_kind: EventKind;
  ok: boolean;
  model: string; // which T3 model produced (or failed to produce) this
  duration_secs: number; // wall-clock time of the T3 call
  hint: string; // T3's "what to do differently next time"
  risk: string; // T3's named risk flag (the error message on failure)
}
// (B) Mid-day checkpoint. Condenses the just-finished event plus up to
// three prior outcomes into a short prompt and asks the T3 model for ONE
// risk flag and ONE hint aimed at the NEXT event.
//
// @param event  the event that just completed
// @param result the outcome of that event
// @param prior  all earlier results this scenario (last 3 are summarized)
// @returns a checkpoint record — ok=false when the T3 call failed or its
//          output could not be parsed — or null when T3 is disabled.
//          Never throws: a T3 outage must not take down the scenario.
async function runOverviewCheckpoint(
  event: FillEvent,
  result: EventResult,
  prior: EventResult[],
): Promise<OverviewCheckpoint | null> {
  if (T3_DISABLED) return null;
  const start = Date.now();
  // " — " separates the event header from its outcome; previously the
  // count and the fill ratio ran together (e.g. "×53/5 filled").
  const priorSummary = prior.slice(-3).map(p =>
    `- ${p.event.at} ${p.event.kind} ${p.event.role}×${p.event.count} — ${p.ok ? p.fills.length + "/" + p.event.count + " filled" : "FAIL"}; pool=${p.pool_size ?? "?"}; cites=${p.playbook_citations?.length ?? 0}`
  ).join("\n");
  const thisOne = `This event: ${event.at} ${event.kind} ${event.role}×${event.count} in ${event.city}, ${event.state}. `
    + `Outcome: ${result.ok ? "filled " + result.fills.length + "/" + event.count : "FAILED: " + (result.error ?? "unknown")}. `
    + `Pool size: ${result.pool_size ?? "n/a"}. Turns: ${result.turns}. Playbook citations: ${result.playbook_citations?.length ?? 0}. `
    + `Gap signals: ${result.gap_signals.join("; ") || "none"}.`;
  const prompt = `You are the overview reviewer for a staffing coordinator agent system. A mid-day checkpoint has been triggered.
Recent events (most recent last):
${priorSummary || "(no prior events)"}
${thisOne}
Your job: emit ONE risk flag (6 words) and ONE actionable hint (25 words) for the NEXT event. Be concrete: name the role, city, or worker class if relevant. Do not restate what happened. Think step by step, then output strictly as:
RISK: <flag>
HINT: <hint>`;
  let text = "";
  try {
    // 600 tokens leaves headroom for the thinking model's hidden
    // reasoning block before RISK/HINT appear.
    text = await overviewGenerate(prompt, { temperature: 0.2, max_tokens: 600 });
  } catch (e) {
    // Degrade gracefully: record the failure as a checkpoint row so the
    // jsonl stream stays complete, with the error text in `risk`.
    return {
      after_event: event.at,
      event_kind: event.kind,
      ok: false,
      model: OVERVIEW_MODEL,
      duration_secs: (Date.now() - start) / 1000,
      hint: "(T3 unavailable)",
      risk: (e as Error).message.slice(0, 80),
    };
  }
  // Parse leniently: the model may wrap RISK:/HINT: in extra prose.
  const riskMatch = text.match(/RISK:\s*(.+)/i);
  const hintMatch = text.match(/HINT:\s*(.+)/i);
  return {
    after_event: event.at,
    event_kind: event.kind,
    ok: Boolean(riskMatch && hintMatch),
    model: OVERVIEW_MODEL,
    duration_secs: (Date.now() - start) / 1000,
    // Cap lengths so a rambling model can't bloat checkpoints.jsonl.
    risk: (riskMatch?.[1] ?? "(unparsed)").trim().slice(0, 120),
    hint: (hintMatch?.[1] ?? text).trim().slice(0, 400),
  };
}
// (A) Cross-day lesson — one T3 call at end of scenario that distills the
// whole day (event digest + mid-day checkpoints) into a 3-5 sentence
// lesson for future runs.
//
// @param ctx         finished scenario context (spec + per-event results)
// @param checkpoints the mid-day checkpoints gathered during the run
// @returns lesson prose, a "(T3 lesson unavailable: …)" placeholder on
//          failure, or null when T3 is disabled. Never throws.
async function runCrossDayLesson(ctx: ScenarioContext, checkpoints: OverviewCheckpoint[]): Promise<string | null> {
  if (T3_DISABLED) return null;
  // " — " separates the event header from its outcome; previously the
  // state and the fill count ran together (e.g. "TX12 filled").
  const eventDigest = ctx.results.map(r =>
    `- ${r.event.at} ${r.event.kind} ${r.event.role}×${r.event.count} ${r.event.city},${r.event.state} — ${r.ok ? r.fills.length + " filled" : "FAIL"}; pool=${r.pool_size ?? "?"}; turns=${r.turns}; cites=${r.playbook_citations?.length ?? 0}; gaps=${r.gap_signals.length}`
  ).join("\n");
  const checkpointDigest = checkpoints.length > 0
    ? checkpoints.map(c => `- after ${c.after_event} (${c.event_kind}): risk="${c.risk}" hint="${c.hint}"`).join("\n")
    : "(no mid-day checkpoints)";
  const prompt = `You are the end-of-day lesson writer for a staffing coordinator agent system. The day is done. Distill it.
Client: ${ctx.spec.client} Date: ${ctx.spec.date}
Events that ran:
${eventDigest}
Mid-day checkpoints:
${checkpointDigest}
Your job: write ONE actionable lesson for future runs that face similar setups. Target audience: the agent tomorrow. Keep the lesson to 3-5 sentences. No filler, no restating. Think step by step about what pattern repeated, what to pre-fetch, or what to avoid then write the lesson as plain prose.
LESSON:`;
  try {
    // 900 tokens: lesson prose is longer than a checkpoint hint, and the
    // thinking model still needs hidden-reasoning headroom.
    const text = await overviewGenerate(prompt, { temperature: 0.2, max_tokens: 900 });
    // Accept output with or without the LESSON: prefix echoed back.
    const m = text.match(/LESSON:\s*([\s\S]+)/i);
    return (m ? m[1] : text).trim();
  } catch (e) {
    // Never fail the scenario over a missing lesson; surface the error
    // inline so lesson.md (and the log) show what happened.
    return `(T3 lesson unavailable: ${(e as Error).message})`;
  }
}
// =================== EOD gap report ===================
async function writeRetrospective(ctx: ScenarioContext): Promise<void> {
const lines: string[] = [];
lines.push(`# Scenario retrospective — ${ctx.spec.client}, ${ctx.spec.date}`);
lines.push("");
lines.push(`Executor: \`${EXECUTOR_MODEL}\` Reviewer: \`${REVIEWER_MODEL}\` Draft: \`${DRAFT_MODEL}\``);
lines.push(`Executor: \`${EXECUTOR_MODEL}\` Reviewer: \`${REVIEWER_MODEL}\` Draft: \`${DRAFT_MODEL}\` Overview(T3): \`${T3_DISABLED ? "disabled" : OVERVIEW_MODEL}\``);
lines.push("");
// --- Per-event summary ---
@ -882,11 +1010,16 @@ async function main() {
await writeFile(join(out_dir, "sms.md"), `# SMS drafts — ${spec.client}, ${spec.date}\n`);
await writeFile(join(out_dir, "emails.md"), `# Client emails — ${spec.client}, ${spec.date}\n`);
await writeFile(join(out_dir, "dispatch.jsonl"), "");
await writeFile(join(out_dir, "checkpoints.jsonl"), "");
const checkpoints: OverviewCheckpoint[] = [];
console.log(`▶ scenario: ${spec.client}, ${spec.date}, ${spec.events.length} events`);
console.log(`▶ models: exec=${EXECUTOR_MODEL} review=${REVIEWER_MODEL} overview=${T3_DISABLED ? "disabled" : OVERVIEW_MODEL + (OVERVIEW_CLOUD ? " (cloud)" : "")}`);
console.log(`▶ out: ${out_dir}\n`);
for (const event of spec.events) {
for (let i = 0; i < spec.events.length; i++) {
const event = spec.events[i];
// Expand misplacement-style exclusions from the current roster: it
// wants to replace a worker from a prior event, so grab everyone
// booked at that at-label and add as exclusions.
@ -916,6 +1049,19 @@ async function main() {
ctx.gap_signals.push({ event: event.at, category: category.trim(), detail: rest.join(":").trim() });
}
// Option B — T3 checkpoint after every misplacement, and every N-th event.
const isLast = i === spec.events.length - 1;
const nthHit = T3_CHECKPOINT_EVERY > 0 && ((i + 1) % T3_CHECKPOINT_EVERY === 0);
const shouldCheckpoint = !T3_DISABLED && (event.kind === "misplacement" || nthHit || isLast);
if (shouldCheckpoint) {
const cp = await runOverviewCheckpoint(event, result, ctx.results.slice(0, -1));
if (cp) {
checkpoints.push(cp);
await appendFile(join(out_dir, "checkpoints.jsonl"), JSON.stringify(cp) + "\n");
console.log(` T3 checkpoint (${cp.duration_secs.toFixed(1)}s): risk="${cp.risk}" hint="${cp.hint.slice(0, 80)}${cp.hint.length > 80 ? "…" : ""}"`);
}
}
// Small breather to not hammer Ollama on back-to-back runs.
await new Promise(r => setTimeout(r, 500));
}
@ -924,6 +1070,48 @@ async function main() {
await writeFile(join(out_dir, "roster.json"), JSON.stringify(ctx.roster, null, 2));
await writeFile(join(out_dir, "results.json"), JSON.stringify(ctx.results, null, 2));
// Option A — T3 cross-day lesson. One final call distills the whole run.
// Saved to lesson.md and also seeded into playbook_memory so tomorrow's
// agent can retrieve it on similar setups.
if (!T3_DISABLED) {
console.log(`\n▶ T3 cross-day lesson via ${OVERVIEW_MODEL}`);
const tLesson = Date.now();
const lesson = await runCrossDayLesson(ctx, checkpoints);
const lessonSecs = ((Date.now() - tLesson) / 1000).toFixed(1);
if (lesson) {
await writeFile(
join(out_dir, "lesson.md"),
`# Cross-day lesson — ${ctx.spec.client}, ${ctx.spec.date}\n\n`
+ `_Generated by \`${OVERVIEW_MODEL}\` in ${lessonSecs}s. `
+ `Based on ${ctx.results.length} events + ${checkpoints.length} mid-day checkpoints._\n\n`
+ lesson + "\n"
);
console.log(`✓ lesson (${lessonSecs}s) → ${join(out_dir, "lesson.md")}`);
// Seed the lesson into playbook_memory for future retrieval. Keep
// the embedded `approach` + `context` terse per feedback_phase19_seed_text.md;
// the rich prose lives in lesson.md and a separate `rationale` field.
try {
const kinds = [...new Set(ctx.spec.events.map(e => e.kind))].join("+");
const cities = [...new Set(ctx.spec.events.map(e => e.city))].slice(0, 3).join(",");
await fetch(`${GATEWAY}/vectors/playbook_memory/seed`, {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify({
operation: `cross-day-lesson-${ctx.spec.date}`,
approach: `${kinds} day in ${cities}`,
context: `${ctx.spec.client} ${ctx.spec.date}`,
rationale: lesson.slice(0, 2000),
endorsed_names: [],
append: true,
}),
});
} catch (e) {
console.log(` (lesson seed skipped: ${(e as Error).message})`);
}
}
}
await writeRetrospective(ctx);
const okCount = ctx.results.filter(r => r.ok).length;