Three coupled fixes that together took the Riverfront Steel scenario
from 0/5 (mistral) to 4/5 (qwen3.5), with T3 flagging real staffing
concerns rather than linter advice.
MODEL SWAP
- Executor: mistral → qwen3.5:latest (9.7B, 262K ctx, thinking).
mistral's decoder emitted malformed JSON on complex SQL filters
regardless of prompt; J called it — stop using mistral.
- Reviewer: qwen2.5 → qwen3:latest (40K ctx).
- Applied to scenario.ts, orchestrator.ts, network_proving.ts,
  run_e2e_rated.ts.
CONTINUATION PRIMITIVE (agent.ts)
- generateContinuable(): empty-response → geometric backoff retry;
  truncated-JSON → continue from partial as scratchpad; bounded by
  budget cap + max_continuations (sketch after this list). No more
  "bump max_tokens until it stops truncating" tourniquet.
- generateTreeSplit(): map-reduce for oversized input corpora with
running scratchpad digest, reduce pass for final synthesis.
- Empty text no longer throws — it's a signal to continuable that
thinking ate the budget.
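Shape of the continuable loop, as a minimal sketch (illustrative only:
generate() and looksTruncated() are stand-in declarations, the scratchpad
splice format is invented, and the real agent.ts version carries more
state):

// Stand-ins for the real agent.ts exports; signatures assumed, not verbatim.
declare function generate(model: string, prompt: string, o: { max_tokens: number }): Promise<string>;
declare function looksTruncated(text: string): boolean; // e.g. unbalanced JSON braces

async function generateContinuable(
  model: string,
  prompt: string,
  opts: { max_tokens: number; budget_cap: number; max_continuations: number },
): Promise<string> {
  let out = "";
  let spent = 0;
  for (let i = 0; i <= opts.max_continuations && spent < opts.budget_cap; i++) {
    const p = out
      ? `${prompt}\n\nPARTIAL OUTPUT SO FAR (scratchpad, continue it):\n${out}`
      : prompt;
    const chunk = await generate(model, p, { max_tokens: opts.max_tokens });
    spent += opts.max_tokens;
    if (chunk.trim() === "") {
      // Empty text is a signal, not an error: thinking ate the budget.
      // Back off geometrically and retry the same call.
      await new Promise(res => setTimeout(res, 500 * 2 ** i));
      continue;
    }
    out += chunk;
    if (!looksTruncated(out)) return out; // complete: hand back to the parser
  }
  return out; // bounded exit; caller decides whether the partial is usable
}
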
think:false FOR HOT PATH
- qwen3.5 burned ~650 tokens of hidden thinking for trivial JSON
emission. For executor/reviewer/draft: think:false. For T3/T4/T5
overseers: thinking stays on (that's the point).
- Sidecar generate endpoint accepts a `think` bool and passes it through
  to Ollama's /api/generate (sketch below).
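The pass-through itself, sketched (`think` and `stream` are real
/api/generate request fields; the sidecar's own route shape isn't
reproduced here, and OLLAMA_URL is just the stock default):

const OLLAMA_URL = "http://localhost:11434"; // default Ollama listen address

// Forward the caller's think flag untouched: hot-path roles send
// think:false, T3/T4/T5 overseers send think:true.
async function ollamaGenerate(model: string, prompt: string, think: boolean): Promise<string> {
  const r = await fetch(`${OLLAMA_URL}/api/generate`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model, prompt, think, stream: false }),
  });
  if (!r.ok) throw new Error(`ollama → ${r.status}: ${await r.text()}`);
  const j = await r.json() as { response: string; thinking?: string };
  return j.response; // hidden reasoning, when enabled, arrives separately in `thinking`
}
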
VERIFIED OUTCOMES
Riverfront Steel 2026-04-21, qwen3.5+continuable+think:false:
08:00 baseline_fill 3/3 4 turns
10:30 recurring 2/2 3 turns (1 playbook citation)
12:15 expansion 0/5 drift-aborted (5-fill orchestration
problem, separate work)
14:00 emergency 4/4 3 turns (1 citation)
15:45 misplacement 1/1 3 turns
→ T3 caught Patrick Ross double-booking across events
→ T3 flagged forklift cert drift on the event that failed
→ Cross-day lesson proposed "maintain buffer of ≥3 emergency
candidates, pre-fetch certs for expansion, booking system
cross-check" — real staffing advice, not generic linter output
PRD PHASE 21 rewritten to reflect the actual primitive shape (two-
call map-reduce with scratchpad glue) instead of the tourniquet
approach originally documented. Rust port queued for next sprint.
scripts/ab_t3_test.sh: A/B harness that chains B→C→D runs and emits
tests/multi-agent/playbooks/ab_scorecard.json.
tests/multi-agent/network_proving.ts (470 lines, 20 KiB, TypeScript):

// Network proving: continuous build → verify → repeat with hot-swap profile.
//
// J's framing: "have them guide each other, when the test is complete we have
// a successful playbook, then spin up another agent that tests the viability
// of our network with the playbook and the hot-swap profile. Keep spinning up
// agents and testing — pass theory, real-world execution, not isolated unit
// tests."
//
// Each round = TWO phases:
//
//   1. BUILD phase. Two agents (qwen3.5 executor + qwen3 reviewer) work
//      on a real staffing fill task. They guide each other via the critique
//      loop. On consensus → seal a playbook with CANONICAL short seed text
//      (the Pass 1 lesson — verbose seeds silently kill boost). Real Ollama,
//      real workers_500k, real /vectors/hybrid path.
//
//   2. VERIFY phase. A FRESH qwen2.5 agent spins up, activates the
//      staffing-recruiter profile (Phase 17 hot-swap), runs a probe query
//      against the same network, and judges from the live response whether
//      prior rounds' playbooks actually surface relevant workers higher.
//      The verifier writes a verdict: did the network learn?
//
// Three rounds, progressively harder:
//   R0: Welder x2 in Toledo, OH    — baseline
//   R1: Welder x2 in Cleveland, OH — same role, different city → tests geo
//       discrimination (Toledo workers MUST NOT bleed into Cleveland boost)
//   R2: Welder x3 in Toledo, OH    — re-fill same city, bigger count → tests
//       compounding (R0's endorsements should still rank up here)
//
// Run: bun run tests/multi-agent/network_proving.ts
//
// Fail-fast: any HTTP error or model crash bubbles to top-level, exits 1.

import {
  type LogEntry,
  type TaskSpec,
  type Action,
  type Fill,
  GATEWAY,
  generate,
  parseAction,
  executorPrompt,
  reviewerPrompt,
  sqlQuery,
  callTool,
} from "./agent.ts";

const EXECUTOR_MODEL = "qwen3.5:latest";
const REVIEWER_MODEL = "qwen3:latest";
const VERIFIER_MODEL = "qwen2.5:latest";
const PROFILE_ID = "staffing-recruiter";
const INDEX_NAME = "workers_500k_v1";
const MAX_TURNS = 12;
const MAX_TOOL_ERRORS = 3;
const MAX_DRIFTS = 3;

const TASK_DECK: TaskSpec[] = [
  {
    id: "R0", operation: "fill: Welder x2 in Toledo, OH",
    target_role: "Welder", target_count: 2, target_city: "Toledo", target_state: "OH",
    approach_hint: "hybrid_search workers_500k_v1 with sql_filter role+state+city, then sql verify",
  },
  {
    id: "R1", operation: "fill: Welder x2 in Cleveland, OH",
    target_role: "Welder", target_count: 2, target_city: "Cleveland", target_state: "OH",
    approach_hint: "hybrid_search workers_500k_v1 with sql_filter role+state+city, then sql verify",
  },
  {
    id: "R2", operation: "fill: Welder x3 in Toledo, OH",
    target_role: "Welder", target_count: 3, target_city: "Toledo", target_state: "OH",
    approach_hint: "hybrid_search workers_500k_v1 with sql_filter role+state+city, then sql verify",
  },
];

interface BuildResult {
  ok: boolean;
  task: TaskSpec;
  fills: Fill[];
  turns: number;
  duration_secs: number;
  playbook_id?: string;
  entries_after_seed?: number;
  error?: string;
}

interface VerifyResult {
  profile_activated: boolean;
  warmed_indexes: number;
  probe_boost_total: number;      // sum of playbook_boost across top-K
  probe_boosted_hits: number;     // how many hits had boost > 0
  probe_top_citations: string[];  // playbook_ids cited
  geo_discrimination_ok: boolean; // when the prior playbook is in a different city, boost should NOT bleed
  verdict: string;                // qwen2.5's natural-language judgment
  confidence: number;             // 0-100 self-rated
  duration_secs: number;
}

interface RoundLedger {
  round: number;
  task: TaskSpec;
  build: BuildResult;
  verify: VerifyResult;
  score: number; // /10 per round
  notes: string[];
}

// ─────────────────────── BUILD phase (two-agent loop) ───────────────────────

async function executeToolCall(name: string, args: Record<string, any>): Promise<any> {
  if (name === "hybrid_search") {
    const { sql_filter, question, index_name, k } = args;
    if (!sql_filter || !question || !index_name) {
      throw new Error(`hybrid_search needs sql_filter+question+index_name, got keys=${Object.keys(args).join(",")}`);
    }
    const r = await fetch(`${GATEWAY}/vectors/hybrid`, {
      method: "POST", headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        sql_filter, question, index_name,
        filter_dataset: "workers_500k", id_column: "worker_id",
        top_k: k ?? 10, generate: false, use_playbook_memory: true,
      }),
    });
    if (!r.ok) throw new Error(`hybrid → ${r.status}: ${await r.text()}`);
    return r.json();
  }
  if (name === "sql") {
    if (!args.query) throw new Error("sql needs query");
    if (!/^\s*SELECT/i.test(args.query)) throw new Error("sql allows SELECT only");
    return sqlQuery(args.query);
  }
  return callTool(name, args);
}

function trim(r: any) {
  if (r && Array.isArray(r.rows)) return { ...r, rows: r.rows.slice(0, 20) };
  if (r && Array.isArray(r.sources)) return { ...r, sources: r.sources.slice(0, 12) };
  return r;
}

function fmtTurn(prefix: string, e: Omit<LogEntry, "at">): string {
  const c: any = e.content ?? {};
  const head = `[${prefix} t${e.turn.toString().padStart(2, "0")} ${e.role.padEnd(8)} ${e.kind.padEnd(14)}]`;
  if (e.kind === "tool_call") return `${head} ${c.tool}(${JSON.stringify(c.args ?? {}).slice(0, 70)})`;
  if (e.kind === "tool_result") {
    if (c.error) return `${head} error: ${c.error}`;
    if (Array.isArray(c.sources)) return `${head} hybrid sql=${c.sql_matches} reranked=${c.vector_reranked}`;
    if (Array.isArray(c.rows)) return `${head} sql ${c.rows.length} rows`;
    return `${head} ${JSON.stringify(c).slice(0, 70)}`;
  }
  if (e.kind === "critique") return `${head} verdict=${c.verdict} ${(c.notes ?? "").slice(0, 50)}`;
  if (e.kind === "propose_done") return `${head} ${(c.fills ?? []).length} fills: ${(c.fills ?? []).map((f: Fill) => f.name).join(", ")}`;
  if (e.kind === "consensus_done") return `${head} ✓`;
  if (e.kind === "plan") return `${head} ${(c.steps ?? []).length} steps`;
  return `${head} ${JSON.stringify(c).slice(0, 60)}`;
}

async function buildPhase(task: TaskSpec, prefix: string): Promise<BuildResult> {
  const t0 = Date.now();
  const log: LogEntry[] = [];
  let turn = 0, sealed: { fills: Fill[]; approach: string } | null = null;
  let toolErrors = 0, drifts = 0;

  const append = (e: Omit<LogEntry, "at">): LogEntry => {
    const full: LogEntry = { ...e, at: new Date().toISOString() };
    log.push(full);
    console.log(fmtTurn(prefix, e));
    return full;
  };

  try {
    while (turn < MAX_TURNS && !sealed) {
      turn += 1;

      const execRaw = await generate(EXECUTOR_MODEL, executorPrompt(task, log), { temperature: 0.2, max_tokens: 1200 });
      const execAction = parseAction(execRaw, "executor");
      append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: execAction.kind as any, content: execAction });

      if (execAction.kind === "tool_call") {
        try {
          const r = await executeToolCall(execAction.tool, execAction.args);
          append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result", content: trim(r) });
          toolErrors = 0;
        } catch (e) {
          append({ turn, role: "executor", model: EXECUTOR_MODEL, kind: "tool_result",
            content: { error: (e as Error).message, tool: execAction.tool, args: execAction.args } });
          toolErrors += 1;
          if (toolErrors >= MAX_TOOL_ERRORS) throw new Error(`${MAX_TOOL_ERRORS} consecutive tool errors`);
        }
      }

      const revRaw = await generate(REVIEWER_MODEL, reviewerPrompt(task, log), { temperature: 0.1, max_tokens: 1000 });
      const revAction = parseAction(revRaw, "reviewer");
      append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "critique", content: revAction });

      if (revAction.kind !== "critique") throw new Error("reviewer non-critique");
      if (revAction.verdict === "drift") {
        drifts += 1;
        if (drifts >= MAX_DRIFTS) throw new Error(`${MAX_DRIFTS} consecutive drifts`);
      } else drifts = 0;

      if (execAction.kind === "propose_done" && revAction.verdict === "approve_done") {
        if (execAction.fills.length !== task.target_count) {
          throw new Error(`fills=${execAction.fills.length} target=${task.target_count}`);
        }
        append({ turn, role: "reviewer", model: REVIEWER_MODEL, kind: "consensus_done", content: { fills: execAction.fills } });
        sealed = { fills: execAction.fills, approach: (execAction as any).rationale ?? "multi-agent" };
      }
    }

    if (!sealed) throw new Error(`no consensus after ${MAX_TURNS} turns`);

    // Phase 19 seed — CANONICAL short text (Pass 1 lesson). The verbose
    // executor rationale stays out of the embedding; we keep a separate
    // human-readable record in the playbook log.
    const canonicalApproach = `${task.target_role.toLowerCase()} fill via hybrid search`;
    const canonicalContext = `${task.target_role} fill in ${task.target_city}, ${task.target_state}`;
    let playbook_id: string | undefined;
    let entries_after_seed: number | undefined;
    try {
      const sr = await fetch(`${GATEWAY}/vectors/playbook_memory/seed`, {
        method: "POST", headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          operation: task.operation,
          approach: canonicalApproach,
          context: canonicalContext,
          endorsed_names: sealed.fills.map(f => f.name),
          append: true,
        }),
      });
      if (sr.ok) {
        const j = await sr.json() as any;
        playbook_id = j.playbook_id;
        entries_after_seed = j.entries_after;
        console.log(`[${prefix}] ↳ seeded id=${playbook_id} entries=${entries_after_seed}`);
      } else {
        console.warn(`[${prefix}] ↳ seed failed: ${sr.status} ${await sr.text()}`);
      }
    } catch (e) {
      console.warn(`[${prefix}] ↳ seed errored: ${(e as Error).message}`);
    }

    return {
      ok: true, task, fills: sealed.fills, turns: turn,
      duration_secs: Math.round((Date.now() - t0) / 1000),
      playbook_id, entries_after_seed,
    };
  } catch (e) {
    return {
      ok: false, task, fills: [], turns: turn,
      duration_secs: Math.round((Date.now() - t0) / 1000),
      error: (e as Error).message,
    };
  }
}

// ─────────────────────── VERIFY phase (fresh single agent) ───────────────────────

async function activateProfile(): Promise<{ ok: boolean; warmed: number; ms: number }> {
  const t0 = Date.now();
  const r = await fetch(`${GATEWAY}/vectors/profile/${PROFILE_ID}/activate`, { method: "POST" });
  const ms = Date.now() - t0;
  if (!r.ok) {
    console.warn(`profile activation failed: ${r.status} ${await r.text()}`);
    return { ok: false, warmed: 0, ms };
  }
  const j = await r.json() as any;
  return { ok: true, warmed: (j.warmed_indexes ?? []).length, ms };
}

async function probeWithBoost(task: TaskSpec) {
  const sql_filter = `role = '${task.target_role.replace(/'/g, "''")}' `
    + `AND state = '${task.target_state}' `
    + `AND city = '${task.target_city.replace(/'/g, "''")}'`;
  const r = await fetch(`${GATEWAY}/vectors/hybrid`, {
    method: "POST", headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      index_name: INDEX_NAME, filter_dataset: "workers_500k", id_column: "worker_id",
      sql_filter, question: `${task.target_role} in ${task.target_city}, ${task.target_state}`,
      top_k: 10, generate: false, use_playbook_memory: true, playbook_memory_k: 15,
    }),
  });
  if (!r.ok) throw new Error(`probe → ${r.status}: ${await r.text()}`);
  const j = (await r.json()) as any;
  const sources: any[] = j.sources ?? [];
  const boostedHits = sources.filter(s => (s.playbook_boost ?? 0) > 0).length;
  const totalBoost = sources.reduce((s, x) => s + (x.playbook_boost ?? 0), 0);
  const cites = Array.from(new Set(sources.flatMap(s => s.playbook_citations ?? []))).slice(0, 5);
  const topNames = sources.slice(0, 5).map(s => {
    const t = String(s.chunk_text ?? "");
    return t.split("—")[0]?.trim() ?? s.doc_id;
  });
  return { sources, boostedHits, totalBoost, cites, topNames };
}

// Verifier prompt — fresh agent, no shared log with the build pair. It
// gets the round's task, the prior rounds' sealed playbooks, and the live
// probe result, and renders a human-readable verdict with a confidence.
function verifierPrompt(task: TaskSpec, priorPlaybooks: Array<{ op: string; fills: string[] }>,
  probe: { boostedHits: number; totalBoost: number; cites: string[]; topNames: string[] }
): string {
  const priorBlock = priorPlaybooks.length === 0
    ? "(no prior playbooks — this is the first round)"
    : priorPlaybooks.map((p, i) => `  ${i + 1}. ${p.op} → endorsed [${p.fills.join(", ")}]`).join("\n");

  return `You are the VERIFIER agent. A fresh round just sealed a playbook on a real staffing
substrate. Your job: judge whether the system learned from prior rounds.

CURRENT ROUND:
  task: ${task.operation}
  in city: ${task.target_city}, ${task.target_state}

PRIOR PLAYBOOKS (in playbook_memory):
${priorBlock}

I activated the staffing-recruiter profile and ran a hybrid query for this exact task with
use_playbook_memory=true. Live result from the substrate:
- top-5 surfaced workers: ${probe.topNames.join(", ")}
- hits with non-zero playbook_boost: ${probe.boostedHits} / 10
- total boost across top-10: ${probe.totalBoost.toFixed(3)}
- playbook citations: [${probe.cites.join(", ")}]

JUDGE:
1. If a prior playbook covered this same city + role, the boost should fire on the workers
   it endorsed (boostedHits > 0, citations non-empty).
2. If no prior playbook covers this combo, boost should be ~0 — that means the system is
   correctly NOT bleeding endorsements across geos.
3. Anything in between (e.g. some boost but for the wrong reason) is a partial pass.

Respond with ONE JSON object only:
{"learned": true|false, "verdict": "<one sentence>", "confidence": 0-100}

learned=true means the network behaved as expected for this round (whether that's "boost fired
because it should" or "boost stayed zero because it should"). learned=false means the system
either failed to learn from a relevant prior playbook OR bled an irrelevant one. confidence is
how sure you are.`;
}

async function verifyPhase(task: TaskSpec, ledger: RoundLedger[]): Promise<VerifyResult> {
  const t0 = Date.now();
  const act = await activateProfile();
  const probe = await probeWithBoost(task);

  // Decide what counts as geo-correct based on prior playbooks
  const priorMatchesThisGeo = ledger.some(r =>
    r.build.ok &&
    r.task.target_city === task.target_city &&
    r.task.target_state === task.target_state &&
    r.task.target_role === task.target_role
  );
  const priorOtherGeo = ledger.some(r =>
    r.build.ok &&
    r.task.target_role === task.target_role &&
    !(r.task.target_city === task.target_city && r.task.target_state === task.target_state)
  );

  let geo_discrimination_ok: boolean;
  if (priorMatchesThisGeo) {
    geo_discrimination_ok = probe.boostedHits > 0;   // expected lift
  } else if (priorOtherGeo) {
    geo_discrimination_ok = probe.boostedHits === 0; // must NOT bleed
  } else {
    geo_discrimination_ok = true;                    // no signal expected either way
  }

  // Spin up the fresh verifier agent
  const priorPlaybooks = ledger.filter(r => r.build.ok).map(r => ({
    op: r.task.operation, fills: r.build.fills.map(f => f.name),
  }));

  let verdict = "verifier failed to respond"; let confidence = 0;
  try {
    const raw = await generate(VERIFIER_MODEL, verifierPrompt(task, priorPlaybooks, probe), {
      temperature: 0.1, max_tokens: 250,
    });
    const start = raw.indexOf("{"), end = raw.lastIndexOf("}");
    if (start >= 0 && end > start) {
      const j = JSON.parse(raw.slice(start, end + 1));
      verdict = j.verdict ?? verdict;
      confidence = Number(j.confidence) || 0;
    }
  } catch (e) {
    verdict = `verifier parse error: ${(e as Error).message}`;
  }

  return {
    profile_activated: act.ok,
    warmed_indexes: act.warmed,
    probe_boost_total: probe.totalBoost,
    probe_boosted_hits: probe.boostedHits,
    probe_top_citations: probe.cites,
    geo_discrimination_ok,
    verdict, confidence,
    duration_secs: Math.round((Date.now() - t0) / 1000),
  };
}

// ─────────────────────── round scoring ───────────────────────

function scoreRound(r: RoundLedger): { score: number; notes: string[] } {
  const notes: string[] = [];
  let s = 0;
  if (r.build.ok) { s += 3; notes.push(`✓ build sealed (${r.build.fills.map(f => f.name).join(", ")})`); }
  else { notes.push(`✗ build failed: ${r.build.error}`); }
  if (r.build.playbook_id) { s += 1; notes.push(`✓ seeded id=${r.build.playbook_id}`); }
  if (r.verify.profile_activated) { s += 1; notes.push(`✓ profile activated (warmed=${r.verify.warmed_indexes})`); }
  if (r.verify.geo_discrimination_ok) { s += 3; notes.push(`✓ geo discrimination correct (boostedHits=${r.verify.probe_boosted_hits})`); }
  else { notes.push(`✗ geo discrimination failed (boostedHits=${r.verify.probe_boosted_hits})`); }
  if (r.verify.confidence >= 60) { s += 2; notes.push(`✓ verifier confident (${r.verify.confidence}%): ${r.verify.verdict}`); }
  else { notes.push(`◑ verifier confidence ${r.verify.confidence}%: ${r.verify.verdict}`); }
  return { score: s, notes };
}

// ─────────────────────── main loop ───────────────────────

async function main() {
  console.log(`▶ Network proving — ${TASK_DECK.length} rounds, profile=${PROFILE_ID}`);
  console.log(`▶ build pair: ${EXECUTOR_MODEL} + ${REVIEWER_MODEL}; verifier: ${VERIFIER_MODEL}\n`);

  const ledger: RoundLedger[] = [];

  for (let i = 0; i < TASK_DECK.length; i++) {
    const task = TASK_DECK[i];
    console.log(`\n══════════ Round ${i} — ${task.operation} ══════════`);

    console.log(`\n[${task.id}] BUILD phase (two agents collaborating)`);
    const build = await buildPhase(task, task.id);

    console.log(`\n[${task.id}] VERIFY phase (fresh agent + hot-swap profile)`);
    const verify = await verifyPhase(task, ledger);
    console.log(`  profile=${verify.profile_activated ? "ok" : "fail"} warmed=${verify.warmed_indexes} `
      + `boosted=${verify.probe_boosted_hits}/10 totalBoost=${verify.probe_boost_total.toFixed(3)} `
      + `cites=${verify.probe_top_citations.length} confidence=${verify.confidence}%`);
    console.log(`  verdict: ${verify.verdict}`);

    const round: RoundLedger = { round: i, task, build, verify, score: 0, notes: [] };
    const sc = scoreRound(round);
    round.score = sc.score; round.notes = sc.notes;
    ledger.push(round);

    console.log(`\n  Round ${i} score: ${round.score}/10`);
    for (const n of round.notes) console.log(`    ${n}`);
  }

  console.log(`\n══════════ Network viability summary ══════════`);
  const total = ledger.reduce((s, r) => s + r.score, 0);
  const max = ledger.length * 10;
  const avg = total / ledger.length;
  for (const r of ledger) console.log(`  R${r.round} ${r.task.target_city.padEnd(10)} ${r.task.target_role.padEnd(20)} ${r.score}/10`);
  console.log(`\n  TOTAL: ${total}/${max}   AVG: ${avg.toFixed(1)}/10`);

  // Hard gate: at least 2/3 of rounds must pass, where a pass means the
  // build phase sealed AND the round scored ≥ 6/10.
  const passed = ledger.filter(r => r.build.ok && r.score >= 6).length;
  if (passed < Math.ceil(ledger.length * 2 / 3)) {
    throw new Error(`network proving gate failed — only ${passed}/${ledger.length} rounds passed (need ≥${Math.ceil(ledger.length * 2 / 3)})`);
  }
  console.log(`\n✓ Network proven over ${passed}/${ledger.length} rounds`);
  process.exit(0);
}

main().catch(e => {
  console.error(`\n✗ ${(e as Error).message}`);
  if ((e as any).stack) console.error((e as any).stack);
  process.exit(1);
});