v1/mode: override knobs + staffing native runner + pass 2/3/4 harnesses
Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
Setup for the corpus-tightening experiment sweep (J 2026-04-26 — "now
is the only cheap window before the corpus gets large and refactoring
costs go up").
Override params on /v1/mode/execute (additive — old callers unaffected):
force_matrix_corpus — Pass 2: try alternate corpora per call
force_relevance_threshold — Pass 2: sweep filter strictness
force_temperature — Pass 3: variance test
New native mode `staffing_inference_lakehouse` (Pass 4):
- Same composer architecture as codereview_lakehouse
- Staffing framing: coordinator producing fillable|contingent|
unfillable verdict + ranked candidate list with playbook citations
- matrix_corpus = workers_500k_v8
- Validates that modes-as-prompt-molders generalizes beyond code
- Framing explicitly says "do NOT fabricate workers" — the staffing
analog of the lakehouse mode's symbol-grounding requirement
Three sweep harnesses:
scripts/mode_pass2_corpus_sweep.ts — 4 corpora × 4 thresholds × 5 files
scripts/mode_pass3_variance.ts — 3 files × 3 temps × 5 reps
scripts/mode_pass4_staffing.ts — 5 fill requests through staffing mode
Each appends per-call rows to data/_kb/mode_experiments.jsonl which
mode_compare.ts already aggregates with grounding column.
Pass 1 (10 files × 5 modes broad sweep) currently running via the
existing scripts/mode_experiment.ts — gateway restart deferred until
it completes so the new override knobs aren't enabled mid-experiment.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
52bb216c2d
commit
56bf30cfd8
@ -31,9 +31,13 @@ matrix_corpus = "chicago_permits_v1"
|
|||||||
|
|
||||||
[[task_class]]
|
[[task_class]]
|
||||||
name = "staffing_inference"
|
name = "staffing_inference"
|
||||||
preferred_mode = "ladder"
|
# Staffing-domain native enrichment runner — Pass 4 (2026-04-26).
|
||||||
fallback_modes = ["consensus", "pipeline"]
|
# Same composer architecture as codereview_lakehouse but with staffing
|
||||||
default_model = "gpt-oss:120b"
|
# framing + workers corpus. Validates that the modes-as-prompt-molders
|
||||||
|
# pattern generalizes beyond code review.
|
||||||
|
preferred_mode = "staffing_inference_lakehouse"
|
||||||
|
fallback_modes = ["ladder", "consensus", "pipeline"]
|
||||||
|
default_model = "openai/gpt-oss-120b:free"
|
||||||
matrix_corpus = "workers_500k_v8"
|
matrix_corpus = "workers_500k_v8"
|
||||||
|
|
||||||
[[task_class]]
|
[[task_class]]
|
||||||
|
|||||||
@ -51,6 +51,7 @@ const VALID_MODES: &[&str] = &[
|
|||||||
"codereview_isolation", // file + pathway only (no matrix)
|
"codereview_isolation", // file + pathway only (no matrix)
|
||||||
"codereview_matrix_only", // file + matrix only (no pathway)
|
"codereview_matrix_only", // file + matrix only (no pathway)
|
||||||
"codereview_playbook_only", // pathway only, NO file content (lossy ceiling)
|
"codereview_playbook_only", // pathway only, NO file content (lossy ceiling)
|
||||||
|
"staffing_inference_lakehouse", // staffing-domain composer (Pass 4)
|
||||||
];
|
];
|
||||||
|
|
||||||
/// Whether a mode is handled natively in this gateway vs proxied to
|
/// Whether a mode is handled natively in this gateway vs proxied to
|
||||||
@ -63,6 +64,7 @@ fn is_native_mode(mode: &str) -> bool {
|
|||||||
| "codereview_isolation"
|
| "codereview_isolation"
|
||||||
| "codereview_matrix_only"
|
| "codereview_matrix_only"
|
||||||
| "codereview_playbook_only"
|
| "codereview_playbook_only"
|
||||||
|
| "staffing_inference_lakehouse"
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -82,6 +84,7 @@ pub struct EnrichmentFlags {
|
|||||||
pub enum ReviewerFraming {
|
pub enum ReviewerFraming {
|
||||||
Adversarial, // forensic, ranked findings + verdict (lakehouse default)
|
Adversarial, // forensic, ranked findings + verdict (lakehouse default)
|
||||||
Generic, // "review this" — no codebase priors (null baseline)
|
Generic, // "review this" — no codebase priors (null baseline)
|
||||||
|
Staffing, // staffing-domain coordinator framing (Pass 4)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn flags_for_mode(mode: &str) -> EnrichmentFlags {
|
fn flags_for_mode(mode: &str) -> EnrichmentFlags {
|
||||||
@ -114,6 +117,18 @@ fn flags_for_mode(mode: &str) -> EnrichmentFlags {
|
|||||||
use_relevance_filter: false,
|
use_relevance_filter: false,
|
||||||
framing: ReviewerFraming::Adversarial,
|
framing: ReviewerFraming::Adversarial,
|
||||||
},
|
},
|
||||||
|
"staffing_inference_lakehouse" => EnrichmentFlags {
|
||||||
|
// Staffing reuses the same composer architecture but with
|
||||||
|
// domain-specific framing. file_content here = the request
|
||||||
|
// payload (e.g. "fill 2 welders in Toledo OH"), bug_fingerprints
|
||||||
|
// surface prior playbook patterns from this geo+role, matrix
|
||||||
|
// pulls candidate workers + city/state demand chunks.
|
||||||
|
include_file_content: true,
|
||||||
|
include_bug_fingerprints: true,
|
||||||
|
include_matrix_chunks: true,
|
||||||
|
use_relevance_filter: true,
|
||||||
|
framing: ReviewerFraming::Staffing,
|
||||||
|
},
|
||||||
// Default (codereview_lakehouse): everything on.
|
// Default (codereview_lakehouse): everything on.
|
||||||
_ => EnrichmentFlags {
|
_ => EnrichmentFlags {
|
||||||
include_file_content: true,
|
include_file_content: true,
|
||||||
@ -404,6 +419,20 @@ pub struct ExecuteRequest {
|
|||||||
/// runner uses its built-in forensic-review framing.
|
/// runner uses its built-in forensic-review framing.
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub user_question: Option<String>,
|
pub user_question: Option<String>,
|
||||||
|
/// Override the matrix corpus the runner queries. Defaults to the
|
||||||
|
/// task_class's matrix_corpus from modes.toml. Use for the corpus-
|
||||||
|
/// tightening experiment (Pass 2 of the 2026-04-26 mode sweep).
|
||||||
|
#[serde(default)]
|
||||||
|
pub force_matrix_corpus: Option<String>,
|
||||||
|
/// Override the relevance filter threshold (default 0.3). Setting
|
||||||
|
/// to 0 keeps every chunk; raising rejects more aggressively. Used
|
||||||
|
/// to find the threshold sweet spot per task class.
|
||||||
|
#[serde(default)]
|
||||||
|
pub force_relevance_threshold: Option<f64>,
|
||||||
|
/// Override the LLM temperature (default 0.1). Used by Pass 3
|
||||||
|
/// variance testing to measure run-to-run stability.
|
||||||
|
#[serde(default)]
|
||||||
|
pub force_temperature: Option<f64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Debug, Default)]
|
#[derive(Serialize, Debug, Default)]
|
||||||
@ -442,10 +471,20 @@ proven. Do NOT hedge.";
|
|||||||
const FRAMING_GENERIC: &str = "You are a code reviewer. Read the file below and produce a \
|
const FRAMING_GENERIC: &str = "You are a code reviewer. Read the file below and produce a \
|
||||||
markdown review with findings.";
|
markdown review with findings.";
|
||||||
|
|
||||||
|
const FRAMING_STAFFING: &str = "You are a senior staffing coordinator for a light-industrial \
|
||||||
|
labor agency. You receive a fill request (role × count × city × deadline) and have access \
|
||||||
|
to historical playbook patterns from prior fills in this geo, plus a corpus of candidate \
|
||||||
|
workers + demand signals. Produce a markdown plan with: (1) one-line verdict (fillable | \
|
||||||
|
contingent | unfillable), (2) ranked candidate list with name, city, role, distance, prior \
|
||||||
|
fill citations from the playbook, (3) risks (double-booking, eligibility gaps, geo stretch) \
|
||||||
|
with severity + confidence percent, (4) playbook reference IDs you used. Be precise — only \
|
||||||
|
recommend candidates whose names appear in the matrix data; do NOT fabricate workers.";
|
||||||
|
|
||||||
fn framing_text(f: ReviewerFraming) -> &'static str {
|
fn framing_text(f: ReviewerFraming) -> &'static str {
|
||||||
match f {
|
match f {
|
||||||
ReviewerFraming::Adversarial => FRAMING_ADVERSARIAL,
|
ReviewerFraming::Adversarial => FRAMING_ADVERSARIAL,
|
||||||
ReviewerFraming::Generic => FRAMING_GENERIC,
|
ReviewerFraming::Generic => FRAMING_GENERIC,
|
||||||
|
ReviewerFraming::Staffing => FRAMING_STAFFING,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -485,6 +524,9 @@ pub async fn execute(
|
|||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Caller can override the matrix corpus per-call (Pass 2 corpus
|
||||||
|
// tightening). Falls back to modes.toml default.
|
||||||
|
let matrix_corpus = req.force_matrix_corpus.clone().or(matrix_corpus);
|
||||||
let flags = flags_for_mode(&mode);
|
let flags = flags_for_mode(&mode);
|
||||||
let mut sources = EnrichmentSources {
|
let mut sources = EnrichmentSources {
|
||||||
matrix_corpus: matrix_corpus.clone(),
|
matrix_corpus: matrix_corpus.clone(),
|
||||||
@ -621,7 +663,7 @@ pub async fn execute(
|
|||||||
let body = serde_json::json!({
|
let body = serde_json::json!({
|
||||||
"focus_file": { "path": req.file_path, "content": file_content },
|
"focus_file": { "path": req.file_path, "content": file_content },
|
||||||
"chunks": chunks_for_filter,
|
"chunks": chunks_for_filter,
|
||||||
"threshold": 0.3,
|
"threshold": req.force_relevance_threshold.unwrap_or(0.3),
|
||||||
});
|
});
|
||||||
match client
|
match client
|
||||||
.post("http://localhost:3800/relevance")
|
.post("http://localhost:3800/relevance")
|
||||||
@ -723,7 +765,7 @@ pub async fn execute(
|
|||||||
{ "role": "system", "content": framing_text(flags.framing) },
|
{ "role": "system", "content": framing_text(flags.framing) },
|
||||||
{ "role": "user", "content": user_prompt },
|
{ "role": "user", "content": user_prompt },
|
||||||
],
|
],
|
||||||
"temperature": 0.1,
|
"temperature": req.force_temperature.unwrap_or(0.1),
|
||||||
"max_tokens": 4096,
|
"max_tokens": 4096,
|
||||||
});
|
});
|
||||||
let chat_client = match reqwest::Client::builder()
|
let chat_client = match reqwest::Client::builder()
|
||||||
|
|||||||
121
scripts/mode_pass2_corpus_sweep.ts
Normal file
121
scripts/mode_pass2_corpus_sweep.ts
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
#!/usr/bin/env bun
|
||||||
|
/**
|
||||||
|
* Pass 2: matrix corpus + relevance threshold sweep.
|
||||||
|
*
|
||||||
|
* For each (corpus, threshold) combination, run codereview_matrix_only
|
||||||
|
* on the same N files. Compares which corpus actually adds grounded
|
||||||
|
* findings vs codereview_isolation (matrix-off baseline).
|
||||||
|
*
|
||||||
|
* Output: data/_kb/mode_experiments.jsonl gets one row per call,
|
||||||
|
* tagged via the force_matrix_corpus + force_relevance_threshold
|
||||||
|
* fields visible in `sources`. Aggregator can then group by corpus.
|
||||||
|
*
|
||||||
|
* Usage: bun run scripts/mode_pass2_corpus_sweep.ts
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Base URL of the gateway the sweep posts to; override with LH_GATEWAY. */
const GATEWAY = process.env.LH_GATEWAY ?? "http://localhost:3100";
/** Model id sent as `force_model` on every call; override with LH_MODEL. */
const MODEL = process.env.LH_MODEL ?? "openai/gpt-oss-120b:free";

/**
 * Split a comma-separated env value into trimmed, non-empty entries.
 *
 * @param raw      the env value, or undefined when the variable is unset
 * @param fallback entries to use when `raw` is unset or yields no entries
 * @returns a fresh array (never an alias of `fallback`)
 */
function parseList(raw: string | undefined, fallback: readonly string[]): string[] {
  const entries = (raw ?? "")
    .split(",")
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0);
  return entries.length > 0 ? entries : [...fallback];
}

/**
 * Like `parseList`, but every entry must parse to a finite number.
 *
 * Fails fast instead of letting NaN through: JSON.stringify serializes NaN
 * as `null`, so an unparsable entry would otherwise be posted as a null
 * override and the run would be labelled with a threshold it never used.
 *
 * @param name env variable name, used only in the error message
 * @throws Error when an entry is not a finite number
 */
function parseNumberList(raw: string | undefined, fallback: readonly number[], name: string): number[] {
  const entries = parseList(raw, []);
  if (entries.length === 0) return [...fallback];
  return entries.map((entry) => {
    const value = Number(entry);
    if (!Number.isFinite(value)) {
      throw new Error(`${name}: "${entry}" is not a finite number`);
    }
    return value;
  });
}

/** Files reviewed in every (corpus, threshold) cell; override with LH_FILES (comma-separated). */
const FILES = parseList(process.env.LH_FILES, [
  "crates/queryd/src/delta.rs",
  "crates/queryd/src/service.rs",
  "crates/vectord/src/pathway_memory.rs",
  "crates/gateway/src/v1/mode.rs",
  "crates/aibridge/src/client.rs",
]);

/** Matrix corpora passed as `force_matrix_corpus`; override with LH_CORPORA (comma-separated). */
const CORPORA = parseList(process.env.LH_CORPORA, [
  "distilled_procedural_v20260423102847",
  "distilled_factual_v20260423095819",
  "distilled_config_hint_v20260423102847",
  "kb_team_runs_v1",
]);

/** Relevance thresholds passed as `force_relevance_threshold`; override with LH_THRESHOLDS. */
const THRESHOLDS = parseNumberList(process.env.LH_THRESHOLDS, [0.2, 0.3, 0.4, 0.5], "LH_THRESHOLDS");
|
||||||
|
|
||||||
|
/**
 * Outcome of one gateway call in the sweep.
 *
 * `corpus`, `threshold` and `file` echo the inputs of the call. On success
 * (`ok: true`) the optional counters are copied from the gateway response;
 * on failure (`ok: false`) only `error` is set.
 */
interface Result {
  corpus: string;
  threshold: number;
  file: string;
  ok: boolean;
  matrix_kept?: number;
  matrix_dropped?: number;
  response_chars?: number;
  latency_ms?: number;
  error?: string;
}

/**
 * POST one `codereview_matrix_only` request to `/v1/mode/execute` with the
 * given corpus and relevance-threshold overrides, and summarize the reply.
 *
 * Never throws: HTTP errors, timeouts (180 s), network failures and
 * unexpected response bodies are all returned as `{ ok: false, error }`.
 *
 * @param corpus    value sent as `force_matrix_corpus`
 * @param threshold value sent as `force_relevance_threshold`
 * @param file      value sent as `file_path`
 */
async function runOne(corpus: string, threshold: number, file: string): Promise<Result> {
  const base = { corpus, threshold, file };
  // Counters are copied only when they are real numbers; anything else is left undefined.
  const numberOrUndefined = (value: unknown): number | undefined =>
    typeof value === "number" ? value : undefined;

  try {
    const r = await fetch(`${GATEWAY}/v1/mode/execute`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
        task_class: "scrum_review",
        file_path: file,
        force_mode: "codereview_matrix_only",
        force_model: MODEL,
        force_matrix_corpus: corpus,
        force_relevance_threshold: threshold,
      }),
      signal: AbortSignal.timeout(180_000),
    });
    if (!r.ok) {
      // Best effort: the error body is only used to enrich the message.
      const body = await r.text().catch(() => "");
      return { ...base, ok: false, error: `HTTP ${r.status}: ${body.slice(0, 150)}` };
    }

    const payload: unknown = await r.json();
    if (typeof payload !== "object" || payload === null) {
      return { ...base, ok: false, error: "gateway returned a non-object JSON body" };
    }
    const j = payload as Record<string, unknown>;
    const sources =
      typeof j.sources === "object" && j.sources !== null
        ? (j.sources as Record<string, unknown>)
        : {};

    return {
      ...base,
      ok: true,
      matrix_kept: numberOrUndefined(sources.matrix_chunks_kept),
      matrix_dropped: numberOrUndefined(sources.matrix_chunks_dropped),
      response_chars: typeof j.response === "string" ? j.response.length : 0,
      latency_ms: numberOrUndefined(j.latency_ms),
    };
  } catch (e: unknown) {
    // A thrown value is not guaranteed to be an Error, so `.message` may not exist.
    return { ...base, ok: false, error: e instanceof Error ? e.message : String(e) };
  }
}
|
||||||
|
|
||||||
|
/**
 * Run the full corpus × threshold × file sweep sequentially, printing one
 * progress line per call, then a success count and a per-corpus table of the
 * average number of matrix chunks kept at each threshold.
 *
 * Calls run one at a time (each `runOne` is awaited before the next starts).
 * Failed calls are reported inline and excluded from the averages.
 */
async function main(): Promise<void> {
  const total = CORPORA.length * THRESHOLDS.length * FILES.length;
  console.log(`[pass2] corpora=${CORPORA.length} × thresholds=${THRESHOLDS.length} × files=${FILES.length} = ${total} runs`);
  console.log(`[pass2] model=${MODEL}\n`);

  let i = 0;
  const results: Result[] = [];
  for (const corpus of CORPORA) {
    for (const threshold of THRESHOLDS) {
      for (const file of FILES) {
        i++;
        process.stdout.write(` [${i}/${total}] corpus=${corpus.slice(0, 30).padEnd(30)} thr=${threshold.toFixed(1)} ${file.slice(-32).padStart(32)} ... `);
        const r = await runOne(corpus, threshold, file);
        results.push(r);
        if (r.ok) {
          // Missing counters are shown as 0 rather than the literal text "undefined".
          const kept = r.matrix_kept ?? 0;
          const total_chunks = kept + (r.matrix_dropped ?? 0);
          console.log(`✓ k=${kept}/${total_chunks} resp=${r.response_chars} ${((r.latency_ms ?? 0) / 1000).toFixed(1)}s`);
        } else {
          console.log(`✗ ${r.error}`);
        }
      }
    }
  }

  console.log(`\n[pass2] complete · ${results.filter(r => r.ok).length}/${results.length} succeeded`);

  // Per-corpus×threshold roll-up of kept-rate (the matrix usefulness proxy).
  console.log(`\n[pass2] kept-rate by corpus × threshold (avg chunks kept per call):`);
  const labels = THRESHOLDS.map(t => `thr=${t.toFixed(1)}`);
  // Header and data cells share one width so the columns line up.
  const colWidth = Math.max(5, ...labels.map(label => label.length));
  console.log(` ${"corpus".padEnd(40)} ${labels.map(label => label.padStart(colWidth)).join(" ")}`);
  for (const corpus of CORPORA) {
    const cells = THRESHOLDS.map(t => {
      const matched = results.filter(r => r.ok && r.corpus === corpus && r.threshold === t);
      if (matched.length === 0) return "—".padStart(colWidth);
      const avgKept = matched.reduce((s, r) => s + (r.matrix_kept ?? 0), 0) / matched.length;
      return avgKept.toFixed(1).padStart(colWidth);
    }).join(" ");
    console.log(` ${corpus.slice(0, 40).padEnd(40)} ${cells}`);
  }

  console.log(`\n[pass2] aggregate findings/groundedness with: bun run scripts/mode_compare.ts`);
}
|
||||||
|
|
||||||
|
main().catch(e => { console.error(e); process.exit(1); });
|
||||||
109
scripts/mode_pass3_variance.ts
Normal file
109
scripts/mode_pass3_variance.ts
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
#!/usr/bin/env bun
|
||||||
|
/**
|
||||||
|
* Pass 3: variance test.
|
||||||
|
*
|
||||||
|
* Runs codereview_lakehouse on the SAME file N times at each of M
|
||||||
|
* temperatures. Measures run-to-run stability of grounded finding
|
||||||
|
* count, response size, and latency. Anything <100% groundedness
|
||||||
|
* is a leak; track which symbols got hallucinated.
|
||||||
|
*
|
||||||
|
* Output appends to data/_kb/mode_experiments.jsonl. The aggregator
|
||||||
|
* can group by ts and identify variance buckets.
|
||||||
|
*
|
||||||
|
* Usage: bun run scripts/mode_pass3_variance.ts
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Base URL of the gateway the variance run posts to; override with LH_GATEWAY. */
const GATEWAY = process.env.LH_GATEWAY ?? "http://localhost:3100";
/** Model id sent as `force_model` on every call; override with LH_MODEL. */
const MODEL = process.env.LH_MODEL ?? "openai/gpt-oss-120b:free";

/**
 * Split a comma-separated env value into trimmed, non-empty entries.
 *
 * @param raw      the env value, or undefined when the variable is unset
 * @param fallback entries to use when `raw` is unset or yields no entries
 * @returns a fresh array (never an alias of `fallback`)
 */
function parseList(raw: string | undefined, fallback: readonly string[]): string[] {
  const entries = (raw ?? "")
    .split(",")
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0);
  return entries.length > 0 ? entries : [...fallback];
}

/**
 * Like `parseList`, but every entry must parse to a finite number.
 *
 * Fails fast instead of letting NaN through: JSON.stringify serializes NaN
 * as `null`, so an unparsable entry would otherwise be posted as a null
 * override and the run would be labelled with a temperature it never used.
 *
 * @param name env variable name, used only in the error message
 * @throws Error when an entry is not a finite number
 */
function parseNumberList(raw: string | undefined, fallback: readonly number[], name: string): number[] {
  const entries = parseList(raw, []);
  if (entries.length === 0) return [...fallback];
  return entries.map((entry) => {
    const value = Number(entry);
    if (!Number.isFinite(value)) {
      throw new Error(`${name}: "${entry}" is not a finite number`);
    }
    return value;
  });
}

/**
 * Parse a repetition count: an integer of at least 1.
 *
 * A NaN count would make the rep loop run zero times and print "NaN runs",
 * so invalid values are rejected up front.
 *
 * @param name env variable name, used only in the error message
 * @returns `fallback` when `raw` is unset or blank
 * @throws Error when the value is not an integer >= 1
 */
function parsePositiveInt(raw: string | undefined, fallback: number, name: string): number {
  const text = (raw ?? "").trim();
  if (text.length === 0) return fallback;
  const value = Number(text);
  if (!Number.isInteger(value) || value < 1) {
    throw new Error(`${name}: "${text}" is not an integer >= 1`);
  }
  return value;
}

/** Files reviewed at every temperature; override with LH_FILES (comma-separated). */
const FILES = parseList(process.env.LH_FILES, [
  "crates/queryd/src/delta.rs",
  "crates/vectord/src/pathway_memory.rs",
  "crates/gateway/src/v1/mode.rs",
]);

/** Temperatures passed as `force_temperature`; override with LH_TEMPS. */
const TEMPS = parseNumberList(process.env.LH_TEMPS, [0.0, 0.1, 0.3], "LH_TEMPS");
/** Number of repeated calls per (file, temperature); override with LH_REPS. */
const REPS = parsePositiveInt(process.env.LH_REPS, 5, "LH_REPS");
|
||||||
|
|
||||||
|
/**
 * Outcome of one gateway call in the variance run.
 *
 * `file`, `temp` and `rep` echo the inputs of the call. On success
 * (`ok: true`) `response_chars` and `latency_ms` are taken from the gateway
 * response; on failure (`ok: false`) only `error` is set.
 */
interface Result {
  file: string;
  temp: number;
  rep: number;
  ok: boolean;
  response_chars?: number;
  latency_ms?: number;
  error?: string;
}

/**
 * POST one `codereview_lakehouse` request to `/v1/mode/execute` with the
 * given temperature override, and summarize the reply.
 *
 * Never throws: HTTP errors, timeouts (180 s), network failures and
 * unexpected response bodies are all returned as `{ ok: false, error }`.
 *
 * @param file value sent as `file_path`
 * @param temp value sent as `force_temperature`
 * @param rep  repetition index; not sent to the gateway, only echoed in the result
 */
async function runOne(file: string, temp: number, rep: number): Promise<Result> {
  const base = { file, temp, rep };
  try {
    const r = await fetch(`${GATEWAY}/v1/mode/execute`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
        task_class: "scrum_review",
        file_path: file,
        force_mode: "codereview_lakehouse",
        force_model: MODEL,
        force_temperature: temp,
      }),
      signal: AbortSignal.timeout(180_000),
    });
    if (!r.ok) {
      // Best effort: the error body is only used to enrich the message.
      const body = await r.text().catch(() => "");
      return { ...base, ok: false, error: `HTTP ${r.status}: ${body.slice(0, 150)}` };
    }

    const payload: unknown = await r.json();
    if (typeof payload !== "object" || payload === null) {
      return { ...base, ok: false, error: "gateway returned a non-object JSON body" };
    }
    const j = payload as Record<string, unknown>;

    return {
      ...base,
      ok: true,
      response_chars: typeof j.response === "string" ? j.response.length : 0,
      latency_ms: typeof j.latency_ms === "number" ? j.latency_ms : undefined,
    };
  } catch (e: unknown) {
    // A thrown value is not guaranteed to be an Error, so `.message` may not exist.
    return { ...base, ok: false, error: e instanceof Error ? e.message : String(e) };
  }
}
|
||||||
|
|
||||||
|
/**
 * Drive the variance run: for every file, at every temperature, issue REPS
 * sequential calls, logging each outcome as it arrives. Afterwards print the
 * success count and a file × temperature table of response size
 * (mean ± population standard deviation over the successful calls).
 */
async function main() {
  const runCount = FILES.length * TEMPS.length * REPS;
  console.log(`[pass3] files=${FILES.length} × temps=${TEMPS.length} × reps=${REPS} = ${runCount} runs`);
  console.log(`[pass3] model=${MODEL}\n`);

  const outcomes: Result[] = [];
  for (const path of FILES) {
    for (const temperature of TEMPS) {
      for (let attempt = 1; attempt <= REPS; attempt++) {
        // The 1-based progress index is simply "calls finished so far + 1".
        const position = outcomes.length + 1;
        process.stdout.write(` [${position}/${runCount}] temp=${temperature.toFixed(1)} rep=${attempt}/${REPS} ${path.slice(-32).padStart(32)} ... `);
        const outcome = await runOne(path, temperature, attempt);
        outcomes.push(outcome);
        if (!outcome.ok) {
          console.log(`✗ ${outcome.error}`);
          continue;
        }
        const seconds = ((outcome.latency_ms ?? 0) / 1000).toFixed(1);
        console.log(`✓ resp=${outcome.response_chars} ${seconds}s`);
      }
    }
  }

  const succeeded = outcomes.filter(o => o.ok).length;
  console.log(`\n[pass3] complete · ${succeeded}/${outcomes.length} succeeded`);

  // Response size spread per file × temp, used as a quick stand-in for
  // output instability.
  console.log(`\n[pass3] response_chars variance (mean ± stddev) by file × temp:`);
  const headerCells = TEMPS.map(t => `temp=${t.toFixed(1)}`.padStart(20));
  console.log(` ${"file".padEnd(40)} ${headerCells.join(" ")}`);

  for (const path of FILES) {
    const rowCells: string[] = [];
    for (const temperature of TEMPS) {
      const sizes = outcomes
        .filter(o => o.ok && o.file === path && o.temp === temperature)
        .map(o => o.response_chars ?? 0);
      if (sizes.length === 0) {
        rowCells.push(" — ");
        continue;
      }
      let sum = 0;
      for (const size of sizes) sum += size;
      const mean = sum / sizes.length;
      let squaredDeviations = 0;
      for (const size of sizes) squaredDeviations += (size - mean) ** 2;
      const sd = Math.sqrt(squaredDeviations / sizes.length);
      const meanText = Math.round(mean).toString().padStart(7);
      const sdText = Math.round(sd).toString().padEnd(6);
      rowCells.push(`${meanText} ± ${sdText}`.padStart(20));
    }
    console.log(` ${path.slice(0, 40).padEnd(40)} ${rowCells.join(" ")}`);
  }

  console.log(`\n[pass3] grounding variance via: bun run scripts/mode_compare.ts (look for grounded-N column drift)`);
}
|
||||||
|
|
||||||
|
main().catch(e => { console.error(e); process.exit(1); });
|
||||||
127
scripts/mode_pass4_staffing.ts
Normal file
127
scripts/mode_pass4_staffing.ts
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
#!/usr/bin/env bun
|
||||||
|
/**
|
||||||
|
* Pass 4: staffing_inference_lakehouse cross-domain validation.
|
||||||
|
*
|
||||||
|
* Runs the staffing-domain mode against synthetic fill requests.
|
||||||
|
* Validates that the modes-as-prompt-molders architecture generalizes
|
||||||
|
* beyond code review — the composer pattern (file_content + bug
|
||||||
|
* fingerprints + relevance-filtered matrix + domain framing) should
|
||||||
|
* produce grounded staffing recommendations the same way it produces
|
||||||
|
* grounded code reviews.
|
||||||
|
*
|
||||||
|
* Each fill request is posted as `file_content` (since the runner's
|
||||||
|
* shape expects file content; for staffing it's the request payload).
|
||||||
|
* file_path is set to a synthetic path under requests/ so pathway
|
||||||
|
* memory bucketing groups requests by geo+role.
|
||||||
|
*
|
||||||
|
* Usage: bun run scripts/mode_pass4_staffing.ts
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Base URL of the gateway the staffing run posts to; override with LH_GATEWAY. */
const GATEWAY = process.env.LH_GATEWAY ?? "http://localhost:3100";
/** Model id sent as `force_model` on every call; override with LH_MODEL. */
const MODEL = process.env.LH_MODEL ?? "openai/gpt-oss-120b:free";

/** One synthetic fill request: `count` workers of `role` in `city`, `state` by `deadline`. */
interface FillRequest {
  city: string;
  state: string;
  role: string;
  count: number;
  deadline: string;
  /** Optional free-text requirement; rendered as a `Notes:` line when non-empty. */
  notes?: string;
}

/** The fixed set of fill requests exercised by this pass. */
const REQUESTS: FillRequest[] = [
  { city: "Toledo", state: "OH", role: "Welder", count: 2, deadline: "2026-04-29", notes: "OSHA 10 required" },
  { city: "Nashville", state: "TN", role: "Forklift Operator", count: 3, deadline: "2026-05-01" },
  { city: "Chicago", state: "IL", role: "Assembler", count: 5, deadline: "2026-04-30", notes: "second shift" },
  { city: "South Bend", state: "IN", role: "Electrician", count: 1, deadline: "2026-04-28", notes: "journeyman license" },
  { city: "Murfreesboro", state: "TN", role: "Packaging Operator", count: 4, deadline: "2026-05-02" },
];

/**
 * Render a fill request as the markdown text posted as `file_content`.
 *
 * Layout: a `# Fill Request` heading, one line each for role × count,
 * location and deadline, an optional `Notes:` line, a blank separator line,
 * then the instruction sentence.
 *
 * The optional line is added conditionally rather than by filtering out
 * falsy entries, because such a filter would also remove the intentional
 * blank separator.
 *
 * @param req the request to render
 * @returns newline-joined payload text (no trailing newline)
 */
function requestToPayload(req: FillRequest): string {
  const lines = [
    `# Fill Request`,
    `Role: ${req.role} × ${req.count}`,
    `Location: ${req.city}, ${req.state}`,
    `Deadline: ${req.deadline}`,
  ];
  if (req.notes) {
    lines.push(`Notes: ${req.notes}`);
  }
  lines.push("", "Recommend candidates from the matrix data. Cite playbook references.");
  return lines.join("\n");
}
|
||||||
|
|
||||||
|
/**
 * Outcome of one staffing-mode gateway call.
 *
 * `req` is the request that was sent. On success (`ok: true`) the optional
 * counters are copied from the gateway response and `preview` holds the
 * first 400 characters of the response text; on failure (`ok: false`) only
 * `error` is set.
 */
interface Result {
  req: FillRequest;
  ok: boolean;
  response_chars?: number;
  bug_fingerprints?: number;
  matrix_kept?: number;
  matrix_dropped?: number;
  latency_ms?: number;
  error?: string;
  preview?: string;
}

/**
 * POST one fill request to `/v1/mode/execute` in
 * `staffing_inference_lakehouse` mode and summarize the reply.
 *
 * The rendered request is sent as `file_content`; `file_path` is a synthetic
 * `requests/<role>_<city>_<state>.md` path with role and city lower-cased
 * and whitespace runs replaced by underscores (state is used as given).
 *
 * Never throws: HTTP errors, timeouts (180 s), network failures and
 * unexpected response bodies are all returned as `{ ok: false, error }`.
 *
 * @param req the fill request to submit
 */
async function runOne(req: FillRequest): Promise<Result> {
  const slug = (text: string): string => text.toLowerCase().replace(/\s+/g, "_");
  // Counters are copied only when they are real numbers; anything else is left undefined.
  const numberOrUndefined = (value: unknown): number | undefined =>
    typeof value === "number" ? value : undefined;

  const payload = requestToPayload(req);
  const file_path = `requests/${slug(req.role)}_${slug(req.city)}_${req.state}.md`;
  try {
    const r = await fetch(`${GATEWAY}/v1/mode/execute`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
        task_class: "staffing_inference",
        file_path,
        file_content: payload,
        force_mode: "staffing_inference_lakehouse",
        force_model: MODEL,
      }),
      signal: AbortSignal.timeout(180_000),
    });
    if (!r.ok) {
      // Best effort: the error body is only used to enrich the message.
      const body = await r.text().catch(() => "");
      return { req, ok: false, error: `HTTP ${r.status}: ${body.slice(0, 200)}` };
    }

    const parsed: unknown = await r.json();
    if (typeof parsed !== "object" || parsed === null) {
      return { req, ok: false, error: "gateway returned a non-object JSON body" };
    }
    const j = parsed as Record<string, unknown>;
    const sources =
      typeof j.sources === "object" && j.sources !== null
        ? (j.sources as Record<string, unknown>)
        : {};
    const responseText = typeof j.response === "string" ? j.response : "";

    return {
      req,
      ok: true,
      response_chars: responseText.length,
      bug_fingerprints: numberOrUndefined(sources.bug_fingerprints_count),
      matrix_kept: numberOrUndefined(sources.matrix_chunks_kept),
      matrix_dropped: numberOrUndefined(sources.matrix_chunks_dropped),
      latency_ms: numberOrUndefined(j.latency_ms),
      preview: responseText.slice(0, 400),
    };
  } catch (e: unknown) {
    // A thrown value is not guaranteed to be an Error, so `.message` may not exist.
    return { req, ok: false, error: e instanceof Error ? e.message : String(e) };
  }
}
|
||||||
|
|
||||||
|
/**
 * Submit every entry of REQUESTS through the staffing mode, one at a time,
 * logging a status line per request. Finishes with the success count and the
 * head of the first successful, non-empty response so the output style can
 * be checked by eye.
 */
async function main() {
  console.log(`[pass4] requests=${REQUESTS.length} model=${MODEL} mode=staffing_inference_lakehouse\n`);

  const outcomes: Result[] = [];
  for (const [index, request] of REQUESTS.entries()) {
    const position = index + 1;
    process.stdout.write(` [${position}/${REQUESTS.length}] ${request.role.padEnd(22)} × ${request.count} in ${request.city}, ${request.state} ... `);
    const outcome = await runOne(request);
    outcomes.push(outcome);
    if (!outcome.ok) {
      console.log(`✗ ${outcome.error}`);
      continue;
    }
    const kept = outcome.matrix_kept ?? 0;
    const seen = kept + (outcome.matrix_dropped ?? 0);
    const seconds = ((outcome.latency_ms ?? 0) / 1000).toFixed(1);
    console.log(`✓ resp=${outcome.response_chars} bug=${outcome.bug_fingerprints ?? 0} mtx=${kept}/${seen} ${seconds}s`);
  }

  const succeeded = outcomes.filter(o => o.ok).length;
  console.log(`\n[pass4] complete · ${succeeded}/${outcomes.length} succeeded\n`);

  // Print the head of the first successful response to confirm the framing
  // yielded staffing-style output (verdict + ranked candidates) and not
  // generic prose.
  const sample = outcomes.find(o => o.ok && o.preview);
  if (!sample) return;
  console.log(`[pass4] first successful response preview (${sample.req.city} ${sample.req.role}):`);
  const quoted = (sample.preview ?? "")
    .split("\n")
    .map(line => ` | ${line}`)
    .join("\n");
  console.log(quoted);
}
|
||||||
|
|
||||||
|
main().catch(e => { console.error(e); process.exit(1); });
|
||||||
Loading…
x
Reference in New Issue
Block a user