observer: KB enrichment preamble before failure-cluster escalation
Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
escalateFailureClusterToLLMTeam now calls a new buildKbPreamble() that mirrors what scrum_master_pipeline does on every per-file review: queries /vectors/pathway/bug_fingerprints + /vectors/search against the lakehouse_arch_v1 corpus, then asks local qwen3.5:latest (provider=ollama) to synthesize a tight briefing. The synthesized preamble prepends the existing escalation prompt so the cloud reviewer sees historical context the same way scrum reviewers do. Reuses existing KB primitives — no new corpora, no new endpoints, no new abstractions. Same code path scrum already exercises 3+ times per review; observer joins the same compounding loop. Audit row gains kb_preamble_chars so we can later track enrichment yield per escalation. Empty preamble (both fingerprints + matrix return nothing) → empty string, prompt unchanged. Verified: qwen3.5:latest synthesis fires for every escalation with non-empty matrix hits (gateway log: 445→72 tokens, 3.1s). Matrix retrieval correctly surfaces PRD Phase 40/44 chunks for chat_completion clusters. Pathway memory stays consistent with scrum (84→87 traces); chat_completion task_class doesn't have fingerprints yet — graceful. Local-model synthesis was J's explicit ask: compress the raw bundle before the cloud call so the briefing is actionable, not a dump. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
69919d9d57
commit
d9bd4c9bdf
@ -160,6 +160,105 @@ const LLM_TEAM = process.env.LH_LLM_TEAM_URL ?? "http://localhost:5000";
|
||||
// JSONL audit sink for observer escalations — one row per escalation,
// presumably appended via node:fs/promises appendFile in the escalation
// path below (TODO confirm writer stays single-process; JSONL appends
// from concurrent writers can interleave).
const LLM_TEAM_ESCALATIONS = "/home/profit/lakehouse/data/_kb/observer_escalations.jsonl";

// Minimum cluster size before we spend a cloud call on diagnosis.
const ESCALATION_THRESHOLD = 3; // N+ failures on same sig_hash triggers
|
||||
|
||||
// ─── KB enrichment helper (2026-04-26) ────────────────────────────
|
||||
// Mirrors what scrum_master_pipeline already does on every per-file
|
||||
// review: queries pathway_memory bug fingerprints + the lakehouse_arch
|
||||
// matrix corpus, then asks qwen3.5:latest to synthesize a tight
|
||||
// briefing. We reuse the same primitives so observer escalations carry
|
||||
// the same compounding context the scrum loop builds — no new index
|
||||
// surfaces, no new corpora.
|
||||
//
|
||||
// `task_class` is derived from the cluster (most ops use the same one);
|
||||
// pathway/bug_fingerprints is permissive about a null file_path, so
|
||||
// non-code clusters (scenario fills, v1.chat events) just see broader
|
||||
// matches via task_class alone.
|
||||
//
|
||||
// Returns "" when there's no useful signal — caller treats empty as
|
||||
// "no preamble" and skips the prepend.
|
||||
async function buildKbPreamble(sigHash: string, cluster: ObservedOp[]): Promise<string> {
|
||||
const sample = cluster[0];
|
||||
const taskClass = sample?.event_kind
|
||||
?? (sample?.source === "scenario" ? "scenario_fill" : "observer_escalation");
|
||||
|
||||
// Step 1: pathway bug fingerprints. Best-effort; null filePath just
|
||||
// widens the query at the matrix-index level.
|
||||
let fingerprints: { flag: { kind: string }; pattern_key: string; example: string; occurrences: number }[] = [];
|
||||
try {
|
||||
const r = await fetch(`${LAKEHOUSE}/vectors/pathway/bug_fingerprints`, {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify({ task_class: taskClass, file_path: null, signal_class: null, limit: 5 }),
|
||||
signal: AbortSignal.timeout(5000),
|
||||
});
|
||||
if (r.ok) fingerprints = (await r.json() as any).fingerprints ?? [];
|
||||
} catch {}
|
||||
|
||||
// Step 2: matrix retrieval against the architectural corpus we
|
||||
// already maintain. Cluster summary is the search query.
|
||||
const clusterSummary = cluster.slice(-5).map(o =>
|
||||
`${o.endpoint ?? "?"} ${o.input_summary ?? ""} ${o.error ?? ""}`
|
||||
).join(" | ");
|
||||
let matrixChunks: { doc_id?: string; chunk_text?: string; score?: number }[] = [];
|
||||
try {
|
||||
const r = await fetch(`${LAKEHOUSE}/vectors/search`, {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify({ index_name: "lakehouse_arch_v1", query: `${taskClass} ${clusterSummary}`, top_k: 5 }),
|
||||
signal: AbortSignal.timeout(5000),
|
||||
});
|
||||
if (r.ok) matrixChunks = (await r.json() as any).results ?? [];
|
||||
} catch {}
|
||||
|
||||
if (fingerprints.length === 0 && matrixChunks.length === 0) return "";
|
||||
|
||||
// Step 3: synthesis via local model (qwen3.5:latest, provider=ollama).
|
||||
// Compresses the raw bundle to a 1-2 sentence briefing the cloud
|
||||
// reviewer can actually use. If local model is down/slow, fall back
|
||||
// to the raw dump rather than blocking the escalation path.
|
||||
const rawBundle = [
|
||||
fingerprints.length > 0
|
||||
? "PRIOR BUG PATTERNS (pathway memory):\n" + fingerprints.map((fp, i) =>
|
||||
`${i + 1}. [${fp.flag.kind}] ${fp.pattern_key} (×${fp.occurrences}) e.g. ${fp.example.slice(0, 120)}`
|
||||
).join("\n")
|
||||
: "",
|
||||
matrixChunks.length > 0
|
||||
? "RELATED ARCHITECTURE CONTEXT:\n" + matrixChunks.map((c, i) =>
|
||||
`${i + 1}. [${c.doc_id ?? "?"}] ${(c.chunk_text ?? "").slice(0, 200)}`
|
||||
).join("\n")
|
||||
: "",
|
||||
].filter(Boolean).join("\n\n");
|
||||
|
||||
const synthPrompt = `A failure cluster (sig_hash=${sigHash.slice(0, 8)}, ${cluster.length} occurrences, task_class=${taskClass}) is about to be escalated for diagnosis. Here are prior signals from our knowledge base:
|
||||
|
||||
${rawBundle}
|
||||
|
||||
Output a single paragraph (≤300 chars) briefing the cloud reviewer on which prior signals are most likely relevant to this cluster. If nothing matches, say so plainly. No preamble, no markdown.`;
|
||||
|
||||
let synthesized = "";
|
||||
try {
|
||||
const r = await fetch(`${LAKEHOUSE}/v1/chat`, {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
provider: "ollama",
|
||||
model: "qwen3.5:latest",
|
||||
messages: [{ role: "user", content: synthPrompt }],
|
||||
max_tokens: 200,
|
||||
temperature: 0.1,
|
||||
think: false,
|
||||
}),
|
||||
signal: AbortSignal.timeout(15000),
|
||||
});
|
||||
if (r.ok) {
|
||||
const j = await r.json() as any;
|
||||
synthesized = (j?.choices?.[0]?.message?.content ?? "").trim();
|
||||
}
|
||||
} catch {}
|
||||
|
||||
const body = synthesized.length > 0 ? synthesized : rawBundle;
|
||||
return `═══ KB CONTEXT — prior signals on this task class (synthesized by qwen3.5:latest) ═══\n${body}\n═══\n\n`;
|
||||
}
|
||||
|
||||
async function escalateFailureClusterToLLMTeam(sigHash: string, cluster: ObservedOp[]) {
|
||||
// Package the failure cluster as a single context blob. Originally
|
||||
// I routed this to LLM Team's `code_review` mode at /api/run, but
|
||||
@ -172,7 +271,8 @@ async function escalateFailureClusterToLLMTeam(sigHash: string, cluster: Observe
|
||||
const context = cluster.slice(-8).map((o, i) =>
|
||||
`[${i + 1}] endpoint=${o.endpoint} input=${o.input_summary} error=${o.error ?? "?"}`
|
||||
).join("\n");
|
||||
const prompt = `sig_hash=${sigHash} · ${cluster.length} failures on the same signature:\n\n${context}\n\nReview this failure cluster. Identify:\n1. Likely root cause (single sentence).\n2. Files most likely responsible (path hints).\n3. Concrete fix direction (under 3 sentences).\n4. Confidence: NN%\n\nBe specific, not generic.`;
|
||||
const kbPreamble = await buildKbPreamble(sigHash, cluster);
|
||||
const prompt = `${kbPreamble}sig_hash=${sigHash} · ${cluster.length} failures on the same signature:\n\n${context}\n\nReview this failure cluster. Identify:\n1. Likely root cause (single sentence).\n2. Files most likely responsible (path hints).\n3. Concrete fix direction (under 3 sentences).\n4. Confidence: NN%\n\nBe specific, not generic.`;
|
||||
|
||||
try {
|
||||
const resp = await fetch(`${LAKEHOUSE}/v1/chat`, {
|
||||
@ -209,6 +309,7 @@ async function escalateFailureClusterToLLMTeam(sigHash: string, cluster: Observe
|
||||
cluster_endpoint: cluster[0]?.endpoint,
|
||||
prompt_tokens: j?.usage?.prompt_tokens ?? 0,
|
||||
completion_tokens: j?.usage?.completion_tokens ?? 0,
|
||||
kb_preamble_chars: kbPreamble.length,
|
||||
analysis: analysis.slice(0, 4000),
|
||||
};
|
||||
const { appendFile } = await import("node:fs/promises");
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user