Phase 44 (part 1): migrate TS callers to /v1/chat + add regression guard
Some checks failed
lakehouse/auditor 16 blocking issues: cloud: claim not backed — "Verified end-to-end:"
Some checks failed
lakehouse/auditor 16 blocking issues: cloud: claim not backed — "Verified end-to-end:"
Migrates the four TypeScript /generate callers to the gateway's
/v1/chat surface so every LLM call lands on /v1/usage and Langfuse:
tests/multi-agent/agent.ts::generate() provider="ollama"
tests/agent_test/agent_harness.ts::callAgent provider="ollama"
bot/propose.ts::generateProposal provider="ollama_cloud"
mcp-server/observer.ts (error analysis) provider="ollama"
Each migration follows the same pattern as the prior generateCloud()
migration (already on /v1/chat from 2026-04-24): replace
`fetch(SIDECAR/generate)` with `fetch(GATEWAY/v1/chat)`, swap the
prompt-style body for OpenAI-compat messages array, extract
content from `choices[0].message.content` instead of `text`.
Same upstream models in every case — gateway is the new home for
the call, transport otherwise unchanged.
Adds scripts/check_phase44_callers.sh — fail-loud regression guard
that exits non-zero if any non-adapter file fetches /generate or
api/generate. Adapter files (crates/gateway, crates/aibridge,
sidecar/) are exempt. Pre-tightening regex flagged prose mentions
in comments; the shipped regex requires `fetch(...)` or
`client.post(...)` shape so comments don't trip it.
Verification:
bun build mcp-server/observer.ts compiles
bun build tests/multi-agent/agent.ts compiles
bun build tests/agent_test/agent_harness.ts compiles
bun build bot/propose.ts compiles
./scripts/check_phase44_callers.sh ✅ clean
systemctl restart lakehouse-observer — unit restarted and reports active
Phase 44 part 2 (deferred):
- crates/aibridge/src/client.rs:118 still posts to sidecar /generate
directly. AiClient is the foundational Rust LLM caller used by
8+ vectord modules; migrating it is a workspace-wide refactor
that needs its own commit. Plan: keep AiClient as the
local-transport layer for the gateway's `provider=ollama` arm, but
introduce a thin `/v1/chat` wrapper for external callers (vectord
autotune, agent, rag, refresh, supervisor, playbook_memory).
- tests/real-world/hard_task_escalation.ts: comment mentions
/api/generate but doesn't actually call it. Comment is left
intentionally as historical context; regex no longer flags it.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
bfe1ea9d1c
commit
f6af0fd409
@ -13,7 +13,12 @@ import { readFile } from "node:fs/promises";
|
|||||||
import { createHash } from "node:crypto";
|
import { createHash } from "node:crypto";
|
||||||
import type { Gap, Proposal } from "./types.ts";
|
import type { Gap, Proposal } from "./types.ts";
|
||||||
|
|
||||||
const SIDECAR_URL = process.env.LH_SIDECAR_URL ?? "http://localhost:3200";
|
// Phase 44 migration (2026-04-27): bot/propose.ts now flows through
|
||||||
|
// the gateway's /v1/chat instead of hitting the sidecar's /generate
|
||||||
|
// directly. /v1/usage tracks the call, Langfuse traces it, observer
|
||||||
|
// sees it. Same upstream model (CLOUD_MODEL gpt-oss:120b on
|
||||||
|
// Ollama Cloud) — gateway just owns the routing.
|
||||||
|
const GATEWAY_URL = process.env.LH_GATEWAY_URL ?? "http://localhost:3100";
|
||||||
const REPO_ROOT = "/home/profit/lakehouse";
|
const REPO_ROOT = "/home/profit/lakehouse";
|
||||||
const PRD_PATH = `${REPO_ROOT}/docs/PRD.md`;
|
const PRD_PATH = `${REPO_ROOT}/docs/PRD.md`;
|
||||||
const CLOUD_MODEL = process.env.LH_BOT_MODEL ?? "gpt-oss:120b";
|
const CLOUD_MODEL = process.env.LH_BOT_MODEL ?? "gpt-oss:120b";
|
||||||
@ -72,13 +77,16 @@ export async function generateProposal(gap: Gap, historySummary: string = ""): P
|
|||||||
sections.push("Propose a small change that addresses this gap. Respond with the JSON object only.");
|
sections.push("Propose a small change that addresses this gap. Respond with the JSON object only.");
|
||||||
const userPrompt = sections.join("\n");
|
const userPrompt = sections.join("\n");
|
||||||
|
|
||||||
const r = await fetch(`${SIDECAR_URL}/generate`, {
|
const r = await fetch(`${GATEWAY_URL}/v1/chat`, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: { "content-type": "application/json" },
|
headers: { "content-type": "application/json" },
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
model: CLOUD_MODEL,
|
model: CLOUD_MODEL,
|
||||||
system: SYSTEM_PROMPT,
|
provider: "ollama_cloud",
|
||||||
prompt: userPrompt,
|
messages: [
|
||||||
|
{ role: "system", content: SYSTEM_PROMPT },
|
||||||
|
{ role: "user", content: userPrompt },
|
||||||
|
],
|
||||||
temperature: 0.2,
|
temperature: 0.2,
|
||||||
max_tokens: MAX_TOKENS,
|
max_tokens: MAX_TOKENS,
|
||||||
think: false,
|
think: false,
|
||||||
@ -86,10 +94,10 @@ export async function generateProposal(gap: Gap, historySummary: string = ""): P
|
|||||||
signal: AbortSignal.timeout(180000), // cloud T3 can be slow — 3 min
|
signal: AbortSignal.timeout(180000), // cloud T3 can be slow — 3 min
|
||||||
});
|
});
|
||||||
if (!r.ok) {
|
if (!r.ok) {
|
||||||
throw new Error(`sidecar ${r.status}: ${await r.text()}`);
|
throw new Error(`gateway /v1/chat ${r.status}: ${await r.text()}`);
|
||||||
}
|
}
|
||||||
const j = await r.json() as any;
|
const j = await r.json() as any;
|
||||||
const raw: string = j.text ?? j.response ?? "";
|
const raw: string = j?.choices?.[0]?.message?.content ?? "";
|
||||||
const usage = j.usage ?? {};
|
const usage = j.usage ?? {};
|
||||||
const tokens = (usage.prompt_tokens ?? 0) + (usage.completion_tokens ?? 0);
|
const tokens = (usage.prompt_tokens ?? 0) + (usage.completion_tokens ?? 0);
|
||||||
|
|
||||||
|
|||||||
@ -550,13 +550,19 @@ async function analyzeErrors() {
|
|||||||
`[${f.endpoint}] ${f.input_summary}: ${f.error}`
|
`[${f.endpoint}] ${f.input_summary}: ${f.error}`
|
||||||
).join("\n");
|
).join("\n");
|
||||||
|
|
||||||
// Ask local model to diagnose
|
// Ask local model to diagnose. Phase 44 migration (2026-04-27):
|
||||||
|
// /v1/chat instead of legacy /ai/generate so /v1/usage tracks the
|
||||||
|
// call + Langfuse traces it. Same upstream model (qwen2.5 local).
|
||||||
try {
|
try {
|
||||||
const resp = await fetch(`${LAKEHOUSE}/ai/generate`, {
|
const resp = await fetch(`${LAKEHOUSE}/v1/chat`, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: { "Content-Type": "application/json" },
|
headers: { "Content-Type": "application/json" },
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
prompt: `You are a system reliability observer. Analyze these recent failures and suggest fixes:
|
model: "qwen2.5",
|
||||||
|
provider: "ollama",
|
||||||
|
messages: [{
|
||||||
|
role: "user",
|
||||||
|
content: `You are a system reliability observer. Analyze these recent failures and suggest fixes:
|
||||||
|
|
||||||
${errorSummary}
|
${errorSummary}
|
||||||
|
|
||||||
@ -566,14 +572,15 @@ For each error:
|
|||||||
3. Should this be added to the playbook as a "don't do this"?
|
3. Should this be added to the playbook as a "don't do this"?
|
||||||
|
|
||||||
Be specific and actionable. Under 200 words.`,
|
Be specific and actionable. Under 200 words.`,
|
||||||
model: "qwen2.5",
|
}],
|
||||||
max_tokens: 400,
|
max_tokens: 400,
|
||||||
temperature: 0.2,
|
temperature: 0.2,
|
||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
const analysis = await resp.json();
|
const analysis = await resp.json() as any;
|
||||||
if (analysis.text) {
|
const analysisText = analysis?.choices?.[0]?.message?.content ?? "";
|
||||||
console.error(`[observer] Error analysis:\n${analysis.text}`);
|
if (analysisText) {
|
||||||
|
console.error(`[observer] Error analysis:\n${analysisText}`);
|
||||||
// Log the analysis as a playbook entry
|
// Log the analysis as a playbook entry
|
||||||
await fetch(`${GATEWAY}/log`, {
|
await fetch(`${GATEWAY}/log`, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
@ -581,7 +588,7 @@ Be specific and actionable. Under 200 words.`,
|
|||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
operation: `error_analysis: ${failures.length} failures`,
|
operation: `error_analysis: ${failures.length} failures`,
|
||||||
approach: "LLM-analyzed error patterns",
|
approach: "LLM-analyzed error patterns",
|
||||||
result: analysis.text.slice(0, 500),
|
result: analysisText.slice(0, 500),
|
||||||
context: errorSummary.slice(0, 500),
|
context: errorSummary.slice(0, 500),
|
||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
|
|||||||
58
scripts/check_phase44_callers.sh
Executable file
58
scripts/check_phase44_callers.sh
Executable file
@ -0,0 +1,58 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Phase 44 caller-migration guard. Fails if any non-adapter file
|
||||||
|
# fetches the sidecar's /generate endpoint or hits Ollama Cloud's
|
||||||
|
# /api/generate directly. Adapter files (gateway provider crate +
|
||||||
|
# the sidecar's own Python implementation) are exempt.
|
||||||
|
#
|
||||||
|
# Run: ./scripts/check_phase44_callers.sh
|
||||||
|
# CI: fail-loud (exits non-zero on regression)
|
||||||
|
# Watch: pre-commit hook — invoke from .git/hooks/pre-commit
|
||||||
|
|
||||||
|
set -e
|
||||||
|
cd "$(dirname "$0")/.."
|
||||||
|
|
||||||
|
FORBIDDEN_TS=$(grep -rEln "fetch\([^)]*[/\$]generate" \
|
||||||
|
--include="*.ts" \
|
||||||
|
--exclude-dir=node_modules \
|
||||||
|
--exclude-dir=target \
|
||||||
|
--exclude-dir=.git \
|
||||||
|
. 2>/dev/null | grep -v "^\./sidecar/" || true)
|
||||||
|
|
||||||
|
FORBIDDEN_RS=$(grep -rEln "post\([^)]*\"\.?/generate\"" \
|
||||||
|
--include="*.rs" \
|
||||||
|
--exclude-dir=target \
|
||||||
|
. 2>/dev/null | \
|
||||||
|
grep -vE "^\./crates/(gateway|aibridge)/" || true)
|
||||||
|
|
||||||
|
# Ollama Cloud /api/generate outside the gateway adapter. Match only
|
||||||
|
# when the URL appears in an actual fetch/post call (not in a comment).
|
||||||
|
# Tightened 2026-04-27 — pre-tightening regex flagged prose mentions.
|
||||||
|
FORBIDDEN_CLOUD=$(grep -rEln "(fetch|client\.post)\([^)]*api/generate" \
|
||||||
|
--include="*.ts" --include="*.rs" \
|
||||||
|
--exclude-dir=node_modules \
|
||||||
|
--exclude-dir=target \
|
||||||
|
--exclude-dir=.git \
|
||||||
|
. 2>/dev/null | \
|
||||||
|
grep -vE "^\./(crates/gateway|sidecar)" || true)
|
||||||
|
|
||||||
|
ANY_FAIL=0
|
||||||
|
if [ -n "$FORBIDDEN_TS" ]; then
|
||||||
|
echo "❌ Direct sidecar /generate calls (migrate to /v1/chat):"
|
||||||
|
echo "$FORBIDDEN_TS" | sed 's/^/ /'
|
||||||
|
ANY_FAIL=1
|
||||||
|
fi
|
||||||
|
if [ -n "$FORBIDDEN_RS" ]; then
|
||||||
|
echo "❌ Direct Rust /generate post() calls (migrate via gateway adapter):"
|
||||||
|
echo "$FORBIDDEN_RS" | sed 's/^/ /'
|
||||||
|
ANY_FAIL=1
|
||||||
|
fi
|
||||||
|
if [ -n "$FORBIDDEN_CLOUD" ]; then
|
||||||
|
echo "❌ Direct Ollama Cloud /api/generate (migrate to gateway provider=ollama_cloud):"
|
||||||
|
echo "$FORBIDDEN_CLOUD" | sed 's/^/ /'
|
||||||
|
ANY_FAIL=1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ $ANY_FAIL -eq 0 ]; then
|
||||||
|
echo "✅ Phase 44 caller-migration: clean (no direct /generate outside adapters)"
|
||||||
|
fi
|
||||||
|
exit $ANY_FAIL
|
||||||
@ -161,16 +161,17 @@ const TOOL_SCHEMA = `Available tools (call by emitting JSON like: {"tool": "name
|
|||||||
// ─── AGENT LOOP ───
|
// ─── AGENT LOOP ───
|
||||||
|
|
||||||
async function callAgent(messages: Array<{role: string; content: string}>): Promise<string> {
|
async function callAgent(messages: Array<{role: string; content: string}>): Promise<string> {
|
||||||
// think:false disables hidden reasoning so all generated tokens go to
|
// Phase 44 migration (2026-04-27): /v1/chat instead of direct sidecar
|
||||||
// visible response. qwen3.5:latest defaults to thinking and silently
|
// /generate so /v1/usage tracks the call, Langfuse traces it.
|
||||||
// burns the token budget otherwise.
|
// think:false still disables hidden reasoning so generated tokens
|
||||||
const r = await fetch(`${SIDECAR}/generate`, {
|
// go to visible response — qwen3.5:latest defaults to thinking.
|
||||||
|
const r = await fetch(`${GATEWAY}/v1/chat`, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: { "content-type": "application/json" },
|
headers: { "content-type": "application/json" },
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
model: AGENT_MODEL,
|
model: AGENT_MODEL,
|
||||||
prompt: messages.map(m => `${m.role.toUpperCase()}:\n${m.content}`).join("\n\n") + "\n\nASSISTANT:\n",
|
provider: "ollama",
|
||||||
stream: false,
|
messages,
|
||||||
max_tokens: 1500,
|
max_tokens: 1500,
|
||||||
think: false,
|
think: false,
|
||||||
}),
|
}),
|
||||||
@ -178,7 +179,7 @@ async function callAgent(messages: Array<{role: string; content: string}>): Prom
|
|||||||
});
|
});
|
||||||
if (!r.ok) throw new Error(`agent ${r.status}: ${(await r.text()).slice(0, 200)}`);
|
if (!r.ok) throw new Error(`agent ${r.status}: ${(await r.text()).slice(0, 200)}`);
|
||||||
const j: any = await r.json();
|
const j: any = await r.json();
|
||||||
return String(j.text ?? j.response ?? "").trim();
|
return String(j?.choices?.[0]?.message?.content ?? "").trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
function extractToolCall(response: string): { tool: string; args: any } | null {
|
function extractToolCall(response: string): { tool: string; args: any } | null {
|
||||||
|
|||||||
@ -372,26 +372,34 @@ export async function generate(model: string, prompt: string, opts: {
|
|||||||
bypass_budget?: boolean;
|
bypass_budget?: boolean;
|
||||||
think?: boolean;
|
think?: boolean;
|
||||||
} = {}): Promise<string> {
|
} = {}): Promise<string> {
|
||||||
|
// Phase 44 migration (2026-04-27): was hitting `${SIDECAR}/generate`
|
||||||
|
// directly, bypassing the gateway's /v1/usage accounting + Langfuse
|
||||||
|
// tracing. Now flows through /v1/chat with provider="ollama" so
|
||||||
|
// every local call is observable + auditable. Sidecar transport is
|
||||||
|
// unchanged — gateway just owns the call.
|
||||||
assertContextBudget(model, prompt, {
|
assertContextBudget(model, prompt, {
|
||||||
system: opts.system,
|
system: opts.system,
|
||||||
max_tokens: opts.max_tokens,
|
max_tokens: opts.max_tokens,
|
||||||
bypass: opts.bypass_budget,
|
bypass: opts.bypass_budget,
|
||||||
});
|
});
|
||||||
|
const messages: Array<{ role: string; content: string }> = [];
|
||||||
|
if (opts.system) messages.push({ role: "system", content: opts.system });
|
||||||
|
messages.push({ role: "user", content: prompt });
|
||||||
const body: Record<string, any> = {
|
const body: Record<string, any> = {
|
||||||
model,
|
model,
|
||||||
prompt,
|
messages,
|
||||||
|
provider: "ollama",
|
||||||
temperature: opts.temperature ?? 0.3,
|
temperature: opts.temperature ?? 0.3,
|
||||||
max_tokens: opts.max_tokens ?? 800,
|
max_tokens: opts.max_tokens ?? 800,
|
||||||
};
|
};
|
||||||
if (opts.system) body.system = opts.system;
|
|
||||||
if (opts.think !== undefined) body.think = opts.think;
|
if (opts.think !== undefined) body.think = opts.think;
|
||||||
const r = await http<any>("POST", `${SIDECAR}/generate`, body);
|
const r = await http<any>("POST", `${GATEWAY}/v1/chat`, body);
|
||||||
const text = typeof r.text === "string" ? r.text : "";
|
const text = r?.choices?.[0]?.message?.content ?? "";
|
||||||
// Do NOT throw on empty. Thinking models (gpt-oss, qwen3.5) burn the
|
// Do NOT throw on empty. Thinking models (gpt-oss, qwen3.5) burn the
|
||||||
// max_tokens budget on hidden reasoning and emit "" when budget was
|
// max_tokens budget on hidden reasoning and emit "" when budget was
|
||||||
// too tight. generateContinuable detects empty + continues with more
|
// too tight. generateContinuable detects empty + continues with more
|
||||||
// budget. Callers that expected non-empty can check themselves.
|
// budget. Callers that expected non-empty can check themselves.
|
||||||
return text;
|
return typeof text === "string" ? text : "";
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cloud generate — routes through the lakehouse gateway's /v1/chat
|
// Cloud generate — routes through the lakehouse gateway's /v1/chat
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user