Compare commits
2 Commits
f4dc1b29e3
...
8de94eba08
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8de94eba08 | ||
|
|
d475fc7fff |
@ -16,12 +16,14 @@ import type { Gap, Proposal } from "./types.ts";
|
|||||||
// Phase 44 migration (2026-04-27): bot/propose.ts now flows through
|
// Phase 44 migration (2026-04-27): bot/propose.ts now flows through
|
||||||
// the gateway's /v1/chat instead of hitting the sidecar's /generate
|
// the gateway's /v1/chat instead of hitting the sidecar's /generate
|
||||||
// directly. /v1/usage tracks the call, Langfuse traces it, observer
|
// directly. /v1/usage tracks the call, Langfuse traces it, observer
|
||||||
// sees it. Same upstream model (CLOUD_MODEL gpt-oss:120b on
|
// sees it. Gateway owns the routing.
|
||||||
// Ollama Cloud) — gateway just owns the routing.
|
//
|
||||||
|
// 2026-04-28: gpt-oss:120b → deepseek-v3.2 via Ollama Pro. Newer
|
||||||
|
// DeepSeek revision, faster, still on the same OLLAMA_CLOUD_KEY.
|
||||||
const GATEWAY_URL = process.env.LH_GATEWAY_URL ?? "http://localhost:3100";
|
const GATEWAY_URL = process.env.LH_GATEWAY_URL ?? "http://localhost:3100";
|
||||||
const REPO_ROOT = "/home/profit/lakehouse";
|
const REPO_ROOT = "/home/profit/lakehouse";
|
||||||
const PRD_PATH = `${REPO_ROOT}/docs/PRD.md`;
|
const PRD_PATH = `${REPO_ROOT}/docs/PRD.md`;
|
||||||
const CLOUD_MODEL = process.env.LH_BOT_MODEL ?? "gpt-oss:120b";
|
const CLOUD_MODEL = process.env.LH_BOT_MODEL ?? "deepseek-v3.2";
|
||||||
const MAX_TOKENS = 6000;
|
const MAX_TOKENS = 6000;
|
||||||
|
|
||||||
export async function findGaps(): Promise<Gap[]> {
|
export async function findGaps(): Promise<Gap[]> {
|
||||||
|
|||||||
@ -44,7 +44,10 @@ name = "staffing_inference"
|
|||||||
# pattern generalizes beyond code review.
|
# pattern generalizes beyond code review.
|
||||||
preferred_mode = "staffing_inference_lakehouse"
|
preferred_mode = "staffing_inference_lakehouse"
|
||||||
fallback_modes = ["ladder", "consensus", "pipeline"]
|
fallback_modes = ["ladder", "consensus", "pipeline"]
|
||||||
default_model = "openai/gpt-oss-120b:free"
|
# 2026-04-28: gpt-oss-120b:free → kimi-k2.6 via Ollama Pro. Coding-
|
||||||
|
# specialized, faster than gpt-oss, on the same OLLAMA_CLOUD_KEY so
|
||||||
|
# no extra provider hop.
|
||||||
|
default_model = "kimi-k2.6"
|
||||||
matrix_corpus = "workers_500k_v8"
|
matrix_corpus = "workers_500k_v8"
|
||||||
|
|
||||||
[[task_class]]
|
[[task_class]]
|
||||||
@ -58,7 +61,9 @@ matrix_corpus = "kb_team_runs_v1"
|
|||||||
name = "doc_drift_check"
|
name = "doc_drift_check"
|
||||||
preferred_mode = "drift"
|
preferred_mode = "drift"
|
||||||
fallback_modes = ["validator"]
|
fallback_modes = ["validator"]
|
||||||
default_model = "gpt-oss:120b"
|
# 2026-04-28: gpt-oss:120b → gemini-3-flash-preview via Ollama Pro.
|
||||||
|
# Speed leader on factual checking, same OLLAMA_CLOUD_KEY.
|
||||||
|
default_model = "gemini-3-flash-preview"
|
||||||
matrix_corpus = "distilled_factual_v20260423095819"
|
matrix_corpus = "distilled_factual_v20260423095819"
|
||||||
|
|
||||||
[[task_class]]
|
[[task_class]]
|
||||||
|
|||||||
@ -27,10 +27,15 @@ name = "ollama_cloud"
|
|||||||
base_url = "https://ollama.com"
|
base_url = "https://ollama.com"
|
||||||
auth = "bearer"
|
auth = "bearer"
|
||||||
auth_env = "OLLAMA_CLOUD_KEY"
|
auth_env = "OLLAMA_CLOUD_KEY"
|
||||||
default_model = "gpt-oss:120b"
|
default_model = "deepseek-v3.2"
|
||||||
# Cloud-tier Ollama. Key resolved from OLLAMA_CLOUD_KEY env at gateway
|
# Cloud-tier Ollama (Pro plan as of 2026-04-28). Key resolved from
|
||||||
# boot. Model-prefix routing: "cloud/<model>" auto-routes here
|
# OLLAMA_CLOUD_KEY at gateway boot; Pro tier upgraded the account so
|
||||||
# (see gateway::v1::resolve_provider).
|
# rate limits + model access widen without a key change. Model-prefix
|
||||||
|
# routing: "cloud/<model>" auto-routes here. 39-model fleet now
|
||||||
|
# includes deepseek-v3.2, deepseek-v4-{flash,pro}, gemini-3-flash-
|
||||||
|
# preview, glm-{5,5.1}, kimi-k2.6, qwen3-coder-next.
|
||||||
|
# 2026-04-28: default upgraded gpt-oss:120b → deepseek-v3.2 (newer
|
||||||
|
# DeepSeek revision; kimi-k2:1t still upstream-broken with HTTP 500).
|
||||||
|
|
||||||
[[provider]]
|
[[provider]]
|
||||||
name = "openrouter"
|
name = "openrouter"
|
||||||
@ -38,7 +43,7 @@ base_url = "https://openrouter.ai/api/v1"
|
|||||||
auth = "bearer"
|
auth = "bearer"
|
||||||
auth_env = "OPENROUTER_API_KEY"
|
auth_env = "OPENROUTER_API_KEY"
|
||||||
auth_fallback_files = ["/home/profit/.env", "/root/llm_team_config.json"]
|
auth_fallback_files = ["/home/profit/.env", "/root/llm_team_config.json"]
|
||||||
default_model = "openai/gpt-oss-120b:free"
|
default_model = "x-ai/grok-4.1-fast"
|
||||||
# Multi-provider gateway. Covers Anthropic, Google, OpenAI, MiniMax,
|
# Multi-provider gateway. Covers Anthropic, Google, OpenAI, MiniMax,
|
||||||
# Qwen, Gemma, etc. Key resolved via crates/gateway/src/v1/openrouter.rs
|
# Qwen, Gemma, etc. Key resolved via crates/gateway/src/v1/openrouter.rs
|
||||||
# resolve_openrouter_key() — env first, then fallback files.
|
# resolve_openrouter_key() — env first, then fallback files.
|
||||||
|
|||||||
@ -582,10 +582,10 @@ impl ExecutionLoop {
|
|||||||
/// Phase 20 step (8) — T3 overseer escalation.
|
/// Phase 20 step (8) — T3 overseer escalation.
|
||||||
///
|
///
|
||||||
/// When the local executor/reviewer loop can't self-correct, call
|
/// When the local executor/reviewer loop can't self-correct, call
|
||||||
/// the cloud overseer (`gpt-oss:120b` via Ollama Cloud) with (a)
|
/// the cloud overseer (`claude-opus-4-7` via OpenCode Zen) with
|
||||||
/// the KB context — recent outcomes + prior corrections for this
|
/// (a) the KB context — recent outcomes + prior corrections for
|
||||||
/// sig_hash + task_class, across every profile that has run it —
|
/// this sig_hash + task_class, across every profile that has run
|
||||||
/// and (b) the recent log tail. Its output is appended as a
|
/// it — and (b) the recent log tail. Its output is appended as a
|
||||||
/// `system` role turn so the next executor generation sees it,
|
/// `system` role turn so the next executor generation sees it,
|
||||||
/// AND written to `data/_kb/overseer_corrections.jsonl` so every
|
/// AND written to `data/_kb/overseer_corrections.jsonl` so every
|
||||||
/// future profile activation reads from the same learning pool.
|
/// future profile activation reads from the same learning pool.
|
||||||
@ -593,9 +593,16 @@ impl ExecutionLoop {
|
|||||||
/// This is the "pipe to the overviewer" piece from 2026-04-23 —
|
/// This is the "pipe to the overviewer" piece from 2026-04-23 —
|
||||||
/// the overseer is now a first-class KB consumer AND producer, not
|
/// the overseer is now a first-class KB consumer AND producer, not
|
||||||
/// a one-shot correction oracle.
|
/// a one-shot correction oracle.
|
||||||
|
///
|
||||||
|
/// 2026-04-28: routed through OpenCode (Zen tier) for Claude Opus
|
||||||
|
/// 4.7. Frontier reasoning matters here because the overseer fires
|
||||||
|
/// only after local self-correction has failed twice — by that
|
||||||
|
/// point we need the strongest reasoning available, not the
|
||||||
|
/// cheapest token. Frequency is low so the Zen pay-per-token cost
|
||||||
|
/// stays bounded.
|
||||||
async fn escalate_to_overseer(&mut self, turn: u32, reason: &str) -> Result<(), String> {
|
async fn escalate_to_overseer(&mut self, turn: u32, reason: &str) -> Result<(), String> {
|
||||||
let Some(cloud_key) = self.state.ollama_cloud_key.clone() else {
|
let Some(opencode_key) = self.state.opencode_key.clone() else {
|
||||||
return Err("OLLAMA_CLOUD_KEY not configured — skipping escalation".into());
|
return Err("OPENCODE_API_KEY not configured — skipping escalation".into());
|
||||||
};
|
};
|
||||||
|
|
||||||
let kb = KbContext::load_for(&sig_hash(&self.req), &self.req.task_class).await;
|
let kb = KbContext::load_for(&sig_hash(&self.req), &self.req.task_class).await;
|
||||||
@ -604,16 +611,18 @@ impl ExecutionLoop {
|
|||||||
let started = std::time::Instant::now();
|
let started = std::time::Instant::now();
|
||||||
let start_time = chrono::Utc::now();
|
let start_time = chrono::Utc::now();
|
||||||
let chat_req = crate::v1::ChatRequest {
|
let chat_req = crate::v1::ChatRequest {
|
||||||
model: "gpt-oss:120b".to_string(),
|
model: "claude-opus-4-7".to_string(),
|
||||||
messages: vec![crate::v1::Message::new_text("user", prompt.clone())],
|
messages: vec![crate::v1::Message::new_text("user", prompt.clone())],
|
||||||
temperature: Some(0.1),
|
temperature: Some(0.1),
|
||||||
max_tokens: None,
|
max_tokens: None,
|
||||||
stream: Some(false),
|
stream: Some(false),
|
||||||
think: Some(true), // overseer KEEPS thinking (Phase 20 rule)
|
// Anthropic models on opencode reject `think` (handled in
|
||||||
provider: Some("ollama_cloud".into()),
|
// the adapter), but we keep the intent flag for parity.
|
||||||
|
think: Some(true),
|
||||||
|
provider: Some("opencode".into()),
|
||||||
};
|
};
|
||||||
let resp = crate::v1::ollama_cloud::chat(&cloud_key, &chat_req).await
|
let resp = crate::v1::opencode::chat(&opencode_key, &chat_req).await
|
||||||
.map_err(|e| format!("ollama_cloud: {e}"))?;
|
.map_err(|e| format!("opencode: {e}"))?;
|
||||||
let latency_ms = started.elapsed().as_millis() as u64;
|
let latency_ms = started.elapsed().as_millis() as u64;
|
||||||
let end_time = chrono::Utc::now();
|
let end_time = chrono::Utc::now();
|
||||||
let correction_text: String = resp.choices.into_iter().next()
|
let correction_text: String = resp.choices.into_iter().next()
|
||||||
@ -633,8 +642,8 @@ impl ExecutionLoop {
|
|||||||
if let Some(lf) = &self.state.langfuse {
|
if let Some(lf) = &self.state.langfuse {
|
||||||
use crate::v1::langfuse_trace::ChatTrace;
|
use crate::v1::langfuse_trace::ChatTrace;
|
||||||
lf.emit_chat(ChatTrace {
|
lf.emit_chat(ChatTrace {
|
||||||
provider: "ollama_cloud".into(),
|
provider: "opencode".into(),
|
||||||
model: "gpt-oss:120b".into(),
|
model: "claude-opus-4-7".into(),
|
||||||
input: vec![crate::v1::Message::new_text("user", prompt.clone())],
|
input: vec![crate::v1::Message::new_text("user", prompt.clone())],
|
||||||
output: correction_text.clone(),
|
output: correction_text.clone(),
|
||||||
prompt_tokens: resp.usage.prompt_tokens,
|
prompt_tokens: resp.usage.prompt_tokens,
|
||||||
@ -650,7 +659,7 @@ impl ExecutionLoop {
|
|||||||
|
|
||||||
// Append to the transcript so the next executor turn sees it.
|
// Append to the transcript so the next executor turn sees it.
|
||||||
self.append(LogEntry::new(
|
self.append(LogEntry::new(
|
||||||
turn, "system", "gpt-oss:120b", "overseer_correction",
|
turn, "system", "claude-opus-4-7", "overseer_correction",
|
||||||
serde_json::json!({
|
serde_json::json!({
|
||||||
"reason": reason,
|
"reason": reason,
|
||||||
"correction": correction_text,
|
"correction": correction_text,
|
||||||
@ -672,7 +681,7 @@ impl ExecutionLoop {
|
|||||||
"task_class": self.req.task_class,
|
"task_class": self.req.task_class,
|
||||||
"operation": self.req.operation,
|
"operation": self.req.operation,
|
||||||
"reason": reason,
|
"reason": reason,
|
||||||
"model": "gpt-oss:120b",
|
"model": "claude-opus-4-7",
|
||||||
"correction": correction_text,
|
"correction": correction_text,
|
||||||
"applied_at_turn": turn,
|
"applied_at_turn": turn,
|
||||||
"kb_context_used": kb,
|
"kb_context_used": kb,
|
||||||
|
|||||||
@ -163,7 +163,11 @@ pub async fn query(
|
|||||||
// production caller of the Phase 21 primitives — see audit finding
|
// production caller of the Phase 21 primitives — see audit finding
|
||||||
// "Phase 21 Rust primitives are wired but not CALLED by any
|
// "Phase 21 Rust primitives are wired but not CALLED by any
|
||||||
// production surface" from 2026-04-21.
|
// production surface" from 2026-04-21.
|
||||||
let mut cont_opts = ContinuableOpts::new("qwen2.5:latest");
|
// 2026-04-30 model bump: qwen2.5:latest → qwen3.5:latest to match
|
||||||
|
// the small-model-pipeline local-tier default. Same JSON-clean
|
||||||
|
// property, more capacity. think=Some(false) preserved — RAG hot
|
||||||
|
// path doesn't need reasoning traces; direct answers only.
|
||||||
|
let mut cont_opts = ContinuableOpts::new("qwen3.5:latest");
|
||||||
cont_opts.max_tokens = Some(512);
|
cont_opts.max_tokens = Some(512);
|
||||||
cont_opts.temperature = Some(0.2);
|
cont_opts.temperature = Some(0.2);
|
||||||
cont_opts.shape = ResponseShape::Text;
|
cont_opts.shape = ResponseShape::Text;
|
||||||
@ -176,7 +180,7 @@ pub async fn query(
|
|||||||
// echoes whatever Ollama loaded). Use the configured tier model
|
// echoes whatever Ollama loaded). Use the configured tier model
|
||||||
// for now; if RAG needs to report the actual resolved model,
|
// for now; if RAG needs to report the actual resolved model,
|
||||||
// the runner can add a post-call ps probe later.
|
// the runner can add a post-call ps probe later.
|
||||||
model: "qwen2.5:latest".to_string(),
|
model: "qwen3.5:latest".to_string(),
|
||||||
sources: results,
|
sources: results,
|
||||||
tokens_generated: None,
|
tokens_generated: None,
|
||||||
})
|
})
|
||||||
|
|||||||
@ -48,8 +48,13 @@ url = "http://localhost:3200"
|
|||||||
|
|
||||||
[ai]
|
[ai]
|
||||||
embed_model = "nomic-embed-text"
|
embed_model = "nomic-embed-text"
|
||||||
gen_model = "qwen2.5"
|
# Local-tier defaults bumped 2026-04-30: qwen3.5:latest is the
|
||||||
rerank_model = "qwen2.5"
|
# stronger local rung in the 5-loop substrate (per
|
||||||
|
# project_small_model_pipeline_vision.md). Same JSON-clean property
|
||||||
|
# as qwen2.5, more capacity. Ollama still serves both — bump back
|
||||||
|
# in this file if a workload regresses.
|
||||||
|
gen_model = "qwen3.5:latest"
|
||||||
|
rerank_model = "qwen3.5:latest"
|
||||||
|
|
||||||
[auth]
|
[auth]
|
||||||
enabled = false
|
enabled = false
|
||||||
@ -72,7 +77,9 @@ min_recall = 0.9 # never promote below this
|
|||||||
max_trials_per_hour = 20 # hard budget cap
|
max_trials_per_hour = 20 # hard budget cap
|
||||||
|
|
||||||
# Model roster — available for profile hot-swap
|
# Model roster — available for profile hot-swap
|
||||||
|
# qwen3.5:latest: stronger local rung — JSON-clean, 8K+ context,
|
||||||
|
# default for gen_model and rerank_model
|
||||||
# qwen3: 8.2B, 40K context, thinking+tools, best for reasoning tasks
|
# qwen3: 8.2B, 40K context, thinking+tools, best for reasoning tasks
|
||||||
# qwen2.5: 7B, 8K context, fast, good for SQL generation
|
# qwen2.5: 7B, 8K context, fast — kept loaded for the 2026-04 era
|
||||||
# mistral: 7B, 8K context, good for general generation
|
# comparison runs; new defaults use qwen3.5:latest
|
||||||
# nomic-embed-text: 137M, embedding-only, used by all profiles
|
# nomic-embed-text: 137M, embedding-only, used by all profiles
|
||||||
|
|||||||
@ -313,9 +313,9 @@ ${(buckets as any[] || []).map((b: any) => `- ${b.name}: ${b.backend} (${b.reach
|
|||||||
- Ollama: :11434
|
- Ollama: :11434
|
||||||
|
|
||||||
## Available Models
|
## Available Models
|
||||||
|
- qwen3.5:latest: stronger local rung, JSON-clean (default for gen + rerank)
|
||||||
- qwen3: 8.2B, 40K context, thinking+tools (best for reasoning)
|
- qwen3: 8.2B, 40K context, thinking+tools (best for reasoning)
|
||||||
- qwen2.5: 7B, 8K context (best for fast SQL generation)
|
- qwen2.5: 7B, 8K context (legacy — 2026-04 era comparison runs only)
|
||||||
- mistral: 7B, 8K context (general generation)
|
|
||||||
- nomic-embed-text: 137M (embedding, automatic)
|
- nomic-embed-text: 137M (embedding, automatic)
|
||||||
`;
|
`;
|
||||||
return { contents: [{ uri: uri.href, mimeType: "text/plain", text }] };
|
return { contents: [{ uri: uri.href, mimeType: "text/plain", text }] };
|
||||||
|
|||||||
@ -146,15 +146,16 @@ async function persistOp(op: ObservedOp) {
|
|||||||
// ─── LLM Team escalation (code_review mode) ───
|
// ─── LLM Team escalation (code_review mode) ───
|
||||||
//
|
//
|
||||||
// When recent failures on a single sig_hash cross a threshold the
|
// When recent failures on a single sig_hash cross a threshold the
|
||||||
// local qwen2.5 analysis is probably insufficient. J's 2026-04-24
|
// local-model analysis is probably insufficient. J's 2026-04-24
|
||||||
// direction: "the observer would trigger to give more context" —
|
// direction: "the observer would trigger to give more context" —
|
||||||
// route failure clusters to LLM Team's specialized code_review mode
|
// route failure clusters to LLM Team's specialized code_review mode
|
||||||
// (via /api/run) so richer structured signal lands in the KB for
|
// (via /api/run) so richer structured signal lands in the KB for
|
||||||
// scrum + auditor + playbook memory to consume next pass.
|
// scrum + auditor + playbook memory to consume next pass.
|
||||||
//
|
//
|
||||||
// Non-destructive: runs in parallel to the existing qwen2.5 analysis,
|
// Non-destructive: runs in parallel to the existing local diagnose
|
||||||
// never replaces it. Writes to data/_kb/observer_escalations.jsonl
|
// call (qwen3.5:latest after the 2026-04-30 bump), never replaces
|
||||||
// as a dedicated audit surface.
|
// it. Writes to data/_kb/observer_escalations.jsonl as a dedicated
|
||||||
|
// audit surface.
|
||||||
|
|
||||||
const LLM_TEAM = process.env.LH_LLM_TEAM_URL ?? "http://localhost:5000";
|
const LLM_TEAM = process.env.LH_LLM_TEAM_URL ?? "http://localhost:5000";
|
||||||
const LLM_TEAM_ESCALATIONS = "/home/profit/lakehouse/data/_kb/observer_escalations.jsonl";
|
const LLM_TEAM_ESCALATIONS = "/home/profit/lakehouse/data/_kb/observer_escalations.jsonl";
|
||||||
@ -542,7 +543,7 @@ async function analyzeErrors() {
|
|||||||
if (failures.length === 0) return;
|
if (failures.length === 0) return;
|
||||||
|
|
||||||
// NEW 2026-04-24: escalate recurring sig_hash clusters to LLM Team
|
// NEW 2026-04-24: escalate recurring sig_hash clusters to LLM Team
|
||||||
// code_review mode. Runs in parallel to the local qwen2.5 analysis
|
// code_review mode. Runs in parallel to the local diagnose call
|
||||||
// below — non-blocking, richer downstream signal for scrum/auditor.
|
// below — non-blocking, richer downstream signal for scrum/auditor.
|
||||||
maybeEscalate(failures).catch(() => {});
|
maybeEscalate(failures).catch(() => {});
|
||||||
|
|
||||||
@ -552,13 +553,14 @@ async function analyzeErrors() {
|
|||||||
|
|
||||||
// Ask local model to diagnose. Phase 44 migration (2026-04-27):
|
// Ask local model to diagnose. Phase 44 migration (2026-04-27):
|
||||||
// /v1/chat instead of legacy /ai/generate so /v1/usage tracks the
|
// /v1/chat instead of legacy /ai/generate so /v1/usage tracks the
|
||||||
// call + Langfuse traces it. Same upstream model (qwen2.5 local).
|
// call + Langfuse traces it. 2026-04-30 model bump: qwen2.5 →
|
||||||
|
// qwen3.5:latest to match the small-model-pipeline local-tier default.
|
||||||
try {
|
try {
|
||||||
const resp = await fetch(`${LAKEHOUSE}/v1/chat`, {
|
const resp = await fetch(`${LAKEHOUSE}/v1/chat`, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: { "Content-Type": "application/json" },
|
headers: { "Content-Type": "application/json" },
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
model: "qwen2.5",
|
model: "qwen3.5:latest",
|
||||||
provider: "ollama",
|
provider: "ollama",
|
||||||
messages: [{
|
messages: [{
|
||||||
role: "user",
|
role: "user",
|
||||||
@ -769,7 +771,7 @@ async function tailOverseerCorrections(): Promise<number> {
|
|||||||
try { row = JSON.parse(line); } catch { continue; }
|
try { row = JSON.parse(line); } catch { continue; }
|
||||||
const op: ObservedOp = {
|
const op: ObservedOp = {
|
||||||
timestamp: row.created_at ?? new Date().toISOString(),
|
timestamp: row.created_at ?? new Date().toISOString(),
|
||||||
endpoint: `overseer:${row.model ?? "gpt-oss:120b"}`,
|
endpoint: `overseer:${row.model ?? "claude-opus-4-7"}`,
|
||||||
input_summary: `${row.task_class ?? "?"}: ${row.reason ?? "escalation"}`,
|
input_summary: `${row.task_class ?? "?"}: ${row.reason ?? "escalation"}`,
|
||||||
// Correction itself is neither success nor failure — it's a
|
// Correction itself is neither success nor failure — it's a
|
||||||
// mitigation attempt. We mark success=true so analyzeErrors
|
// mitigation attempt. We mark success=true so analyzeErrors
|
||||||
|
|||||||
@ -1143,9 +1143,15 @@ Format each as a code-fenced block with the byte offset within the shard:
|
|||||||
EXACT LINE OF SOURCE — DO NOT PARAPHRASE, DO NOT TRUNCATE
|
EXACT LINE OF SOURCE — DO NOT PARAPHRASE, DO NOT TRUNCATE
|
||||||
\`\`\`
|
\`\`\`
|
||||||
Pick the most reviewer-relevant lines: route definitions (e.g. \`@app.route(...)\`), function signatures, security-sensitive calls (auth/SQL/exec/template/secrets), hardcoded credentials/defaults, exception handlers, sensitive imports. The reviewer will REFUSE to act on any claim not backed by a verbatim anchor — so anchors are how you prove findings are real.`;
|
Pick the most reviewer-relevant lines: route definitions (e.g. \`@app.route(...)\`), function signatures, security-sensitive calls (auth/SQL/exec/template/secrets), hardcoded credentials/defaults, exception handlers, sensitive imports. The reviewer will REFUSE to act on any claim not backed by a verbatim anchor — so anchors are how you prove findings are real.`;
|
||||||
|
// 2026-04-28: gpt-oss:120b → gemini-3-flash-preview via Ollama
|
||||||
|
// Pro. Tree-split MAP fires once per shard (potentially 5-20×
|
||||||
|
// per file), so latency dominates total scrum time. Gemini 3
|
||||||
|
// flash returns shard digests substantially faster than the old
|
||||||
|
// 120B free model while staying strong enough for byte-anchored
|
||||||
|
// extraction.
|
||||||
const r = await chat({
|
const r = await chat({
|
||||||
provider: "ollama_cloud",
|
provider: "ollama_cloud",
|
||||||
model: "gpt-oss:120b",
|
model: "gemini-3-flash-preview",
|
||||||
prompt,
|
prompt,
|
||||||
max_tokens: 900,
|
max_tokens: 900,
|
||||||
});
|
});
|
||||||
@ -1195,9 +1201,14 @@ COPY EVERY anchor block from the piece notes IN ORDER, character-perfect. DO NOT
|
|||||||
|
|
||||||
Output the anchor blocks under their original \`\`\`@offset...\`\`\` fences, each on its own with a blank line between. The reviewer rejects findings that don't quote a string from this anchors block, so completeness here directly determines review quality.`;
|
Output the anchor blocks under their original \`\`\`@offset...\`\`\` fences, each on its own with a blank line between. The reviewer rejects findings that don't quote a string from this anchors block, so completeness here directly determines review quality.`;
|
||||||
|
|
||||||
|
// 2026-04-28: gpt-oss:120b → gemini-3-flash-preview via Ollama
|
||||||
|
// Pro. The reducer runs once per file (vs once per shard for MAP)
|
||||||
|
// but on a much larger context (all shard digests stacked), so
|
||||||
|
// throughput per token still matters. Same model as MAP for
|
||||||
|
// consistency in tree-split outputs.
|
||||||
const reduced = await chat({
|
const reduced = await chat({
|
||||||
provider: "ollama_cloud",
|
provider: "ollama_cloud",
|
||||||
model: "gpt-oss:120b",
|
model: "gemini-3-flash-preview",
|
||||||
prompt: reducePrompt,
|
prompt: reducePrompt,
|
||||||
max_tokens: 2400,
|
max_tokens: 2400,
|
||||||
});
|
});
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user