lakehouse/config/modes.toml
root d054c0b8b1 REVERT cloud routing on hot path — back to local Ollama per PRD line 70
PRD line 70: "Everything runs locally — no cloud APIs, total data privacy."
Yesterday's PR #13 (feb638e) violated this by routing customer-facing
inference paths to opencode + ollama_cloud + openrouter. Reverting the
hot-path routes only; cloud providers stay configured in providers.toml
for explicit dev-tool opt-in.

Reverted:
- modes.toml staffing_inference: kimi-k2.6 → qwen3.5:latest (local Ollama)
- modes.toml doc_drift_check: gemini-3-flash-preview → qwen3.5:latest
- execution_loop overseer: opencode/claude-opus-4-7 → ollama/qwen3.5:latest
  Was a paid Anthropic call on every overseer escalation; now local + free.

Gateway compiles + restarts clean. Lance smoke 10/10. Live providers list
unchanged (kimi/ollama_cloud/opencode/openrouter all still CONFIGURED;
they just aren't ROUTED to from the staffing inference path anymore).

This stops the API meter on customer requests. Cloud providers remain
opt-in via explicit provider= caller hint, which the scrum tool +
auditor pipeline + bot/propose use deliberately.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 01:57:20 -05:00

90 lines
3.8 KiB
TOML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Mode router config — task_class → mode mapping
#
# `preferred_mode` is the first choice for a task class; `fallback_modes`
# are tried in order when the preferred mode isn't available (LLM Team can
# return "Unknown mode" for some modes) or when the matrix has a stronger
# signal for a fallback. `default_model` seeds the mode runner's model
# field if the caller doesn't override it.
#
# Modes are dispatched against LLM Team UI (localhost:5000/api/run) for
# now; future Rust-native runners will short-circuit before the proxy.
# See crates/gateway/src/v1/mode.rs for the dispatch path.
[[task_class]]
name = "scrum_review"
# Isolation is the default review mode. In the 2026-04-26 pass5 variance
# test (5 reps × 4 conditions, grok-4.1-fast, pathway_memory.rs) the
# composed corpus lost to isolation 5/5 (Δ 1.8 grounded findings,
# p=0.031); see docs/MODE_RUNNER_TUNING_PLAN.md. Bug fingerprints +
# adversarial framing + file content carry strong models without matrix
# noise.
preferred_mode = "codereview_isolation"
# Tried in order. The `codereview_lakehouse` matrix path also remains
# available via force_mode, but it auto-downgrades to isolation on strong
# models (see the is_strong_model gate in crates/gateway/src/v1/mode.rs).
fallback_modes = [
    "codereview_lakehouse",
    "codereview",
    "consensus",
    "ladder",
]
default_model = "qwen3-coder:480b"
# These corpora stay defined so experimental modes
# (codereview_matrix_only, pass2/pass5 sweeps) and weak-model rescue
# rungs can still pull them. scrum_findings_v1 is built but EXCLUDED:
# the bake-off showed 24% out-of-bounds (OOB) line citations from
# cross-file drift, so it is only safe with same-file gating.
matrix_corpus = [
    "lakehouse_arch_v1",
    "lakehouse_symbols_v1",
]
[[task_class]]
name = "contract_analysis"
# Contract analysis: deep_analysis first, then research, then extract.
preferred_mode = "deep_analysis"
fallback_modes = ["research", "extract"]
# NOTE(review): the pr_audit entry in this file describes kimi-k2:1t as a
# paid Ollama Cloud model that was failing upstream as of 2026-04-27 and
# substitutes deepseek-v3.1:671b for it. Confirm this default still
# resolves, and whether a cloud default is intended for this task class.
default_model = "kimi-k2:1t"
matrix_corpus = "chicago_permits_v1"
[[task_class]]
name = "staffing_inference"
# Staffing-domain native enrichment runner — Pass 4 (2026-04-26).
# Same composer architecture as codereview_lakehouse but with staffing
# framing + workers corpus.
preferred_mode = "staffing_inference_lakehouse"
fallback_modes = ["ladder", "consensus", "pipeline"]
# 2026-05-03: REVERTED from kimi-k2.6 to local Ollama. PRD line 70 —
# everything runs locally, no cloud APIs on the customer hot path. Cloud
# models stay available in providers.toml for explicit dev-tool opt-in
# (scrum, auditor).
default_model = "qwen3.5:latest"
# Workers corpus for the staffing composer.
matrix_corpus = "workers_500k_v8"
[[task_class]]
name = "fact_extract"
# Fact extraction: extract first, distill as the only fallback.
preferred_mode = "extract"
fallback_modes = ["distill"]
# NOTE(review): no tag is given here, unlike the `:latest` entries
# elsewhere in this file — confirm the runner resolves a bare model name
# to the intended local model.
default_model = "qwen2.5"
matrix_corpus = "kb_team_runs_v1"
[[task_class]]
name = "doc_drift_check"
# Documentation drift check: drift first, validator as the only fallback.
preferred_mode = "drift"
fallback_modes = ["validator"]
# 2026-05-03: REVERTED from gemini-3-flash-preview to local Ollama per
# PRD line 70 (everything runs locally, no cloud APIs).
default_model = "qwen3.5:latest"
matrix_corpus = "distilled_factual_v20260423095819"
[[task_class]]
name = "pr_audit"
# Auditor's claim-vs-diff verification mode (2026-04-26 rebuild).
# Replaces the auditor's hand-rolled inference check with the mode-runner
# composer: pathway memory (PR-level patterns) + lakehouse_answers_v1
# corpus (prior accepted reviews + observer escalations) + adversarial
# JSON-shaped framing. The intended default model was paid Ollama Cloud
# kimi-k2:1t for strong claim-grounding; default_model below is its
# current substitute. Tie-breaker via auditor-side env override.
preferred_mode = "pr_audit"
fallback_modes = ["consensus", "ladder"]
# kimi-k2:1t broken upstream 2026-04-27 (Ollama Cloud 500 Internal
# Server Error, multi-hour sustained outage verified by repeated probes).
# deepseek-v3.1:671b is the drop-in substitute — proven working
# end-to-end through pr_audit during Phase 5 distillation acceptance
# testing.
default_model = "deepseek-v3.1:671b"
matrix_corpus = "lakehouse_answers_v1"
# Fallback used when a request's task_class isn't in the table — useful
# for ad-hoc calls during development that don't yet have a mapped mode.
[default]
preferred_mode = "pipeline"
fallback_modes = ["consensus", "ladder"]
# Local model, matching the other qwen3.5:latest defaults in this file.
default_model = "qwen3.5:latest"