Compare commits
2 Commits
7bb66f08c3
...
e9d17f7d5a
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e9d17f7d5a | ||
|
|
ac7c996596 |
@ -35,7 +35,9 @@ default_model = "deepseek-v3.2"
|
|||||||
# includes deepseek-v3.2, deepseek-v4-{flash,pro}, gemini-3-flash-
|
# includes deepseek-v3.2, deepseek-v4-{flash,pro}, gemini-3-flash-
|
||||||
# preview, glm-{5,5.1}, kimi-k2.6, qwen3-coder-next.
|
# preview, glm-{5,5.1}, kimi-k2.6, qwen3-coder-next.
|
||||||
# 2026-04-28: default upgraded gpt-oss:120b → deepseek-v3.2 (newest
|
# 2026-04-28: default upgraded gpt-oss:120b → deepseek-v3.2 (newest
|
||||||
# DeepSeek revision; kimi-k2:1t still upstream-broken with HTTP 500).
|
# DeepSeek revision). NOTE: kimi-k2:1t is upstream-broken (HTTP 500
|
||||||
|
# on Ollama Pro probe 2026-04-28) — do not route to it. Use kimi-k2.6
|
||||||
|
# instead, which is what staffing_inference points at.
|
||||||
|
|
||||||
[[provider]]
|
[[provider]]
|
||||||
name = "openrouter"
|
name = "openrouter"
|
||||||
@ -79,8 +81,10 @@ auth_env = "KIMI_API_KEY"
|
|||||||
default_model = "kimi-for-coding"
|
default_model = "kimi-for-coding"
|
||||||
# Direct Kimi For Coding provider. `api.kimi.com` is a SEPARATE account
|
# Direct Kimi For Coding provider. `api.kimi.com` is a SEPARATE account
|
||||||
# system from `api.moonshot.ai` and `api.moonshot.cn` — keys are NOT
|
# system from `api.moonshot.ai` and `api.moonshot.cn` — keys are NOT
|
||||||
# interchangeable. Used when Ollama Cloud's `kimi-k2:1t` is upstream-
|
# interchangeable. Used as a fallback when Ollama Cloud's kimi-k2.6 is
|
||||||
# broken and OpenRouter's `moonshotai/kimi-k2.6` is rate-limited.
|
# unavailable and OpenRouter's `moonshotai/kimi-k2.6` is rate-limited.
|
||||||
|
# (This comment named `kimi-k2:1t` before 2026-05-03 — that model is upstream-broken
|
||||||
|
# and has been removed from operator guidance.)
|
||||||
# Model id: `kimi-for-coding` (kimi-k2.6 underneath).
|
# Model id: `kimi-for-coding` (kimi-k2.6 underneath).
|
||||||
# Key file: /etc/lakehouse/kimi.env (loaded via systemd EnvironmentFile).
|
# Key file: /etc/lakehouse/kimi.env (loaded via systemd EnvironmentFile).
|
||||||
# Model-prefix routing: "kimi/<model>" auto-routes here, prefix stripped.
|
# Model-prefix routing: "kimi/<model>" auto-routes here, prefix stripped.
|
||||||
|
|||||||
@ -620,13 +620,23 @@ mod tests {
|
|||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
fn temp_path(label: &str) -> String {
|
fn temp_path(label: &str) -> String {
|
||||||
let n = std::time::SystemTime::now()
|
// Per-process atomic counter — guarantees uniqueness regardless
|
||||||
|
// of clock resolution or test scheduling. Combined with pid, the
|
||||||
|
// result is unique within and across processes for any practical
|
||||||
|
// test workload. Nanosecond timestamps were not enough on their
|
||||||
|
// own: opus WARN at lib.rs:622 from the 2026-05-02 scrum noted
|
||||||
|
// that under tokio scheduling, multiple tests in the same cargo
|
||||||
|
// process can hit the same nanos bucket.
|
||||||
|
use std::sync::atomic::{AtomicU64, Ordering};
|
||||||
|
static COUNTER: AtomicU64 = AtomicU64::new(0);
|
||||||
|
let seq = COUNTER.fetch_add(1, Ordering::Relaxed);
|
||||||
|
let pid = std::process::id();
|
||||||
|
let nanos = std::time::SystemTime::now()
|
||||||
.duration_since(std::time::UNIX_EPOCH)
|
.duration_since(std::time::UNIX_EPOCH)
|
||||||
.map(|d| d.subsec_nanos())
|
.map(|d| d.subsec_nanos())
|
||||||
.unwrap_or(0);
|
.unwrap_or(0);
|
||||||
let pid = std::process::id();
|
|
||||||
std::env::temp_dir()
|
std::env::temp_dir()
|
||||||
.join(format!("vlance_test_{label}_{pid}_{n}"))
|
.join(format!("vlance_test_{label}_{pid}_{nanos}_{seq}"))
|
||||||
.to_string_lossy()
|
.to_string_lossy()
|
||||||
.to_string()
|
.to_string()
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1932,18 +1932,18 @@ fn default_subvectors() -> u32 { 48 } // 768/48 = 16 dims per subvector
|
|||||||
/// search returned 500 + leaked the lakehouse data path AND the
|
/// search returned 500 + leaked the lakehouse data path AND the
|
||||||
/// .cargo/registry path with crate versions.
|
/// .cargo/registry path with crate versions.
|
||||||
fn sanitize_lance_err(err: String, index_name: &str) -> (StatusCode, String) {
|
fn sanitize_lance_err(err: String, index_name: &str) -> (StatusCode, String) {
|
||||||
// 404 detection — narrowed per the 2026-05-02 scrum (opus WARN at
|
// 404 detection — narrowed across two 2026-05-02→03 scrum waves.
|
||||||
// service.rs:1908). The previous `lower.contains("not found")` heuristic
|
// First wave (opus WARN service.rs:1908): the original `lower.contains
|
||||||
// was too broad: Lance/Arrow surface many non-dataset-missing errors
|
// ("not found")` was too broad — caught "column not found" /
|
||||||
// containing "not found" (e.g. "column not found", "field not found in
|
// "field not found in schema" which are real 500s. Second wave (opus
|
||||||
// schema"). Those are real 500s, not lookup misses. Match dataset-shape
|
// WARN service.rs:1949): the looser `mentions_path_missing` branch I
|
||||||
// phrasing — Lance's actual format is "Dataset at path X was not found"
|
// added would 404 on a registry-file error like "/root/.cargo/.../x.rs:
|
||||||
// or "no such file or directory". Excludes "column not found" and
|
// no such file or directory" because it triggers without dataset
|
||||||
// "field not found in schema" which are valid 500s.
|
// context. Drop the standalone path-missing branch; require dataset
|
||||||
|
// context AND a missing-shape phrase. Lance's actual error format
|
||||||
|
// ("Dataset at path X was not found") satisfies this.
|
||||||
let lower = err.to_lowercase();
|
let lower = err.to_lowercase();
|
||||||
let mentions_dataset = lower.contains("dataset");
|
let mentions_dataset = lower.contains("dataset");
|
||||||
let mentions_path_missing = lower.contains("no such file or directory")
|
|
||||||
|| lower.contains("does not exist");
|
|
||||||
let lance_dataset_missing = mentions_dataset && (
|
let lance_dataset_missing = mentions_dataset && (
|
||||||
lower.contains("not found") || lower.contains("does not exist")
|
lower.contains("not found") || lower.contains("does not exist")
|
||||||
);
|
);
|
||||||
@ -1951,7 +1951,7 @@ fn sanitize_lance_err(err: String, index_name: &str) -> (StatusCode, String) {
|
|||||||
let column_or_field = lower.contains("column not found")
|
let column_or_field = lower.contains("column not found")
|
||||||
|| lower.contains("field not found")
|
|| lower.contains("field not found")
|
||||||
|| lower.contains("schema not found");
|
|| lower.contains("schema not found");
|
||||||
let is_not_found = (lance_dataset_missing || mentions_path_missing) && !column_or_field;
|
let is_not_found = lance_dataset_missing && !column_or_field;
|
||||||
if is_not_found {
|
if is_not_found {
|
||||||
return (StatusCode::NOT_FOUND, format!("lance dataset not found: {index_name}"));
|
return (StatusCode::NOT_FOUND, format!("lance dataset not found: {index_name}"));
|
||||||
}
|
}
|
||||||
@ -2009,9 +2009,15 @@ fn redact_paths(s: &str) -> String {
|
|||||||
let b = bytes[i - 1];
|
let b = bytes[i - 1];
|
||||||
!(b.is_ascii_alphanumeric() || b == b'_' || b == b'.' || b == b'-')
|
!(b.is_ascii_alphanumeric() || b == b'_' || b == b'.' || b == b'-')
|
||||||
}
|
}
|
||||||
|
// Walk by byte index but slice the original &str when emitting, never
|
||||||
|
// cast bytes to char (that would corrupt multi-byte UTF-8 — opus WARN
|
||||||
|
// at service.rs:2018 from the 2026-05-03 re-scrum). Path prefixes are
|
||||||
|
// pure ASCII so byte-level matching is sound; what matters is that
|
||||||
|
// we emit non-matched stretches as &str slices, not byte-by-byte.
|
||||||
let bytes = s.as_bytes();
|
let bytes = s.as_bytes();
|
||||||
let mut out = String::with_capacity(s.len());
|
let mut out = String::with_capacity(s.len());
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
|
let mut copy_start = 0usize; // start of an in-progress unmatched run
|
||||||
while i < bytes.len() {
|
while i < bytes.len() {
|
||||||
let mut matched_len: Option<usize> = None;
|
let mut matched_len: Option<usize> = None;
|
||||||
// Try absolute prefixes first (always allowed).
|
// Try absolute prefixes first (always allowed).
|
||||||
@ -2036,20 +2042,42 @@ fn redact_paths(s: &str) -> String {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if let Some(prefix_len) = matched_len {
|
if let Some(prefix_len) = matched_len {
|
||||||
|
// Flush any pending unmatched run as a UTF-8-safe slice.
|
||||||
|
if copy_start < i {
|
||||||
|
out.push_str(&s[copy_start..i]);
|
||||||
|
}
|
||||||
out.push_str("[REDACTED]");
|
out.push_str("[REDACTED]");
|
||||||
|
// Skip past the prefix and the path body (until terminator).
|
||||||
let mut j = i + prefix_len;
|
let mut j = i + prefix_len;
|
||||||
while j < bytes.len() && !is_path_term(bytes[j]) {
|
while j < bytes.len() && !is_path_term(bytes[j]) {
|
||||||
j += 1;
|
j += 1;
|
||||||
}
|
}
|
||||||
i = j;
|
i = j;
|
||||||
|
copy_start = i;
|
||||||
} else {
|
} else {
|
||||||
out.push(bytes[i] as char);
|
// Advance one CHAR (not one byte) so multi-byte UTF-8 sequences
|
||||||
i += 1;
|
// stay intact in the eventual slice. Look up the next char
|
||||||
|
// boundary using the public API.
|
||||||
|
i += utf8_char_len(bytes, i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if copy_start < bytes.len() {
|
||||||
|
out.push_str(&s[copy_start..]);
|
||||||
|
}
|
||||||
out
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Length in bytes of the UTF-8 character whose first byte is `bytes[i]`,
|
||||||
|
/// decided from that lead byte alone (the following bytes are not inspected).
/// Callers are expected to pass an `i` that sits on a character boundary.
/// Indexing `bytes[i]` panics if `i` is out of range.
|
||||||
|
fn utf8_char_len(bytes: &[u8], i: usize) -> usize {
|
||||||
|
let b = bytes[i];
|
||||||
|
if b < 0x80 { 1 } // 0x00..=0x7F: single-byte (ASCII) character
|
||||||
|
else if b < 0xC0 { 1 } // continuation byte — defensive, shouldn't start here
|
||||||
|
else if b < 0xE0 { 2 } // 0xC0..=0xDF: lead byte of a 2-byte sequence
|
||||||
|
else if b < 0xF0 { 3 } // 0xE0..=0xEF: lead byte of a 3-byte sequence
|
||||||
|
else { 4 } // 0xF0 and above: reported as a 4-byte sequence
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod sanitize_tests {
|
mod sanitize_tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
@ -2147,6 +2175,40 @@ mod sanitize_tests {
|
|||||||
assert_eq!(status, StatusCode::INTERNAL_SERVER_ERROR, "{err}");
|
assert_eq!(status, StatusCode::INTERNAL_SERVER_ERROR, "{err}");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn is_not_found_does_not_match_unrelated_path_missing() {
|
||||||
|
// Regression: opus WARN at service.rs:1949 from the 2026-05-03
|
||||||
|
// re-scrum. A registry-file error from inside a Lance internal
|
||||||
|
// module should NOT be coerced to 404 just because it contains
|
||||||
|
// "no such file or directory" — it's a real 500.
|
||||||
|
let (status, _) = sanitize_lance_err(
|
||||||
|
"/root/.cargo/registry/src/index.crates.io-foo/lance-table-4.0.0/src/io/commit.rs: no such file or directory".into(),
|
||||||
|
"test_idx",
|
||||||
|
);
|
||||||
|
assert_eq!(status, StatusCode::INTERNAL_SERVER_ERROR);
|
||||||
|
// (And the path is still redacted in the message.)
|
||||||
|
let (_, msg) = sanitize_lance_err(
|
||||||
|
"/root/.cargo/registry/src/lance-foo/x.rs: no such file or directory".into(),
|
||||||
|
"test_idx",
|
||||||
|
);
|
||||||
|
assert!(!msg.contains("/root/.cargo"), "path leak: {msg}");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn redact_preserves_multibyte_utf8() {
|
||||||
|
// Regression: opus WARN at service.rs:2018 from the 2026-05-03
|
||||||
|
// re-scrum. Old impl did `out.push(bytes[i] as char)` which
|
||||||
|
// corrupted multi-byte UTF-8 (e.g. a path containing user-supplied
|
||||||
|
// names with non-ASCII characters) into Latin-1 mojibake.
|
||||||
|
let input = "Failed to open /home/profit/工作/data — café not found";
|
||||||
|
let out = redact_paths(input);
|
||||||
|
// The path is redacted...
|
||||||
|
assert!(!out.contains("/home/profit"), "path leak: {out}");
|
||||||
|
// ...AND the multi-byte characters elsewhere are preserved verbatim.
|
||||||
|
assert!(out.contains("café"), "lost UTF-8: {out}");
|
||||||
|
assert!(out.contains("not found"), "lost trailing context: {out}");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Build the IVF_PQ index on the Lance dataset.
|
/// Build the IVF_PQ index on the Lance dataset.
|
||||||
|
|||||||
@ -33,9 +33,18 @@ PROBE "gateway /v1/health responds" \
|
|||||||
bash -c "curl -sf -m 3 $GATEWAY/v1/health -o /dev/null"
|
bash -c "curl -sf -m 3 $GATEWAY/v1/health -o /dev/null"
|
||||||
|
|
||||||
# ── 1. Search returns IVF_PQ results on existing dataset ────────
|
# ── 1. Search returns IVF_PQ results on existing dataset ────────
|
||||||
|
# Capture curl status separately so a transport-level failure (gateway
|
||||||
|
# down, network broken, timeout) shows up as its own probe — instead of
|
||||||
|
# being swallowed by `|| echo '{}'` which would surface as the next jq
|
||||||
|
# probe failing with a misleading "no method field" message. Per opus
|
||||||
|
# INFO at lance_smoke.sh:38 from the 2026-05-02 scrum.
|
||||||
RESP=$(curl -sS -m 30 -X POST "$PREFIX/search/$DATASET" \
|
RESP=$(curl -sS -m 30 -X POST "$PREFIX/search/$DATASET" \
|
||||||
-H 'Content-Type: application/json' \
|
-H 'Content-Type: application/json' \
|
||||||
-d '{"query":"forklift operator","top_k":3}' 2>/dev/null || echo '{}')
|
-d '{"query":"forklift operator","top_k":3}' 2>/dev/null)
|
||||||
|
CURL_RC=$?
|
||||||
|
PROBE "search/$DATASET curl reachable (exit 0)" \
|
||||||
|
test "$CURL_RC" = "0"
|
||||||
|
[ "$CURL_RC" != "0" ] && RESP='{}'
|
||||||
PROBE "search/$DATASET returns top-3 lance_ivf_pq results" \
|
PROBE "search/$DATASET returns top-3 lance_ivf_pq results" \
|
||||||
bash -c "echo '$RESP' | jq -e '.method == \"lance_ivf_pq\" and (.results | length) == 3' >/dev/null"
|
bash -c "echo '$RESP' | jq -e '.method == \"lance_ivf_pq\" and (.results | length) == 3' >/dev/null"
|
||||||
|
|
||||||
|
|||||||
@ -37,6 +37,13 @@ const MAX_ATTEMPTS = 9;
|
|||||||
// crates/<crate>/src/*.rs.
|
// crates/<crate>/src/*.rs.
|
||||||
const FILE_TREE_SPLIT_THRESHOLD = Number(process.env.LH_SCRUM_TREE_SPLIT_THRESHOLD ?? 6000);
|
const FILE_TREE_SPLIT_THRESHOLD = Number(process.env.LH_SCRUM_TREE_SPLIT_THRESHOLD ?? 6000);
|
||||||
const FILE_SHARD_SIZE = Number(process.env.LH_SCRUM_SHARD_SIZE ?? 3500);
|
const FILE_SHARD_SIZE = Number(process.env.LH_SCRUM_SHARD_SIZE ?? 3500);
|
||||||
|
|
||||||
|
// Centralized to keep MAP and REDUCE phases in lockstep — diverging the two
|
||||||
|
// breaks tree-split consistency (per-shard digests must come from the same
|
||||||
|
// model the reducer runs on). Surfaced 2026-05-02 by kimi scrum WARN at
|
||||||
|
// scrum_master_pipeline.ts:1143.
|
||||||
|
const TREE_SPLIT_MODEL = "gemini-3-flash-preview";
|
||||||
|
const TREE_SPLIT_PROVIDER = "ollama_cloud";
|
||||||
// Same-model retry budget after observer rejection. After this many
|
// Same-model retry budget after observer rejection. After this many
|
||||||
// quality rejects on the current model, advance to the next provider-
|
// quality rejects on the current model, advance to the next provider-
|
||||||
// error fallback. Counts ONLY observer/quality rejects, not provider
|
// error fallback. Counts ONLY observer/quality rejects, not provider
|
||||||
@ -1143,15 +1150,15 @@ Format each as a code-fenced block with the byte offset within the shard:
|
|||||||
EXACT LINE OF SOURCE — DO NOT PARAPHRASE, DO NOT TRUNCATE
|
EXACT LINE OF SOURCE — DO NOT PARAPHRASE, DO NOT TRUNCATE
|
||||||
\`\`\`
|
\`\`\`
|
||||||
Pick the most reviewer-relevant lines: route definitions (e.g. \`@app.route(...)\`), function signatures, security-sensitive calls (auth/SQL/exec/template/secrets), hardcoded credentials/defaults, exception handlers, sensitive imports. The reviewer will REFUSE to act on any claim not backed by a verbatim anchor — so anchors are how you prove findings are real.`;
|
Pick the most reviewer-relevant lines: route definitions (e.g. \`@app.route(...)\`), function signatures, security-sensitive calls (auth/SQL/exec/template/secrets), hardcoded credentials/defaults, exception handlers, sensitive imports. The reviewer will REFUSE to act on any claim not backed by a verbatim anchor — so anchors are how you prove findings are real.`;
|
||||||
// 2026-04-28: gpt-oss:120b → gemini-3-flash-preview via Ollama
|
// 2026-04-28: gpt-oss:120b → TREE_SPLIT_MODEL via Ollama Pro.
|
||||||
// Pro. Tree-split MAP fires once per shard (potentially 5-20×
|
// Tree-split MAP fires once per shard (potentially 5-20× per
|
||||||
// per file), so latency dominates total scrum time. Gemini 3
|
// file), so latency dominates total scrum time. Gemini 3 flash
|
||||||
// flash returns shard digests substantially faster than the old
|
// returns shard digests substantially faster than the old 120B
|
||||||
// 120B free model while staying strong enough for byte-anchored
|
// free model while staying strong enough for byte-anchored
|
||||||
// extraction.
|
// extraction.
|
||||||
const r = await chat({
|
const r = await chat({
|
||||||
provider: "ollama_cloud",
|
provider: TREE_SPLIT_PROVIDER,
|
||||||
model: "gemini-3-flash-preview",
|
model: TREE_SPLIT_MODEL,
|
||||||
prompt,
|
prompt,
|
||||||
max_tokens: 900,
|
max_tokens: 900,
|
||||||
});
|
});
|
||||||
@ -1201,14 +1208,14 @@ COPY EVERY anchor block from the piece notes IN ORDER, character-perfect. DO NOT
|
|||||||
|
|
||||||
Output the anchor blocks under their original \`\`\`@offset...\`\`\` fences, each on its own with a blank line between. The reviewer rejects findings that don't quote a string from this anchors block, so completeness here directly determines review quality.`;
|
Output the anchor blocks under their original \`\`\`@offset...\`\`\` fences, each on its own with a blank line between. The reviewer rejects findings that don't quote a string from this anchors block, so completeness here directly determines review quality.`;
|
||||||
|
|
||||||
// 2026-04-28: gpt-oss:120b → gemini-3-flash-preview via Ollama
|
// 2026-04-28: gpt-oss:120b → TREE_SPLIT_MODEL via Ollama Pro. The
|
||||||
// Pro. The reducer runs once per file (vs once per shard for MAP)
|
// reducer runs once per file (vs once per shard for MAP) but on a
|
||||||
// but on a much larger context (all shard digests stacked), so
|
// much larger context (all shard digests stacked), so throughput
|
||||||
// throughput per token still matters. Same model as MAP for
|
// per token still matters. Must match MAP model exactly — diverging
|
||||||
// consistency in tree-split outputs.
|
// the two breaks tree-split coherence.
|
||||||
const reduced = await chat({
|
const reduced = await chat({
|
||||||
provider: "ollama_cloud",
|
provider: TREE_SPLIT_PROVIDER,
|
||||||
model: "gemini-3-flash-preview",
|
model: TREE_SPLIT_MODEL,
|
||||||
prompt: reducePrompt,
|
prompt: reducePrompt,
|
||||||
max_tokens: 2400,
|
max_tokens: 2400,
|
||||||
});
|
});
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user