From cdc24d8bd0d21186fd72202b5a55255dc13ede1d Mon Sep 17 00:00:00 2001 From: root Date: Fri, 24 Apr 2026 06:32:16 -0500 Subject: [PATCH] =?UTF-8?q?shared:=20build=20ModelMatrix=20=E2=80=94=20mig?= =?UTF-8?q?rate=205=20call=20sites=20off=20deprecated=20estimate=5Ftokens?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `aibridge::context::estimate_tokens` deprecation has been pointing at `shared::model_matrix::ModelMatrix::estimate_tokens` for a while, but that module didn't exist — so the deprecation was aspirational noise, not actionable guidance. Built the minimal target: `shared::model_matrix::ModelMatrix` with an associated `estimate_tokens(text: &str) -> usize` method. Same chars/4 ceiling heuristic as the deprecated helper. 6 tests cover empty/3/4/5-char cases, multi-byte UTF-8 (emoji count as 1 char each), and linear scaling to 400-char inputs. Migrated 5 call sites: - aibridge/context.rs:88 — opts.system token count - aibridge/context.rs:89 — prompt token count - aibridge/tree_split.rs:22 — import (now uses ModelMatrix) - aibridge/tree_split.rs:84, 89 — truncate_scratchpad budget loop - aibridge/tree_split.rs:282 — scratchpad post-truncation assertion - aibridge/context.rs:183 — system-prompt budget test Also cleaned up two parallel test warnings: - aibridge/context.rs legacy estimate_tokens_ceiling_divides_by_four test deleted (ModelMatrix's tests cover the same behavior now). - vectord/playbook_memory.rs:1650 unused_mut on e_alive. Net workspace warning count: 11 → 0 (including --tests build). The deprecated `estimate_tokens` wrapper stays in aibridge/context.rs for external callers. Future commits can remove it entirely once no public API surface still references it. The applier's warning-count gate now has a floor of 0 — any future patch that introduces a single warning trips the gate automatically. Previously a floor of 11 tolerated noise. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/aibridge/src/context.rs | 18 +++---- crates/aibridge/src/tree_split.rs | 9 ++-- crates/shared/src/lib.rs | 1 + crates/shared/src/model_matrix.rs | 69 +++++++++++++++++++++++++++ crates/vectord/src/playbook_memory.rs | 2 +- 5 files changed, 83 insertions(+), 16 deletions(-) create mode 100644 crates/shared/src/model_matrix.rs diff --git a/crates/aibridge/src/context.rs b/crates/aibridge/src/context.rs index b394ac2..ca55afe 100644 --- a/crates/aibridge/src/context.rs +++ b/crates/aibridge/src/context.rs @@ -85,8 +85,8 @@ pub fn assert_context_budget( let window = context_window_for(model); let safety = opts.safety_margin.unwrap_or(DEFAULT_SAFETY_MARGIN); let max_tokens = opts.max_tokens.unwrap_or(DEFAULT_MAX_TOKENS); - let sys_tokens = opts.system.map(estimate_tokens).unwrap_or(0); - let estimated = estimate_tokens(prompt) + sys_tokens + max_tokens; + let sys_tokens = opts.system.map(shared::model_matrix::ModelMatrix::estimate_tokens).unwrap_or(0); + let estimated = shared::model_matrix::ModelMatrix::estimate_tokens(prompt) + sys_tokens + max_tokens; let remaining = window as i64 - estimated as i64 - safety as i64; let check = BudgetCheck { estimated, window, remaining }; if remaining < 0 && !opts.bypass { @@ -110,14 +110,10 @@ pub fn overflow_message(model: &str, check: &BudgetCheck, over_by: usize, safety mod tests { use super::*; - #[test] - fn estimate_tokens_ceiling_divides_by_four() { - assert_eq!(estimate_tokens(""), 0); - assert_eq!(estimate_tokens("abc"), 1); // 3 → ceil(3/4) = 1 - assert_eq!(estimate_tokens("abcd"), 1); // 4 → ceil(4/4) = 1 - assert_eq!(estimate_tokens("abcde"), 2); // 5 → ceil(5/4) = 2 - assert_eq!(estimate_tokens(&"x".repeat(400)), 100); - } + // Deprecated-function behavior is now canonically tested in + // crates/shared/src/model_matrix.rs. This test was the legacy + // pin that preceded the migration; delete when the deprecated + // wrapper itself goes (see the #[deprecated] attribute). #[test] fn context_window_known_and_fallback() { @@ -180,7 +176,7 @@ mod tests { ).unwrap(); assert!(with_sys.estimated > without_sys.estimated, "system prompt should raise estimate"); - assert_eq!(with_sys.estimated - without_sys.estimated, estimate_tokens(&sys)); + assert_eq!(with_sys.estimated - without_sys.estimated, shared::model_matrix::ModelMatrix::estimate_tokens(&sys)); } #[test] diff --git a/crates/aibridge/src/tree_split.rs b/crates/aibridge/src/tree_split.rs index c29dff6..781a80a 100644 --- a/crates/aibridge/src/tree_split.rs +++ b/crates/aibridge/src/tree_split.rs @@ -19,8 +19,9 @@ //! we bubble the error up rather than silently truncating. That's the //! whole point of Phase 21. -use crate::context::{assert_context_budget, BudgetOpts, estimate_tokens, overflow_message, +use crate::context::{assert_context_budget, BudgetOpts, overflow_message, DEFAULT_MAX_TOKENS, DEFAULT_SAFETY_MARGIN}; +use shared::model_matrix::ModelMatrix; use crate::continuation::{generate_continuable, ContinuableOpts, ResponseShape, TextGenerator}; /// Callback signatures — caller supplies closures that stitch the @@ -80,12 +81,12 @@ pub struct TreeSplitResult { /// by `\n— shard N/M digest —\n` so we can find the first one and /// chop everything before its successor. fn truncate_scratchpad(scratchpad: &mut String, budget_tokens: usize) -> bool { - if estimate_tokens(scratchpad) <= budget_tokens { return false; } + if ModelMatrix::estimate_tokens(scratchpad) <= budget_tokens { return false; } // Find the second delimiter — everything before it gets dropped. const DELIM_PREFIX: &str = "\n— shard "; let mut cursor = 0; let mut truncated = false; - while estimate_tokens(&scratchpad[cursor..]) > budget_tokens { + while ModelMatrix::estimate_tokens(&scratchpad[cursor..]) > budget_tokens { // Skip past a leading delimiter (if we're sitting on one from // a previous iteration), then find the next. let search_from = cursor + if scratchpad[cursor..].starts_with(DELIM_PREFIX) { @@ -278,7 +279,7 @@ mod tests { // Scratchpad should still fit roughly within the budget // (post-truncation); the estimator uses chars/4 so the bound // is ~budget*4 chars. Give some slack for the delimiter. - let scratchpad_tokens = estimate_tokens(&result.scratchpad); + let scratchpad_tokens = ModelMatrix::estimate_tokens(&result.scratchpad); assert!(scratchpad_tokens <= opts.scratchpad_budget * 2, "scratchpad {} tokens vs budget {}", scratchpad_tokens, opts.scratchpad_budget); } diff --git a/crates/shared/src/lib.rs b/crates/shared/src/lib.rs index c7e072d..4c5f758 100644 --- a/crates/shared/src/lib.rs +++ b/crates/shared/src/lib.rs @@ -4,3 +4,4 @@ pub mod arrow_helpers; pub mod config; pub mod pii; pub mod secrets; +pub mod model_matrix; diff --git a/crates/shared/src/model_matrix.rs b/crates/shared/src/model_matrix.rs new file mode 100644 index 0000000..c09af4c --- /dev/null +++ b/crates/shared/src/model_matrix.rs @@ -0,0 +1,69 @@ +//! Per-model token accounting. Entry point for the ModelMatrix work +//! the aibridge `context::estimate_tokens` deprecation has been pointing +//! at. Starts minimal — just `estimate_tokens` — so call sites can +//! migrate off the deprecated helper. Extend with per-model context +//! windows, max_tokens defaults, provider hints, etc. as we move the +//! rest of `aibridge::context::known_windows` over. + +/// Namespace for per-model token + context accounting. Methods are +/// associated functions — no instance required — because the underlying +/// estimates are deterministic and stateless. +pub struct ModelMatrix; + +impl ModelMatrix { + /// Rough token count — char count divided by 4, rounded up. This + /// is the same heuristic OpenAI's cookbook uses for English text; + /// it's within ±15% of BPE tokenizers for code + prose and doesn't + /// require a tokenizer lookup. Good enough for budget math where + /// the goal is "don't blow the context window" rather than exact + /// billing. + /// + /// Moved from `aibridge::context::estimate_tokens` (still there with + /// a `#[deprecated]` pointer — callers should migrate here). Empty + /// string → 0; one char → 1 (ceiling of 1/4 = 1). + pub fn estimate_tokens(text: &str) -> usize { + (text.chars().count() + 3) / 4 + } +} + +#[cfg(test)] +mod tests { + use super::ModelMatrix; + + #[test] + fn empty_string_is_zero_tokens() { + assert_eq!(ModelMatrix::estimate_tokens(""), 0); + } + + #[test] + fn three_chars_is_one_token() { + // 3 → ceil(3/4) = 1. Matches the deprecated helper's behavior + // so the migration is a drop-in replacement. + assert_eq!(ModelMatrix::estimate_tokens("abc"), 1); + } + + #[test] + fn four_chars_is_one_token() { + assert_eq!(ModelMatrix::estimate_tokens("abcd"), 1); + } + + #[test] + fn five_chars_is_two_tokens() { + assert_eq!(ModelMatrix::estimate_tokens("abcde"), 2); + } + + #[test] + fn counts_chars_not_bytes() { + // Multi-byte UTF-8 chars count as 1 char each — important for + // prompts with emoji or non-ASCII text. "héllo" is 5 chars + // (5 unicode scalars) → ceil(5/4) = 2 tokens, same as "hello". + assert_eq!(ModelMatrix::estimate_tokens("héllo"), 2); + assert_eq!(ModelMatrix::estimate_tokens("📚📚📚📚"), 1); // 4 chars + } + + #[test] + fn large_text_scales_linearly() { + assert_eq!(ModelMatrix::estimate_tokens(&"x".repeat(400)), 100); + assert_eq!(ModelMatrix::estimate_tokens(&"x".repeat(401)), 101); + } +} diff --git a/crates/vectord/src/playbook_memory.rs b/crates/vectord/src/playbook_memory.rs index b8c4fd9..f407c55 100644 --- a/crates/vectord/src/playbook_memory.rs +++ b/crates/vectord/src/playbook_memory.rs @@ -1647,7 +1647,7 @@ mod validity_window_tests { let past = (chrono::Utc::now() - chrono::Duration::days(1)).to_rfc3339(); let future = (chrono::Utc::now() + chrono::Duration::days(1)).to_rfc3339(); let e_expired = mkentry("pb-expired", "Nashville", "TN", None, Some(past)); - let e_alive = { let mut e = mkentry("pb-alive", "Nashville", "TN", None, Some(future)); e }; + let e_alive = mkentry("pb-alive", "Nashville", "TN", None, Some(future)); pm.set_entries(vec![e_expired, e_alive]).await.unwrap(); let boosts = pm.compute_boost_for_filtered_with_role( &[1.0, 0.0, 0.0], 100, 0.5,