shared: build ModelMatrix — migrate 5 call sites off deprecated estimate_tokens
Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
The `aibridge::context::estimate_tokens` deprecation has been pointing
at `shared::model_matrix::ModelMatrix::estimate_tokens` for a while,
but that module didn't exist — so the deprecation was aspirational
noise, not actionable guidance.
Built the minimal target: `shared::model_matrix::ModelMatrix` with
an associated `estimate_tokens(text: &str) -> usize` method. Same
chars/4 ceiling heuristic as the deprecated helper. 6 tests cover
empty/3/4/5-char cases, multi-byte UTF-8 (emoji count as 1 char each),
and linear scaling at 400- and 401-char inputs.
Migrated 5 call-site groups (six calls in total, plus the import in tree_split.rs):
- aibridge/context.rs:88 — opts.system token count
- aibridge/context.rs:89 — prompt token count
- aibridge/tree_split.rs:22 — import (now uses ModelMatrix)
- aibridge/tree_split.rs:84, 89 — truncate_scratchpad budget loop
- aibridge/tree_split.rs:282 — scratchpad post-truncation assertion
- aibridge/context.rs:183 — system-prompt budget test
Also cleaned up two test-build warnings in passing:
- aibridge/context.rs legacy estimate_tokens_ceiling_divides_by_four
test deleted (ModelMatrix's tests cover the same behavior now).
- vectord/playbook_memory.rs:1650 unused_mut on e_alive.
Net workspace warning count: 11 → 0 (including --tests build).
The deprecated `estimate_tokens` wrapper stays in aibridge/context.rs
for external callers. Future commits can remove it entirely once no
public API surface still references it.
The applier's warning-count gate now has a floor of 0 — any future
patch that introduces a single warning trips the gate automatically.
Previously a floor of 11 tolerated noise.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
fdc5123f6d
commit
cdc24d8bd0
@ -85,8 +85,8 @@ pub fn assert_context_budget(
|
|||||||
let window = context_window_for(model);
|
let window = context_window_for(model);
|
||||||
let safety = opts.safety_margin.unwrap_or(DEFAULT_SAFETY_MARGIN);
|
let safety = opts.safety_margin.unwrap_or(DEFAULT_SAFETY_MARGIN);
|
||||||
let max_tokens = opts.max_tokens.unwrap_or(DEFAULT_MAX_TOKENS);
|
let max_tokens = opts.max_tokens.unwrap_or(DEFAULT_MAX_TOKENS);
|
||||||
let sys_tokens = opts.system.map(estimate_tokens).unwrap_or(0);
|
let sys_tokens = opts.system.map(shared::model_matrix::ModelMatrix::estimate_tokens).unwrap_or(0);
|
||||||
let estimated = estimate_tokens(prompt) + sys_tokens + max_tokens;
|
let estimated = shared::model_matrix::ModelMatrix::estimate_tokens(prompt) + sys_tokens + max_tokens;
|
||||||
let remaining = window as i64 - estimated as i64 - safety as i64;
|
let remaining = window as i64 - estimated as i64 - safety as i64;
|
||||||
let check = BudgetCheck { estimated, window, remaining };
|
let check = BudgetCheck { estimated, window, remaining };
|
||||||
if remaining < 0 && !opts.bypass {
|
if remaining < 0 && !opts.bypass {
|
||||||
@ -110,14 +110,10 @@ pub fn overflow_message(model: &str, check: &BudgetCheck, over_by: usize, safety
|
|||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[test]
|
// Deprecated-function behavior is now canonically tested in
|
||||||
fn estimate_tokens_ceiling_divides_by_four() {
|
// crates/shared/src/model_matrix.rs. This test was the legacy
|
||||||
assert_eq!(estimate_tokens(""), 0);
|
// pin that preceded the migration; delete when the deprecated
|
||||||
assert_eq!(estimate_tokens("abc"), 1); // 3 → ceil(3/4) = 1
|
// wrapper itself goes (see the #[deprecated] attribute).
|
||||||
assert_eq!(estimate_tokens("abcd"), 1); // 4 → ceil(4/4) = 1
|
|
||||||
assert_eq!(estimate_tokens("abcde"), 2); // 5 → ceil(5/4) = 2
|
|
||||||
assert_eq!(estimate_tokens(&"x".repeat(400)), 100);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn context_window_known_and_fallback() {
|
fn context_window_known_and_fallback() {
|
||||||
@ -180,7 +176,7 @@ mod tests {
|
|||||||
).unwrap();
|
).unwrap();
|
||||||
assert!(with_sys.estimated > without_sys.estimated,
|
assert!(with_sys.estimated > without_sys.estimated,
|
||||||
"system prompt should raise estimate");
|
"system prompt should raise estimate");
|
||||||
assert_eq!(with_sys.estimated - without_sys.estimated, estimate_tokens(&sys));
|
assert_eq!(with_sys.estimated - without_sys.estimated, shared::model_matrix::ModelMatrix::estimate_tokens(&sys));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
@ -19,8 +19,9 @@
|
|||||||
//! we bubble the error up rather than silently truncating. That's the
|
//! we bubble the error up rather than silently truncating. That's the
|
||||||
//! whole point of Phase 21.
|
//! whole point of Phase 21.
|
||||||
|
|
||||||
use crate::context::{assert_context_budget, BudgetOpts, estimate_tokens, overflow_message,
|
use crate::context::{assert_context_budget, BudgetOpts, overflow_message,
|
||||||
DEFAULT_MAX_TOKENS, DEFAULT_SAFETY_MARGIN};
|
DEFAULT_MAX_TOKENS, DEFAULT_SAFETY_MARGIN};
|
||||||
|
use shared::model_matrix::ModelMatrix;
|
||||||
use crate::continuation::{generate_continuable, ContinuableOpts, ResponseShape, TextGenerator};
|
use crate::continuation::{generate_continuable, ContinuableOpts, ResponseShape, TextGenerator};
|
||||||
|
|
||||||
/// Callback signatures — caller supplies closures that stitch the
|
/// Callback signatures — caller supplies closures that stitch the
|
||||||
@ -80,12 +81,12 @@ pub struct TreeSplitResult {
|
|||||||
/// by `\n— shard N/M digest —\n` so we can find the first one and
|
/// by `\n— shard N/M digest —\n` so we can find the first one and
|
||||||
/// chop everything before its successor.
|
/// chop everything before its successor.
|
||||||
fn truncate_scratchpad(scratchpad: &mut String, budget_tokens: usize) -> bool {
|
fn truncate_scratchpad(scratchpad: &mut String, budget_tokens: usize) -> bool {
|
||||||
if estimate_tokens(scratchpad) <= budget_tokens { return false; }
|
if ModelMatrix::estimate_tokens(scratchpad) <= budget_tokens { return false; }
|
||||||
// Find the second delimiter — everything before it gets dropped.
|
// Find the second delimiter — everything before it gets dropped.
|
||||||
const DELIM_PREFIX: &str = "\n— shard ";
|
const DELIM_PREFIX: &str = "\n— shard ";
|
||||||
let mut cursor = 0;
|
let mut cursor = 0;
|
||||||
let mut truncated = false;
|
let mut truncated = false;
|
||||||
while estimate_tokens(&scratchpad[cursor..]) > budget_tokens {
|
while ModelMatrix::estimate_tokens(&scratchpad[cursor..]) > budget_tokens {
|
||||||
// Skip past a leading delimiter (if we're sitting on one from
|
// Skip past a leading delimiter (if we're sitting on one from
|
||||||
// a previous iteration), then find the next.
|
// a previous iteration), then find the next.
|
||||||
let search_from = cursor + if scratchpad[cursor..].starts_with(DELIM_PREFIX) {
|
let search_from = cursor + if scratchpad[cursor..].starts_with(DELIM_PREFIX) {
|
||||||
@ -278,7 +279,7 @@ mod tests {
|
|||||||
// Scratchpad should still fit roughly within the budget
|
// Scratchpad should still fit roughly within the budget
|
||||||
// (post-truncation); the estimator uses chars/4 so the bound
|
// (post-truncation); the estimator uses chars/4 so the bound
|
||||||
// is ~budget*4 chars. Give some slack for the delimiter.
|
// is ~budget*4 chars. Give some slack for the delimiter.
|
||||||
let scratchpad_tokens = estimate_tokens(&result.scratchpad);
|
let scratchpad_tokens = ModelMatrix::estimate_tokens(&result.scratchpad);
|
||||||
assert!(scratchpad_tokens <= opts.scratchpad_budget * 2,
|
assert!(scratchpad_tokens <= opts.scratchpad_budget * 2,
|
||||||
"scratchpad {} tokens vs budget {}", scratchpad_tokens, opts.scratchpad_budget);
|
"scratchpad {} tokens vs budget {}", scratchpad_tokens, opts.scratchpad_budget);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -4,3 +4,4 @@ pub mod arrow_helpers;
|
|||||||
pub mod config;
|
pub mod config;
|
||||||
pub mod pii;
|
pub mod pii;
|
||||||
pub mod secrets;
|
pub mod secrets;
|
||||||
|
pub mod model_matrix;
|
||||||
|
|||||||
69
crates/shared/src/model_matrix.rs
Normal file
69
crates/shared/src/model_matrix.rs
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
//! Per-model token accounting. Entry point for the ModelMatrix work
|
||||||
|
//! the aibridge `context::estimate_tokens` deprecation has been pointing
|
||||||
|
//! at. Starts minimal — just `estimate_tokens` — so call sites can
|
||||||
|
//! migrate off the deprecated helper. Extend with per-model context
|
||||||
|
//! windows, max_tokens defaults, provider hints, etc. as we move the
|
||||||
|
//! rest of `aibridge::context::known_windows` over.
|
||||||
|
|
||||||
|
/// Namespace for per-model token and context accounting.
///
/// Every method is an associated function, so no instance is required:
/// the estimates are deterministic and stateless.
pub struct ModelMatrix;

impl ModelMatrix {
    /// Returns a rough token estimate for `text`: the number of `char`s
    /// (Unicode scalar values, not bytes) divided by four, rounded up.
    ///
    /// The chars/4 rule is a tokenizer-free approximation meant for budget
    /// math ("don't blow the context window"), not for exact billing. It is
    /// expected to land within roughly ±15% of BPE tokenizers for code and
    /// prose; treat that as a rule of thumb, not a guarantee.
    ///
    /// Moved from `aibridge::context::estimate_tokens`, which remains as a
    /// `#[deprecated]` wrapper; callers should migrate here.
    ///
    /// Boundary behavior: the empty string yields 0, one to four chars
    /// yield 1, and five chars yield 2.
    pub fn estimate_tokens(text: &str) -> usize {
        // `div_ceil` states the rounding intent directly and replaces the
        // manual `(n + 3) / 4` form.
        text.chars().count().div_ceil(4)
    }
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::ModelMatrix;

    /// An empty prompt costs nothing.
    #[test]
    fn empty_string_is_zero_tokens() {
        let estimate = ModelMatrix::estimate_tokens("");
        assert_eq!(estimate, 0);
    }

    /// A partial group of four still rounds up: ceil(3/4) = 1. This mirrors
    /// the deprecated helper so the migration stays a drop-in replacement.
    #[test]
    fn three_chars_is_one_token() {
        let estimate = ModelMatrix::estimate_tokens("abc");
        assert_eq!(estimate, 1);
    }

    /// Exactly four chars fill a single token.
    #[test]
    fn four_chars_is_one_token() {
        let estimate = ModelMatrix::estimate_tokens("abcd");
        assert_eq!(estimate, 1);
    }

    /// The fifth char spills into a second token.
    #[test]
    fn five_chars_is_two_tokens() {
        let estimate = ModelMatrix::estimate_tokens("abcde");
        assert_eq!(estimate, 2);
    }

    /// Multi-byte UTF-8 characters count once each, which matters for
    /// prompts containing emoji or non-ASCII text.
    #[test]
    fn counts_chars_not_bytes() {
        // "héllo" is 5 chars → ceil(5/4) = 2, the same as "hello";
        // four emoji are 4 chars → 1 token.
        let cases = [("héllo", 2), ("📚📚📚📚", 1)];
        for (text, expected) in cases {
            assert_eq!(ModelMatrix::estimate_tokens(text), expected);
        }
    }

    /// Longer inputs follow the same chars/4 ceiling rule.
    #[test]
    fn large_text_scales_linearly() {
        for (len, expected) in [(400, 100), (401, 101)] {
            let text = "x".repeat(len);
            assert_eq!(ModelMatrix::estimate_tokens(&text), expected);
        }
    }
}
|
||||||
@ -1647,7 +1647,7 @@ mod validity_window_tests {
|
|||||||
let past = (chrono::Utc::now() - chrono::Duration::days(1)).to_rfc3339();
|
let past = (chrono::Utc::now() - chrono::Duration::days(1)).to_rfc3339();
|
||||||
let future = (chrono::Utc::now() + chrono::Duration::days(1)).to_rfc3339();
|
let future = (chrono::Utc::now() + chrono::Duration::days(1)).to_rfc3339();
|
||||||
let e_expired = mkentry("pb-expired", "Nashville", "TN", None, Some(past));
|
let e_expired = mkentry("pb-expired", "Nashville", "TN", None, Some(past));
|
||||||
let e_alive = { let mut e = mkentry("pb-alive", "Nashville", "TN", None, Some(future)); e };
|
let e_alive = mkentry("pb-alive", "Nashville", "TN", None, Some(future));
|
||||||
pm.set_entries(vec![e_expired, e_alive]).await.unwrap();
|
pm.set_entries(vec![e_expired, e_alive]).await.unwrap();
|
||||||
let boosts = pm.compute_boost_for_filtered_with_role(
|
let boosts = pm.compute_boost_for_filtered_with_role(
|
||||||
&[1.0, 0.0, 0.0], 100, 0.5,
|
&[1.0, 0.0, 0.0], 100, 0.5,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user