From cdc24d8bd0d21186fd72202b5a55255dc13ede1d Mon Sep 17 00:00:00 2001
From: root <root@island37.com>
Date: Fri, 24 Apr 2026 06:32:16 -0500
Subject: [PATCH] =?UTF-8?q?shared:=20build=20ModelMatrix=20=E2=80=94=20mig?=
 =?UTF-8?q?rate=205=20call=20sites=20off=20deprecated=20estimate=5Ftokens?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The `aibridge::context::estimate_tokens` deprecation has been pointing
at `shared::model_matrix::ModelMatrix::estimate_tokens` for a while,
but that module didn't exist — so the deprecation was aspirational
noise, not actionable guidance.

Built the minimal target: `shared::model_matrix::ModelMatrix` with
an associated `estimate_tokens(text: &str) -> usize` method. Same
chars/4 ceiling heuristic as the deprecated helper. 6 tests cover
empty/3/4/5-char cases, multi-byte UTF-8 (emoji count as 1 char each),
and linear scaling to 400-char inputs.

Migrated 5 call sites:
  - aibridge/context.rs:88 — opts.system token count
  - aibridge/context.rs:89 — prompt token count
  - aibridge/tree_split.rs:22 — import (now uses ModelMatrix)
  - aibridge/tree_split.rs:84, 89 — truncate_scratchpad budget loop
  - aibridge/tree_split.rs:282 — scratchpad post-truncation assertion
  - aibridge/context.rs:183 — system-prompt budget test

Also cleaned up two parallel test warnings:
  - aibridge/context.rs legacy estimate_tokens_ceiling_divides_by_four
    test deleted (ModelMatrix's tests cover the same behavior now).
  - vectord/playbook_memory.rs:1650 unused_mut on e_alive.

Net workspace warning count: 11 → 0 (including --tests build).

The deprecated `estimate_tokens` wrapper stays in aibridge/context.rs
for external callers. Future commits can remove it entirely once no
public API surface still references it.

The applier's warning-count gate now has a floor of 0 — any future
patch that introduces a single warning trips the gate automatically.
Previously a floor of 11 tolerated noise.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 crates/aibridge/src/context.rs        | 18 +++----
 crates/aibridge/src/tree_split.rs     |  9 ++--
 crates/shared/src/lib.rs              |  1 +
 crates/shared/src/model_matrix.rs     | 69 +++++++++++++++++++++++++++
 crates/vectord/src/playbook_memory.rs |  2 +-
 5 files changed, 83 insertions(+), 16 deletions(-)
 create mode 100644 crates/shared/src/model_matrix.rs

diff --git a/crates/aibridge/src/context.rs b/crates/aibridge/src/context.rs
index b394ac2..ca55afe 100644
--- a/crates/aibridge/src/context.rs
+++ b/crates/aibridge/src/context.rs
@@ -85,8 +85,8 @@ pub fn assert_context_budget(
     let window = context_window_for(model);
     let safety = opts.safety_margin.unwrap_or(DEFAULT_SAFETY_MARGIN);
     let max_tokens = opts.max_tokens.unwrap_or(DEFAULT_MAX_TOKENS);
-    let sys_tokens = opts.system.map(estimate_tokens).unwrap_or(0);
-    let estimated = estimate_tokens(prompt) + sys_tokens + max_tokens;
+    let sys_tokens = opts.system.map(shared::model_matrix::ModelMatrix::estimate_tokens).unwrap_or(0);
+    let estimated = shared::model_matrix::ModelMatrix::estimate_tokens(prompt) + sys_tokens + max_tokens;
     let remaining = window as i64 - estimated as i64 - safety as i64;
     let check = BudgetCheck { estimated, window, remaining };
     if remaining < 0 && !opts.bypass {
@@ -110,14 +110,10 @@ pub fn overflow_message(model: &str, check: &BudgetCheck, over_by: usize, safety
 mod tests {
     use super::*;
 
-    #[test]
-    fn estimate_tokens_ceiling_divides_by_four() {
-        assert_eq!(estimate_tokens(""), 0);
-        assert_eq!(estimate_tokens("abc"), 1);    // 3 → ceil(3/4) = 1
-        assert_eq!(estimate_tokens("abcd"), 1);   // 4 → ceil(4/4) = 1
-        assert_eq!(estimate_tokens("abcde"), 2);  // 5 → ceil(5/4) = 2
-        assert_eq!(estimate_tokens(&"x".repeat(400)), 100);
-    }
+    // Deprecated-function behavior is now canonically tested in
+    // crates/shared/src/model_matrix.rs. This test was the legacy
+    // pin that preceded the migration; delete when the deprecated
+    // wrapper itself goes (see the #[deprecated] attribute).
 
     #[test]
     fn context_window_known_and_fallback() {
@@ -180,7 +176,7 @@ mod tests {
         ).unwrap();
         assert!(with_sys.estimated > without_sys.estimated,
             "system prompt should raise estimate");
-        assert_eq!(with_sys.estimated - without_sys.estimated, estimate_tokens(&sys));
+        assert_eq!(with_sys.estimated - without_sys.estimated, shared::model_matrix::ModelMatrix::estimate_tokens(&sys));
     }
 
     #[test]
diff --git a/crates/aibridge/src/tree_split.rs b/crates/aibridge/src/tree_split.rs
index c29dff6..781a80a 100644
--- a/crates/aibridge/src/tree_split.rs
+++ b/crates/aibridge/src/tree_split.rs
@@ -19,8 +19,9 @@
 //! we bubble the error up rather than silently truncating. That's the
 //! whole point of Phase 21.
 
-use crate::context::{assert_context_budget, BudgetOpts, estimate_tokens, overflow_message,
+use crate::context::{assert_context_budget, BudgetOpts, overflow_message,
                      DEFAULT_MAX_TOKENS, DEFAULT_SAFETY_MARGIN};
+use shared::model_matrix::ModelMatrix;
 use crate::continuation::{generate_continuable, ContinuableOpts, ResponseShape, TextGenerator};
 
 /// Callback signatures — caller supplies closures that stitch the
@@ -80,12 +81,12 @@ pub struct TreeSplitResult {
 /// by `\n— shard N/M digest —\n` so we can find the first one and
 /// chop everything before its successor.
 fn truncate_scratchpad(scratchpad: &mut String, budget_tokens: usize) -> bool {
-    if estimate_tokens(scratchpad) <= budget_tokens { return false; }
+    if ModelMatrix::estimate_tokens(scratchpad) <= budget_tokens { return false; }
     // Find the second delimiter — everything before it gets dropped.
     const DELIM_PREFIX: &str = "\n— shard ";
     let mut cursor = 0;
     let mut truncated = false;
-    while estimate_tokens(&scratchpad[cursor..]) > budget_tokens {
+    while ModelMatrix::estimate_tokens(&scratchpad[cursor..]) > budget_tokens {
         // Skip past a leading delimiter (if we're sitting on one from
         // a previous iteration), then find the next.
         let search_from = cursor + if scratchpad[cursor..].starts_with(DELIM_PREFIX) {
@@ -278,7 +279,7 @@ mod tests {
         // Scratchpad should still fit roughly within the budget
         // (post-truncation); the estimator uses chars/4 so the bound
         // is ~budget*4 chars. Give some slack for the delimiter.
-        let scratchpad_tokens = estimate_tokens(&result.scratchpad);
+        let scratchpad_tokens = ModelMatrix::estimate_tokens(&result.scratchpad);
         assert!(scratchpad_tokens <= opts.scratchpad_budget * 2,
             "scratchpad {} tokens vs budget {}", scratchpad_tokens, opts.scratchpad_budget);
     }
diff --git a/crates/shared/src/lib.rs b/crates/shared/src/lib.rs
index c7e072d..4c5f758 100644
--- a/crates/shared/src/lib.rs
+++ b/crates/shared/src/lib.rs
@@ -4,3 +4,4 @@ pub mod arrow_helpers;
 pub mod config;
 pub mod pii;
 pub mod secrets;
+pub mod model_matrix;
diff --git a/crates/shared/src/model_matrix.rs b/crates/shared/src/model_matrix.rs
new file mode 100644
index 0000000..c09af4c
--- /dev/null
+++ b/crates/shared/src/model_matrix.rs
@@ -0,0 +1,69 @@
+//! Per-model token accounting. Entry point for the ModelMatrix work
+//! the aibridge `context::estimate_tokens` deprecation has been pointing
+//! at. Starts minimal — just `estimate_tokens` — so call sites can
+//! migrate off the deprecated helper. Extend with per-model context
+//! windows, max_tokens defaults, provider hints, etc. as we move the
+//! rest of `aibridge::context::known_windows` over.
+
+/// Namespace for per-model token + context accounting. Methods are
+/// associated functions — no instance required — because the underlying
+/// estimates are deterministic and stateless.
+pub struct ModelMatrix;
+
+impl ModelMatrix {
+    /// Rough token count — char count divided by 4, rounded up. This
+    /// is the same heuristic OpenAI's cookbook uses for English text;
+    /// it's within ±15% of BPE tokenizers for code + prose and doesn't
+    /// require a tokenizer lookup. Good enough for budget math where
+    /// the goal is "don't blow the context window" rather than exact
+    /// billing.
+    ///
+    /// Moved from `aibridge::context::estimate_tokens` (still there with
+    /// a `#[deprecated]` pointer — callers should migrate here). Empty
+    /// string → 0; one char → 1 (ceiling of 1/4 = 1).
+    pub fn estimate_tokens(text: &str) -> usize {
+        (text.chars().count() + 3) / 4
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::ModelMatrix;
+
+    #[test]
+    fn empty_string_is_zero_tokens() {
+        assert_eq!(ModelMatrix::estimate_tokens(""), 0);
+    }
+
+    #[test]
+    fn three_chars_is_one_token() {
+        // 3 → ceil(3/4) = 1. Matches the deprecated helper's behavior
+        // so the migration is a drop-in replacement.
+        assert_eq!(ModelMatrix::estimate_tokens("abc"), 1);
+    }
+
+    #[test]
+    fn four_chars_is_one_token() {
+        assert_eq!(ModelMatrix::estimate_tokens("abcd"), 1);
+    }
+
+    #[test]
+    fn five_chars_is_two_tokens() {
+        assert_eq!(ModelMatrix::estimate_tokens("abcde"), 2);
+    }
+
+    #[test]
+    fn counts_chars_not_bytes() {
+        // Multi-byte UTF-8 chars count as 1 char each — important for
+        // prompts with emoji or non-ASCII text. "héllo" is 5 chars
+        // (5 unicode scalars) → ceil(5/4) = 2 tokens, same as "hello".
+        assert_eq!(ModelMatrix::estimate_tokens("héllo"), 2);
+        assert_eq!(ModelMatrix::estimate_tokens("📚📚📚📚"), 1);  // 4 chars
+    }
+
+    #[test]
+    fn large_text_scales_linearly() {
+        assert_eq!(ModelMatrix::estimate_tokens(&"x".repeat(400)), 100);
+        assert_eq!(ModelMatrix::estimate_tokens(&"x".repeat(401)), 101);
+    }
+}
diff --git a/crates/vectord/src/playbook_memory.rs b/crates/vectord/src/playbook_memory.rs
index b8c4fd9..f407c55 100644
--- a/crates/vectord/src/playbook_memory.rs
+++ b/crates/vectord/src/playbook_memory.rs
@@ -1647,7 +1647,7 @@ mod validity_window_tests {
         let past = (chrono::Utc::now() - chrono::Duration::days(1)).to_rfc3339();
         let future = (chrono::Utc::now() + chrono::Duration::days(1)).to_rfc3339();
         let e_expired = mkentry("pb-expired", "Nashville", "TN", None, Some(past));
-        let e_alive = { let mut e = mkentry("pb-alive", "Nashville", "TN", None, Some(future)); e };
+        let e_alive = mkentry("pb-alive", "Nashville", "TN", None, Some(future));
         pm.set_entries(vec![e_expired, e_alive]).await.unwrap();
         let boosts = pm.compute_boost_for_filtered_with_role(
             &[1.0, 0.0, 0.0], 100, 0.5,