From ad0edbe29ce9a26e55de2e7a480afc9a9d8e4ed8 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 20 Apr 2026 23:20:07 -0500 Subject: [PATCH] Cloud kimi-k2.5 executor for weak tiers + multi-strategy playbook retrieval MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two coupled changes from the 2026 agent-memory research + tool asymmetry findings. SCENARIO (weak-tier cloud substitute): qwen2.5 collapsed to 0/14 across the basic/minimal tool_levels. Replace with cloud kimi-k2.5 on Ollama Cloud — same family as k2.6 (pro-tier locked today, on J's upgrade path). Plumb cloud flag through ACTIVE_EXECUTOR_CLOUD / ACTIVE_REVIEWER_CLOUD into generateContinuable so executor/reviewer can route to cloud when tool_level requires. think:false supported by Kimi family. Tool level mapping (revised): full — qwen3.5 local + qwen3 local + cloud gpt-oss:120b T3 + rescue local — qwen3.5 local + qwen3 local + local gpt-oss:20b T3 + rescue basic — kimi-k2.5 cloud + qwen3 local + local T3, no rescue minimal — kimi-k2.5 cloud + qwen3 local, no T3, no rescue. Playbook inheritance alone on the decision path. This is the honest version of J's "minimal tools still works via inheritance" hypothesis — with the executor no longer broken at the tokenizer level, we can actually measure whether playbook retrieval substitutes for missing overseers. PLAYBOOK_MEMORY (multi-strategy retrieval): Zep / Mem0 research shows multi-strategy rerank (semantic + keyword + graph + temporal) outperforms single-strategy cosine. Lakehouse now has a two-tier retrieval scheme: 1. Exact (role, city, state) match: skip cosine, assign similarity=1.0, take up to top_k/2+1 slots. These are identity-class neighbors — the strongest possible signal. 2. Cosine fallback within the same (city, state) but different role: fills remaining slots. Exposed as compute_boost_for_filtered_with_role(target_geo, target_role). 
Backwards-compatible: compute_boost_for_filtered forwards with role=None so existing callers keep their current behavior. Service.rs wires both: extract_target_geo and extract_target_role pull from the executor's SQL filter. grab_eq_value is factored out of extract_target_geo so both lookups share one parser. Diagnostic log now prints target_role alongside target_geo for every hybrid_search: playbook_boost: boosts=88 sources=39 parsed=39 matched=5 target_geo=Some(("Nashville", "TN")) target_role=Some("Welder") Verified: Nashville Welder query returns 5/10 boosted workers in top_k with clean role+geo provenance. Research sources: atlan.com Agent Memory Frameworks 2026, Mem0 paper (arxiv 2504.19413), Zep/Graphiti LongMemEval comparison, ossinsight Agent Memory Race 2026. kimi-k2.6 on current key returns 403 — pro-tier upgrade required. kimi-k2.5 is the substitute today; swap to k2.6 by renaming one line in applyToolLevel once the subscription lands. --- crates/vectord/src/playbook_memory.rs | 71 ++++++++++++++++--- crates/vectord/src/service.rs | 98 ++++++++++++++------------- tests/multi-agent/scenario.ts | 22 ++++-- 3 files changed, 133 insertions(+), 58 deletions(-) diff --git a/crates/vectord/src/playbook_memory.rs b/crates/vectord/src/playbook_memory.rs index dd5ae5e..d13f71a 100644 --- a/crates/vectord/src/playbook_memory.rs +++ b/crates/vectord/src/playbook_memory.rs @@ -235,6 +235,26 @@ impl PlaybookMemory { top_k_playbooks: usize, base_weight: f32, target_geo: Option<(&str, &str)>, + ) -> HashMap<(String, String, String), BoostEntry> { + self.compute_boost_for_filtered_with_role(query_embedding, top_k_playbooks, base_weight, target_geo, None).await + } + + /// Variant that also accepts a target role for pre-filtering. + /// Multi-strategy retrieval: exact (role, city, state) matches skip + /// cosine entirely and earn the maximum boost, since identity on + /// those three fields is the strongest possible similarity signal. 
+ /// Remaining entries (within the same city+state but different + /// role, or unknown role) go through the normal cosine path as a + /// fallback. This addresses the 2026 agent-memory finding that + /// multi-strategy parallel retrieval with rerank outperforms + /// single-strategy semantic search. + pub async fn compute_boost_for_filtered_with_role( + &self, + query_embedding: &[f32], + top_k_playbooks: usize, + base_weight: f32, + target_geo: Option<(&str, &str)>, + target_role: Option<&str>, ) -> HashMap<(String, String, String), BoostEntry> { let state = self.state.read().await; let entries = state.entries.clone(); @@ -246,11 +266,11 @@ impl PlaybookMemory { } drop(state); - // Brute-force cosine. Empty / missing embeddings just skip. - // When target_geo is set, pre-filter to matching playbooks BEFORE - // cosine sort — that way top-k is within the city, not across - // all cities. - let mut scored: Vec<(f32, &PlaybookEntry)> = entries + // Pre-filter by target_geo (city, state) before cosine. When + // target_geo is set, only playbooks from that city go into the + // ranking pool — prevents globally-popular semantic neighbors + // from drowning out the city's local successful playbooks. + let geo_filtered: Vec<&PlaybookEntry> = entries .iter() .filter(|e| match (target_geo, &e.city, &e.state) { (None, _, _) => true, @@ -259,10 +279,45 @@ impl PlaybookMemory { } _ => false, }) - .filter_map(|e| e.embedding.as_ref().map(|v| (cosine(query_embedding, v), e))) .collect(); - scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal)); - scored.truncate(top_k_playbooks.max(1)); + + // Multi-strategy: split the geo-filtered pool into (exact role + // match) vs (other). Exact matches skip cosine — they're already + // the strongest signal possible. Operations are shaped + // "fill: Welder x3 in Toledo, OH" so we match role by checking + // whether `fill: {role} ` appears in the operation string, + // case-insensitive. 
+ let mut exact_matches: Vec<&PlaybookEntry> = Vec::new(); + let mut cosine_pool: Vec<(f32, &PlaybookEntry)> = Vec::new(); + let role_needle = target_role + .map(|r| format!("fill: {} ", r).to_ascii_lowercase()); + for e in geo_filtered { + let is_exact = role_needle.as_ref() + .map(|needle| e.operation.to_ascii_lowercase().contains(needle)) + .unwrap_or(false); + if is_exact { + exact_matches.push(e); + } else if let Some(v) = &e.embedding { + cosine_pool.push((cosine(query_embedding, v), e)); + } + } + cosine_pool.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal)); + + // Allocate top_k across the two pools — exact matches get first + // priority (up to min(exact_count, top_k/2 + 1) slots), then cosine + // fills the rest. This is rerank with hard preference for + // identity matches. + let exact_take = exact_matches.len().min(top_k_playbooks.max(1) / 2 + 1); + let cosine_take = top_k_playbooks.saturating_sub(exact_take); + + // Score exact matches with max similarity (1.0) so downstream + // weighting treats them as the strongest possible signal. + let mut scored: Vec<(f32, &PlaybookEntry)> = exact_matches + .into_iter() + .take(exact_take) + .map(|e| (1.0_f32, e)) + .collect(); + scored.extend(cosine_pool.into_iter().take(cosine_take)); let now = chrono::Utc::now(); let mut boosts: HashMap<(String, String, String), BoostEntry> = HashMap::new(); diff --git a/crates/vectord/src/service.rs b/crates/vectord/src/service.rs index 73ea114..977575f 100644 --- a/crates/vectord/src/service.rs +++ b/crates/vectord/src/service.rs @@ -803,20 +803,23 @@ async fn hybrid_search( // set. Additive boost on the existing vector score, then re-sort. if req.use_playbook_memory { let boost_k = req.playbook_memory_k.unwrap_or(playbook_memory::DEFAULT_TOP_K_PLAYBOOKS); - // Extract target (city, state) from the SQL filter so - // compute_boost_for can skip playbooks from other cities that - // would never intersect the candidate pool. 
The executor's - // filter shape is stable: `... city = 'Toledo' AND state = 'OH' ...`. - // Case-insensitive match, tolerant of single quotes and spaces. + // Extract target (city, state, role) from the SQL filter so + // compute_boost_for can skip playbooks from other cities AND + // prioritize exact role matches via the multi-strategy path. + // The executor's filter shape is stable: + // `... role = 'Welder' AND city = 'Toledo' AND state = 'OH' ...`. + // Case-insensitive match, tolerant of single quotes. let target_geo = req.sql_filter.as_deref().and_then(extract_target_geo); + let target_role = req.sql_filter.as_deref().and_then(extract_target_role); // We embedded the question as `qv` above — reuse it for the // playbook similarity lookup so we don't double-pay Ollama. let boosts = state.playbook_memory - .compute_boost_for_filtered( + .compute_boost_for_filtered_with_role( &qv, boost_k, 0.5, target_geo.as_ref().map(|(c, s)| (c.as_str(), s.as_str())), + target_role.as_deref(), ) .await; @@ -850,12 +853,13 @@ async fn hybrid_search( } } tracing::info!( - "playbook_boost: boosts={} sources={} parsed={} matched={} target_geo={:?} (query='{}')", + "playbook_boost: boosts={} sources={} parsed={} matched={} target_geo={:?} target_role={:?} (query='{}')", boosts.len(), sources.len(), parsed_count, matched_count, target_geo, + target_role, req.question.chars().take(60).collect::<String>(), ); // Re-rank: boosted scores can flip ordering. @@ -2098,51 +2102,53 @@ struct LanceRecallQuery { /// "{Name} — {Role} in {City}, {State}. Skills: …". /// Returns None if the chunk doesn't match the shape; callers simply /// skip the boost for that hit. +/// Extract role from an SQL filter matching `role = 'Welder'` style. +/// Case-insensitive on the column name. Quoted value; quotes not +/// included in returned string. +fn extract_target_role(sql_filter: &str) -> Option<String> { + grab_eq_value(sql_filter, "role") +} + +/// Shared equality-value extractor for (city, state, role) lookups. 
+fn grab_eq_value(src: &str, col: &str) -> Option<String> { + let lower = src.to_ascii_lowercase(); + let col_lower = col.to_ascii_lowercase(); + let mut search_from = 0usize; + while let Some(off) = lower[search_from..].find(&col_lower) { + let pos = search_from + off; + let prior_ok = pos == 0 + || !lower.as_bytes()[pos - 1].is_ascii_alphanumeric() + && lower.as_bytes()[pos - 1] != b'_'; + let after = pos + col_lower.len(); + if !prior_ok || after >= src.len() { + search_from = pos + col_lower.len(); + continue; + } + let mut i = after; + while i < src.len() && src.as_bytes()[i] == b' ' { i += 1; } + if i >= src.len() || src.as_bytes()[i] != b'=' { search_from = pos + col_lower.len(); continue; } + i += 1; + while i < src.len() && src.as_bytes()[i] == b' ' { i += 1; } + if i >= src.len() || src.as_bytes()[i] != b'\'' { search_from = pos + col_lower.len(); continue; } + i += 1; + let start = i; + while i < src.len() && src.as_bytes()[i] != b'\'' { i += 1; } + if i > start { + return Some(src[start..i].to_string()); + } + search_from = pos + col_lower.len(); + } + None +} + /// Pull (city, state) out of a SQL filter that uses /// `city = 'Toledo' AND state = 'OH'` style equality. Returns None if /// either is missing — the caller keeps the original global boost map /// behavior (no geo narrowing). Case-insensitive on the column name /// so `CITY=` or `City =` also work. fn extract_target_geo(sql_filter: &str) -> Option<(String, String)> { - fn grab_eq(src: &str, col: &str) -> Option<String> { - // Very small parser, resilient enough for the executor's - // filter shapes. Matches `col = 'value'` or `col='value'` with - // case-insensitive column name. - let lower = src.to_ascii_lowercase(); - let col_lower = col.to_ascii_lowercase(); - let mut search_from = 0usize; - while let Some(off) = lower[search_from..].find(&col_lower) { - let pos = search_from + off; - // Require word boundary before the column name so "city" - // inside "civilian_rank" doesn't false-match. 
- let prior_ok = pos == 0 - || !lower.as_bytes()[pos - 1].is_ascii_alphanumeric() - && lower.as_bytes()[pos - 1] != b'_'; - let after = pos + col_lower.len(); - if !prior_ok || after >= src.len() { - search_from = pos + col_lower.len(); - continue; - } - // Walk past whitespace, require '='. - let mut i = after; - while i < src.len() && src.as_bytes()[i] == b' ' { i += 1; } - if i >= src.len() || src.as_bytes()[i] != b'=' { search_from = pos + col_lower.len(); continue; } - i += 1; - while i < src.len() && src.as_bytes()[i] == b' ' { i += 1; } - // Value is single-quoted literal; extract until the next '. - if i >= src.len() || src.as_bytes()[i] != b'\'' { search_from = pos + col_lower.len(); continue; } - i += 1; - let start = i; - while i < src.len() && src.as_bytes()[i] != b'\'' { i += 1; } - if i > start { - return Some(src[start..i].to_string()); - } - search_from = pos + col_lower.len(); - } - None - } - let city = grab_eq(sql_filter, "city")?; - let state = grab_eq(sql_filter, "state")?; + let city = grab_eq_value(sql_filter, "city")?; + let state = grab_eq_value(sql_filter, "state")?; Some((city, state)) } diff --git a/tests/multi-agent/scenario.ts b/tests/multi-agent/scenario.ts index 597fd86..3fd5dc6 100644 --- a/tests/multi-agent/scenario.ts +++ b/tests/multi-agent/scenario.ts @@ -92,6 +92,8 @@ const RETRY_ON_FAIL = process.env.LH_RETRY_ON_FAIL !== "0"; // based on staffer.tool_level before calling anything else. let ACTIVE_EXECUTOR = EXECUTOR_MODEL; let ACTIVE_REVIEWER = REVIEWER_MODEL; +let ACTIVE_EXECUTOR_CLOUD = false; +let ACTIVE_REVIEWER_CLOUD = false; let ACTIVE_T3_DISABLED = T3_DISABLED; let ACTIVE_OVERVIEW_CLOUD = OVERVIEW_CLOUD; let ACTIVE_RETRY_ON_FAIL = RETRY_ON_FAIL; @@ -101,6 +103,8 @@ function applyToolLevel(level: Staffer["tool_level"] | undefined): void { // don't leak. 
ACTIVE_EXECUTOR = EXECUTOR_MODEL; ACTIVE_REVIEWER = REVIEWER_MODEL; + ACTIVE_EXECUTOR_CLOUD = false; + ACTIVE_REVIEWER_CLOUD = false; ACTIVE_T3_DISABLED = T3_DISABLED; ACTIVE_OVERVIEW_CLOUD = OVERVIEW_CLOUD; ACTIVE_RETRY_ON_FAIL = RETRY_ON_FAIL; @@ -113,14 +117,22 @@ function applyToolLevel(level: Staffer["tool_level"] | undefined): void { ACTIVE_OVERVIEW_CLOUD = false; break; case "basic": - ACTIVE_EXECUTOR = "qwen2.5:latest"; - ACTIVE_REVIEWER = "qwen2.5:latest"; + // qwen2.5 collapsed on this workload (0/14 fill). Replace with + // cloud kimi-k2.5 — same family as k2.6 (which requires a paid + // tier), strong at tool calling. kimi-k2.6 is targeted when the + // subscription upgrades. + ACTIVE_EXECUTOR = "kimi-k2.5"; + ACTIVE_EXECUTOR_CLOUD = true; + ACTIVE_REVIEWER = "qwen3:latest"; // local reviewer stays cheap ACTIVE_OVERVIEW_CLOUD = false; ACTIVE_RETRY_ON_FAIL = false; break; case "minimal": - ACTIVE_EXECUTOR = "qwen2.5:latest"; - ACTIVE_REVIEWER = "qwen2.5:latest"; + // Same executor as basic but strip the overseer + rescue. + // Proves whether playbook inheritance alone carries the load. + ACTIVE_EXECUTOR = "kimi-k2.5"; + ACTIVE_EXECUTOR_CLOUD = true; + ACTIVE_REVIEWER = "qwen3:latest"; ACTIVE_T3_DISABLED = true; ACTIVE_OVERVIEW_CLOUD = false; ACTIVE_RETRY_ON_FAIL = false; @@ -514,6 +526,7 @@ async function runAgentFill( shape: "json", max_continuations: 3, think: false, + cloud: ACTIVE_EXECUTOR_CLOUD, on_continuation: (n, len) => append({ turn, role: "executor", model: ACTIVE_EXECUTOR, kind: "note", content: { continuation: n, combined_chars: len } }), @@ -577,6 +590,7 @@ async function runAgentFill( shape: "json", max_continuations: 3, think: false, + cloud: ACTIVE_REVIEWER_CLOUD, on_continuation: (n, len) => append({ turn, role: "reviewer", model: ACTIVE_REVIEWER, kind: "note", content: { continuation: n, combined_chars: len } }),