From 0cf1b7c45a15219f73fa8f5883ee385628428d4c Mon Sep 17 00:00:00 2001 From: root Date: Fri, 24 Apr 2026 13:02:45 -0500 Subject: [PATCH] scrum_master: env-configurable tree-split threshold + shard size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hard-coded constants (FILE_TREE_SPLIT_THRESHOLD=6000, FILE_SHARD_SIZE=3500) were tuned for Rust source files in crates/*/src/*.rs. Running the pipeline against /root/llm-team-ui/llm_team_ui.py (13K lines, ~400KB) would produce ~200 shards per review at the default size — not viable. Two env vars now: - LH_SCRUM_TREE_SPLIT_THRESHOLD — when tree-split fires (default 6000) - LH_SCRUM_SHARD_SIZE — bytes per shard (default 3500) For the big-Python case the CLAUDE.md in /root/llm-team-ui/ recommends LH_SCRUM_TREE_SPLIT_THRESHOLD=20000, LH_SCRUM_SHARD_SIZE=12000 which brings the 13K-line file down to ~35 shards — same ballpark as a typical Rust file review. No default change. Existing lakehouse runs unaffected. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/real-world/scrum_master_pipeline.ts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/real-world/scrum_master_pipeline.ts b/tests/real-world/scrum_master_pipeline.ts index d61f9b2..4239c55 100644 --- a/tests/real-world/scrum_master_pipeline.ts +++ b/tests/real-world/scrum_master_pipeline.ts @@ -30,8 +30,13 @@ const MAX_ATTEMPTS = 9; // Files larger than this get tree-split instead of truncated. Fixes the // 6KB false-positive class (model claiming a field is "missing" when // it exists past the context cutoff). -const FILE_TREE_SPLIT_THRESHOLD = 6000; -const FILE_SHARD_SIZE = 3500; +// Env-configurable so the pipeline can adapt to different repos: +// a 13K-line Python file like /root/llm-team-ui/llm_team_ui.py needs +// larger shards to avoid producing 200+ cloud calls per review. +// Defaults stay at 6000 / 3500 — tuned for Rust source files in +// crates/*/src/*.rs. 
+const FILE_TREE_SPLIT_THRESHOLD = Math.floor(Math.max(0, Number(process.env.LH_SCRUM_TREE_SPLIT_THRESHOLD))) || 6000; // unset, blank, non-numeric or < 1 falls back to 6000
+const FILE_SHARD_SIZE = Math.floor(Math.max(0, Number(process.env.LH_SCRUM_SHARD_SIZE))) || 3500; // unset, blank, non-numeric or < 1 falls back to 3500 (never 0/NaN/negative)
 // Appended jsonl so auditor's kb_query can surface scrum findings for
 // files touched by a PR under review. Part of cohesion plan Phase C.
 const SCRUM_REVIEWS_JSONL = process.env.LH_SCRUM_REVIEWS_OUT