queryd/delta: fix CompactResult.base_rows unit mismatch (6-line fix)
Some checks failed
lakehouse/auditor 2 blocking issues: cloud: claim not backed — "proven review pathways."

Before: `base_rows = pre_filter_rows - delta_count` subtracted a FILE
count (delta_batches.len()) from a ROW count (pre_filter_rows), producing
a meaningless "rough" approximation the comment acknowledged.

Now: base_rows is captured directly from the pre-extend state. Same for
delta_rows, which now reports actual delta row count instead of file
count.

Workspace baseline warnings unchanged at 11. Flagged by scrum iter 4-7
as a PRD §8.6 contract gap (upsert semantics); this closes the reporting
half. Full dedup work remains queued.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
root 2026-04-24 05:35:30 -05:00
parent 2f8b347f37
commit 86901f8def

View File

@@ -84,14 +84,17 @@ pub async fn compact(
// Load deltas
let delta_batches = load_deltas(store, dataset_name).await?;
let delta_count = delta_batches.len();
// Row counts captured before extend; previously base_rows subtracted delta_count (files) from rows — unit mismatch.
let base_row_count: usize = base_batches.iter().map(|b| b.num_rows()).sum();
let delta_row_count: usize = delta_batches.iter().map(|b| b.num_rows()).sum();
let has_tombstones = !tombstones.is_empty();
let nothing_to_do = delta_batches.is_empty() && !has_tombstones;
if nothing_to_do {
return Ok(CompactResult {
base_rows: base_batches.iter().map(|b| b.num_rows()).sum(),
base_rows: base_row_count,
delta_rows: 0,
final_rows: base_batches.iter().map(|b| b.num_rows()).sum(),
final_rows: base_row_count,
deltas_merged: 0,
tombstones_applied: 0,
rows_dropped_by_tombstones: 0,
@@ -99,7 +102,7 @@ pub async fn compact(
}
base_batches.extend(delta_batches);
let pre_filter_rows: usize = base_batches.iter().map(|b| b.num_rows()).sum();
let pre_filter_rows: usize = base_row_count + delta_row_count;
// If primary key specified, deduplicate (keep last occurrence)
let merged_batches = if let Some(_pk) = primary_key_col {
@@ -183,8 +186,8 @@ pub async fn compact(
);
Ok(CompactResult {
base_rows: pre_filter_rows - delta_count, // rough base-before-deltas
delta_rows: delta_count,
base_rows: base_row_count,
delta_rows: delta_row_count,
final_rows,
deltas_merged: delta_count,
tombstones_applied: tombstones.len(),