queryd/delta: fix CompactResult.base_rows unit mismatch (6-line fix)
Some checks failed
lakehouse/auditor 2 blocking issues: cloud: claim not backed — "proven review pathways."
Some checks failed
lakehouse/auditor 2 blocking issues: cloud: claim not backed — "proven review pathways."
Before: `base_rows = pre_filter_rows - delta_count` subtracted a FILE count (`delta_batches.len()`) from a ROW count (`pre_filter_rows`), producing a meaningless approximation that the inline comment itself acknowledged as "rough". Now: `base_rows` is captured directly from the pre-extend state. Likewise, `delta_rows` now reports the actual delta row count instead of the file count. Workspace baseline warnings are unchanged at 11. Flagged by scrum iter 4-7 as a PRD §8.6 contract gap (upsert semantics); this closes the reporting half. Full dedup work remains queued. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
2f8b347f37
commit
86901f8def
@ -84,14 +84,17 @@ pub async fn compact(
|
|||||||
// Load deltas
|
// Load deltas
|
||||||
let delta_batches = load_deltas(store, dataset_name).await?;
|
let delta_batches = load_deltas(store, dataset_name).await?;
|
||||||
let delta_count = delta_batches.len();
|
let delta_count = delta_batches.len();
|
||||||
|
// Row counts captured before extend; previously base_rows subtracted delta_count (files) from rows — unit mismatch.
|
||||||
|
let base_row_count: usize = base_batches.iter().map(|b| b.num_rows()).sum();
|
||||||
|
let delta_row_count: usize = delta_batches.iter().map(|b| b.num_rows()).sum();
|
||||||
|
|
||||||
let has_tombstones = !tombstones.is_empty();
|
let has_tombstones = !tombstones.is_empty();
|
||||||
let nothing_to_do = delta_batches.is_empty() && !has_tombstones;
|
let nothing_to_do = delta_batches.is_empty() && !has_tombstones;
|
||||||
if nothing_to_do {
|
if nothing_to_do {
|
||||||
return Ok(CompactResult {
|
return Ok(CompactResult {
|
||||||
base_rows: base_batches.iter().map(|b| b.num_rows()).sum(),
|
base_rows: base_row_count,
|
||||||
delta_rows: 0,
|
delta_rows: 0,
|
||||||
final_rows: base_batches.iter().map(|b| b.num_rows()).sum(),
|
final_rows: base_row_count,
|
||||||
deltas_merged: 0,
|
deltas_merged: 0,
|
||||||
tombstones_applied: 0,
|
tombstones_applied: 0,
|
||||||
rows_dropped_by_tombstones: 0,
|
rows_dropped_by_tombstones: 0,
|
||||||
@ -99,7 +102,7 @@ pub async fn compact(
|
|||||||
}
|
}
|
||||||
|
|
||||||
base_batches.extend(delta_batches);
|
base_batches.extend(delta_batches);
|
||||||
let pre_filter_rows: usize = base_batches.iter().map(|b| b.num_rows()).sum();
|
let pre_filter_rows: usize = base_row_count + delta_row_count;
|
||||||
|
|
||||||
// If primary key specified, deduplicate (keep last occurrence)
|
// If primary key specified, deduplicate (keep last occurrence)
|
||||||
let merged_batches = if let Some(_pk) = primary_key_col {
|
let merged_batches = if let Some(_pk) = primary_key_col {
|
||||||
@ -183,8 +186,8 @@ pub async fn compact(
|
|||||||
);
|
);
|
||||||
|
|
||||||
Ok(CompactResult {
|
Ok(CompactResult {
|
||||||
base_rows: pre_filter_rows - delta_count, // rough base-before-deltas
|
base_rows: base_row_count,
|
||||||
delta_rows: delta_count,
|
delta_rows: delta_row_count,
|
||||||
final_rows,
|
final_rows,
|
||||||
deltas_merged: delta_count,
|
deltas_merged: delta_count,
|
||||||
tombstones_applied: tombstones.len(),
|
tombstones_applied: tombstones.len(),
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user