This is an automated email from the ASF dual-hosted git repository.
huaxingao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 06f9a1d1d3 Core: Fix BinPackRewriteFilePlanner producing incorrect
output file count with max-files-to-rewrite (#15576)
06f9a1d1d3 is described below
commit 06f9a1d1d383fc29c82dfba099e78a65a6a3188a
Author: hemanthboyina <[email protected]>
AuthorDate: Tue Mar 17 08:47:53 2026 +0530
Core: Fix BinPackRewriteFilePlanner producing incorrect output file count
with max-files-to-rewrite (#15576)
---
.../iceberg/actions/BinPackRewriteFilePlanner.java | 9 ++++--
.../actions/TestBinPackRewriteFilePlanner.java | 35 ++++++++++++++++++++++
2 files changed, 41 insertions(+), 3 deletions(-)
diff --git
a/core/src/main/java/org/apache/iceberg/actions/BinPackRewriteFilePlanner.java
b/core/src/main/java/org/apache/iceberg/actions/BinPackRewriteFilePlanner.java
index e74dbfe04d..ee768fcde4 100644
---
a/core/src/main/java/org/apache/iceberg/actions/BinPackRewriteFilePlanner.java
+++
b/core/src/main/java/org/apache/iceberg/actions/BinPackRewriteFilePlanner.java
@@ -241,13 +241,16 @@ public class BinPackRewriteFilePlanner
} else if (fileCountRunner.get() < maxFilesToRewrite) {
int remainingSize = maxFilesToRewrite -
fileCountRunner.get();
int scanTasksToRewrite =
Math.min(fileScanTasks.size(), remainingSize);
+ List<FileScanTask> tasksToRewrite =
+ fileScanTasks.subList(0, scanTasksToRewrite);
+ long rewriteInputSize = inputSize(tasksToRewrite);
selectedFileGroups.add(
newRewriteGroup(
ctx,
partition,
- fileScanTasks.subList(0, scanTasksToRewrite),
- inputSplitSize(inputSize),
- expectedOutputFiles(inputSize)));
+ tasksToRewrite,
+ inputSplitSize(rewriteInputSize),
+ expectedOutputFiles(rewriteInputSize)));
fileCountRunner.getAndAdd(scanTasksToRewrite);
}
});
diff --git
a/core/src/test/java/org/apache/iceberg/actions/TestBinPackRewriteFilePlanner.java
b/core/src/test/java/org/apache/iceberg/actions/TestBinPackRewriteFilePlanner.java
index 64209d7781..aa65140c0b 100644
---
a/core/src/test/java/org/apache/iceberg/actions/TestBinPackRewriteFilePlanner.java
+++
b/core/src/test/java/org/apache/iceberg/actions/TestBinPackRewriteFilePlanner.java
@@ -522,6 +522,41 @@ class TestBinPackRewriteFilePlanner {
assertThat(fileScanTasks).isEqualTo(5);
}
+ @Test
+ public void
testMaxFilesToRewriteUsesCorrectInputSizeForExpectedOutputFiles() {
+ // Create files with known sizes where truncation matters for
expectedOutputFiles
+ DataFile largeFile1 = newDataFile("data_bucket=0", 200);
+ DataFile largeFile2 = newDataFile("data_bucket=0", 200);
+ DataFile largeFile3 = newDataFile("data_bucket=0", 200);
+
table.newAppend().appendFile(largeFile1).appendFile(largeFile2).appendFile(largeFile3).commit();
+
+ BinPackRewriteFilePlanner planner = new BinPackRewriteFilePlanner(table);
+ // target=250 means:
+ // Full group (600 bytes): expectedOutputFiles = ceil(600/250) = 3
+ // Truncated to 1 file (200 bytes): expectedOutputFiles = ceil(200/250)
= 1
+ Map<String, String> options =
+ ImmutableMap.of(
+ BinPackRewriteFilePlanner.MAX_FILES_TO_REWRITE, "1",
+ BinPackRewriteFilePlanner.REWRITE_ALL, "true",
+ BinPackRewriteFilePlanner.TARGET_FILE_SIZE_BYTES, "250");
+ planner.init(options);
+
+ FileRewritePlan<FileGroupInfo, FileScanTask, DataFile, RewriteFileGroup>
plan = planner.plan();
+ List<RewriteFileGroup> groups =
Lists.newArrayList(plan.groups().iterator());
+
+ assertThat(groups).hasSize(1);
+ RewriteFileGroup group = groups.get(0);
+ // Only 1 file should be in the group due to max-files-to-rewrite=1
+ assertThat(group.inputFileNum()).isEqualTo(1);
+ // expectedOutputFiles should be based on the truncated input (200 bytes),
not full group (600)
+ // ceil(200/250) = 1, NOT ceil(600/250) = 3
+ assertThat(group.expectedOutputFiles())
+ .as(
+ "expectedOutputFiles should be computed from actual files being
rewritten (200 bytes),"
+ + " not the full group (600 bytes)")
+ .isEqualTo(1);
+ }
+
@Test
public void testRewriteMaxFilesRewriteGreaterThanTotalFiles() {
addFiles();