This is an automated email from the ASF dual-hosted git repository.

huaxingao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/main by this push:
     new 06f9a1d1d3 Core: Fix BinPackRewriteFilePlanner producing incorrect 
output file count with max-files-to-rewrite (#15576)
06f9a1d1d3 is described below

commit 06f9a1d1d383fc29c82dfba099e78a65a6a3188a
Author: hemanthboyina <[email protected]>
AuthorDate: Tue Mar 17 08:47:53 2026 +0530

    Core: Fix BinPackRewriteFilePlanner producing incorrect output file count 
with max-files-to-rewrite (#15576)
---
 .../iceberg/actions/BinPackRewriteFilePlanner.java |  9 ++++--
 .../actions/TestBinPackRewriteFilePlanner.java     | 35 ++++++++++++++++++++++
 2 files changed, 41 insertions(+), 3 deletions(-)

diff --git 
a/core/src/main/java/org/apache/iceberg/actions/BinPackRewriteFilePlanner.java 
b/core/src/main/java/org/apache/iceberg/actions/BinPackRewriteFilePlanner.java
index e74dbfe04d..ee768fcde4 100644
--- 
a/core/src/main/java/org/apache/iceberg/actions/BinPackRewriteFilePlanner.java
+++ 
b/core/src/main/java/org/apache/iceberg/actions/BinPackRewriteFilePlanner.java
@@ -241,13 +241,16 @@ public class BinPackRewriteFilePlanner
                         } else if (fileCountRunner.get() < maxFilesToRewrite) {
                           int remainingSize = maxFilesToRewrite - 
fileCountRunner.get();
                           int scanTasksToRewrite = 
Math.min(fileScanTasks.size(), remainingSize);
+                          List<FileScanTask> tasksToRewrite =
+                              fileScanTasks.subList(0, scanTasksToRewrite);
+                          long rewriteInputSize = inputSize(tasksToRewrite);
                           selectedFileGroups.add(
                               newRewriteGroup(
                                   ctx,
                                   partition,
-                                  fileScanTasks.subList(0, scanTasksToRewrite),
-                                  inputSplitSize(inputSize),
-                                  expectedOutputFiles(inputSize)));
+                                  tasksToRewrite,
+                                  inputSplitSize(rewriteInputSize),
+                                  expectedOutputFiles(rewriteInputSize)));
                           fileCountRunner.getAndAdd(scanTasksToRewrite);
                         }
                       });
diff --git 
a/core/src/test/java/org/apache/iceberg/actions/TestBinPackRewriteFilePlanner.java
 
b/core/src/test/java/org/apache/iceberg/actions/TestBinPackRewriteFilePlanner.java
index 64209d7781..aa65140c0b 100644
--- 
a/core/src/test/java/org/apache/iceberg/actions/TestBinPackRewriteFilePlanner.java
+++ 
b/core/src/test/java/org/apache/iceberg/actions/TestBinPackRewriteFilePlanner.java
@@ -522,6 +522,41 @@ class TestBinPackRewriteFilePlanner {
     assertThat(fileScanTasks).isEqualTo(5);
   }
 
+  @Test
+  public void 
testMaxFilesToRewriteUsesCorrectInputSizeForExpectedOutputFiles() {
+    // Create files with known sizes where truncation matters for 
expectedOutputFiles
+    DataFile largeFile1 = newDataFile("data_bucket=0", 200);
+    DataFile largeFile2 = newDataFile("data_bucket=0", 200);
+    DataFile largeFile3 = newDataFile("data_bucket=0", 200);
+    
table.newAppend().appendFile(largeFile1).appendFile(largeFile2).appendFile(largeFile3).commit();
+
+    BinPackRewriteFilePlanner planner = new BinPackRewriteFilePlanner(table);
+    // target=250 means:
+    //   Full group (600 bytes): expectedOutputFiles = ceil(600/250) = 3
+    //   Truncated to 1 file (200 bytes): expectedOutputFiles = ceil(200/250) 
= 1
+    Map<String, String> options =
+        ImmutableMap.of(
+            BinPackRewriteFilePlanner.MAX_FILES_TO_REWRITE, "1",
+            BinPackRewriteFilePlanner.REWRITE_ALL, "true",
+            BinPackRewriteFilePlanner.TARGET_FILE_SIZE_BYTES, "250");
+    planner.init(options);
+
+    FileRewritePlan<FileGroupInfo, FileScanTask, DataFile, RewriteFileGroup> 
plan = planner.plan();
+    List<RewriteFileGroup> groups = 
Lists.newArrayList(plan.groups().iterator());
+
+    assertThat(groups).hasSize(1);
+    RewriteFileGroup group = groups.get(0);
+    // Only 1 file should be in the group due to max-files-to-rewrite=1
+    assertThat(group.inputFileNum()).isEqualTo(1);
+    // expectedOutputFiles should be based on the truncated input (200 bytes), 
not full group (600)
+    // ceil(200/250) = 1, NOT ceil(600/250) = 3
+    assertThat(group.expectedOutputFiles())
+        .as(
+            "expectedOutputFiles should be computed from actual files being 
rewritten (200 bytes),"
+                + " not the full group (600 bytes)")
+        .isEqualTo(1);
+  }
+
   @Test
   public void testRewriteMaxFilesRewriteGreaterThanTotalFiles() {
     addFiles();

Reply via email to