This is an automated email from the ASF dual-hosted git repository.

etudenhoefner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/main by this push:
     new 9c1dd3b3a5 Core: Wrong reported length of encrypted Puffin files (#14645)
9c1dd3b3a5 is described below

commit 9c1dd3b3a51aded4a6a079dfd49d0f94ec88f3cd
Author: Adam Szita <[email protected]>
AuthorDate: Mon Nov 24 15:36:13 2025 +0100

    Core: Wrong reported length of encrypted Puffin files (#14645)
    
    Manifest files created for Puffin files record the
    length of each new file as PuffinWriter#length().
    The underlying PositionOutputStream provides this
    value via getPos(), but for encrypted files that is
    not the true file length; it is the length of the
    unencrypted content.
---
 .../org/apache/iceberg/puffin/PuffinWriter.java    |  7 ++--
 .../apache/iceberg/puffin/TestPuffinWriter.java    | 43 ++++++++++++++++++++++
 2 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/core/src/main/java/org/apache/iceberg/puffin/PuffinWriter.java b/core/src/main/java/org/apache/iceberg/puffin/PuffinWriter.java
index cd44dab03b..eb76ec8548 100644
--- a/core/src/main/java/org/apache/iceberg/puffin/PuffinWriter.java
+++ b/core/src/main/java/org/apache/iceberg/puffin/PuffinWriter.java
@@ -124,8 +124,6 @@ public class PuffinWriter implements FileAppender<Blob> {
     if (!finished) {
       finish();
     }
-
-    outputStream.close();
   }
 
   private void writeHeaderIfNeeded() throws IOException {
@@ -144,7 +142,10 @@ public class PuffinWriter implements FileAppender<Blob> {
     long footerOffset = outputStream.getPos();
     writeFooter();
     this.footerSize = Optional.of(Math.toIntExact(outputStream.getPos() - footerOffset));
-    this.fileSize = Optional.of(outputStream.getPos());
+    outputStream.close();
+    // some streams (e.g. AesGcmOutputStream) may only write the last bytes upon
+    // having close() invoked
+    this.fileSize = Optional.of(outputStream.storedLength());
     this.finished = true;
   }
 
diff --git a/core/src/test/java/org/apache/iceberg/puffin/TestPuffinWriter.java b/core/src/test/java/org/apache/iceberg/puffin/TestPuffinWriter.java
index 2a11849871..337fff817a 100644
--- a/core/src/test/java/org/apache/iceberg/puffin/TestPuffinWriter.java
+++ b/core/src/test/java/org/apache/iceberg/puffin/TestPuffinWriter.java
@@ -26,13 +26,25 @@ import static org.apache.iceberg.puffin.PuffinFormatTestUtil.readTestResource;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatThrownBy;
 
+import java.io.File;
 import java.nio.ByteBuffer;
+import java.nio.file.Path;
+import java.util.Random;
+import org.apache.iceberg.Files;
+import org.apache.iceberg.encryption.AesGcmOutputFile;
 import org.apache.iceberg.inmemory.InMemoryOutputFile;
+import org.apache.iceberg.io.OutputFile;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
 
 public class TestPuffinWriter {
+
+  @TempDir private Path temp;
+
   @Test
   public void testEmptyFooterCompressed() {
     InMemoryOutputFile outputFile = new InMemoryOutputFile();
@@ -86,6 +98,37 @@ public class TestPuffinWriter {
     testWriteMetric(ZSTD, "v1/sample-metric-data-compressed-zstd.bin");
   }
 
+  @ParameterizedTest
+  @CsvSource({"true, 158", "false, 122"})
+  public void testFileSizeCalculation(boolean isEncrypted, long expectedSize) throws Exception {
+    final OutputFile outputFile;
+
+    if (isEncrypted) {
+      File testFile = temp.resolve("test" + System.nanoTime()).toFile();
+      Random random = new Random();
+      byte[] key = new byte[16];
+      random.nextBytes(key);
+      byte[] aadPrefix = new byte[16];
+      random.nextBytes(aadPrefix);
+      outputFile = new AesGcmOutputFile(Files.localOutput(testFile), key, aadPrefix);
+    } else {
+      outputFile = new InMemoryOutputFile();
+    }
+
+    PuffinWriter writer = Puffin.write(outputFile).build();
+    writer.write(
+        new Blob(
+            "blob",
+            ImmutableList.of(1),
+            2,
+            1,
+            ByteBuffer.wrap("blob".getBytes()),
+            null,
+            ImmutableMap.of()));
+    writer.close();
+    assertThat(writer.length()).isEqualTo(expectedSize);
+  }
+
   private void testWriteMetric(PuffinCompressionCodec compression, String expectedResource)
       throws Exception {
     InMemoryOutputFile outputFile = new InMemoryOutputFile();

Reply via email to