This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 4e1b749765 [iceberg] Use gzip by default to iceberg avro writer (#4620)
4e1b749765 is described below

commit 4e1b74976590fbe5d885614b65e038217c9266df
Author: Jingsong Lee <[email protected]>
AuthorDate: Mon Dec 2 18:13:33 2024 +0800

    [iceberg] Use gzip by default to iceberg avro writer (#4620)
---
 docs/content/migration/iceberg-compatibility.md           |  6 ++++++
 .../java/org/apache/paimon/iceberg/IcebergOptions.java    |  7 +++++++
 .../paimon/iceberg/manifest/IcebergManifestFile.java      | 15 +++++++++++----
 .../paimon/iceberg/manifest/IcebergManifestList.java      | 15 +++++++++++----
 .../apache/paimon/iceberg/IcebergCompatibilityTest.java   |  6 ++++++
 5 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/docs/content/migration/iceberg-compatibility.md b/docs/content/migration/iceberg-compatibility.md
index f07f78cb20..7b83936b53 100644
--- a/docs/content/migration/iceberg-compatibility.md
+++ b/docs/content/migration/iceberg-compatibility.md
@@ -371,6 +371,12 @@ you also need to set some (or all) of the following table options when creating
       <td>String</td>
       <td>hadoop-conf-dir for Iceberg Hive catalog.</td>
     </tr>
+    <tr>
+      <td><h5>metadata.iceberg.manifest-compression</h5></td>
+      <td style="word-wrap: break-word;">gzip</td>
+      <td>String</td>
+      <td>Compression for Iceberg manifest files.</td>
+    </tr>
     </tbody>
 </table>
 
diff --git a/paimon-core/src/main/java/org/apache/paimon/iceberg/IcebergOptions.java b/paimon-core/src/main/java/org/apache/paimon/iceberg/IcebergOptions.java
index 769ce6b161..3900233d21 100644
--- a/paimon-core/src/main/java/org/apache/paimon/iceberg/IcebergOptions.java
+++ b/paimon-core/src/main/java/org/apache/paimon/iceberg/IcebergOptions.java
@@ -70,6 +70,13 @@ public class IcebergOptions {
                     .noDefaultValue()
                     .withDescription("hadoop-conf-dir for Iceberg Hive catalog.");
 
+    public static final ConfigOption<String> MANIFEST_COMPRESSION =
+            key("metadata.iceberg.manifest-compression")
+                    .stringType()
+                    .defaultValue(
+                            "gzip") // some Iceberg reader cannot support zstd, for example DuckDB
+                    .withDescription("Compression for Iceberg manifest files.");
+
     /** Where to store Iceberg metadata. */
     public enum StorageType implements DescribedEnum {
         DISABLED("disabled", "Disable Iceberg compatibility support."),
diff --git a/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java b/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java
index d04cf3576a..57484a1f3f 100644
--- a/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java
+++ b/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java
@@ -19,6 +19,7 @@
 package org.apache.paimon.iceberg.manifest;
 
 import org.apache.paimon.CoreOptions;
+import org.apache.paimon.annotation.VisibleForTesting;
 import org.apache.paimon.format.FileFormat;
 import org.apache.paimon.format.FormatReaderFactory;
 import org.apache.paimon.format.FormatWriterFactory;
@@ -26,6 +27,7 @@ import org.apache.paimon.format.SimpleColStats;
 import org.apache.paimon.format.SimpleStatsCollector;
 import org.apache.paimon.fs.FileIO;
 import org.apache.paimon.fs.Path;
+import org.apache.paimon.iceberg.IcebergOptions;
 import org.apache.paimon.iceberg.IcebergPathFactory;
 import org.apache.paimon.iceberg.manifest.IcebergManifestFileMeta.Content;
 import org.apache.paimon.iceberg.metadata.IcebergPartitionSpec;
@@ -82,23 +84,28 @@ public class IcebergManifestFile extends ObjectsFile<IcebergManifestEntry> {
         this.targetFileSize = targetFileSize;
     }
 
+    @VisibleForTesting
+    public String compression() {
+        return compression;
+    }
+
     public static IcebergManifestFile create(FileStoreTable table, IcebergPathFactory pathFactory) {
         RowType partitionType = table.schema().logicalPartitionType();
         RowType entryType = IcebergManifestEntry.schema(partitionType);
-        Options manifestFileAvroOptions = Options.fromMap(table.options());
+        Options avroOptions = Options.fromMap(table.options());
         // https://github.com/apache/iceberg/blob/main/core/src/main/java/org/apache/iceberg/ManifestReader.java
-        manifestFileAvroOptions.set(
+        avroOptions.set(
                 "avro.row-name-mapping",
                 "org.apache.paimon.avro.generated.record:manifest_entry,"
                         + "manifest_entry_data_file:r2,"
                         + "r2_partition:r102");
-        FileFormat manifestFileAvro = FileFormat.fromIdentifier("avro", manifestFileAvroOptions);
+        FileFormat manifestFileAvro = FileFormat.fromIdentifier("avro", avroOptions);
         return new IcebergManifestFile(
                 table.fileIO(),
                 partitionType,
                 manifestFileAvro.createReaderFactory(entryType),
                 manifestFileAvro.createWriterFactory(entryType),
-                table.coreOptions().manifestCompression(),
+                avroOptions.get(IcebergOptions.MANIFEST_COMPRESSION),
                 pathFactory.manifestFileFactory(),
                 table.coreOptions().manifestTargetSize());
     }
diff --git a/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestList.java b/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestList.java
index 911ebf954c..ab5cc926cd 100644
--- a/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestList.java
+++ b/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestList.java
@@ -18,10 +18,12 @@
 
 package org.apache.paimon.iceberg.manifest;
 
+import org.apache.paimon.annotation.VisibleForTesting;
 import org.apache.paimon.format.FileFormat;
 import org.apache.paimon.format.FormatReaderFactory;
 import org.apache.paimon.format.FormatWriterFactory;
 import org.apache.paimon.fs.FileIO;
+import org.apache.paimon.iceberg.IcebergOptions;
 import org.apache.paimon.iceberg.IcebergPathFactory;
 import org.apache.paimon.options.Options;
 import org.apache.paimon.table.FileStoreTable;
@@ -51,19 +53,24 @@ public class IcebergManifestList extends ObjectsFile<IcebergManifestFileMeta> {
                 null);
     }
 
+    @VisibleForTesting
+    public String compression() {
+        return compression;
+    }
+
     public static IcebergManifestList create(FileStoreTable table, IcebergPathFactory pathFactory) {
-        Options manifestListAvroOptions = Options.fromMap(table.options());
+        Options avroOptions = Options.fromMap(table.options());
         // https://github.com/apache/iceberg/blob/main/core/src/main/java/org/apache/iceberg/ManifestLists.java
-        manifestListAvroOptions.set(
+        avroOptions.set(
                 "avro.row-name-mapping",
                 "org.apache.paimon.avro.generated.record:manifest_file,"
                         + "manifest_file_partitions:r508");
-        FileFormat manifestListAvro = FileFormat.fromIdentifier("avro", manifestListAvroOptions);
+        FileFormat manifestListAvro = FileFormat.fromIdentifier("avro", avroOptions);
         return new IcebergManifestList(
                 table.fileIO(),
                 manifestListAvro.createReaderFactory(IcebergManifestFileMeta.schema()),
                 manifestListAvro.createWriterFactory(IcebergManifestFileMeta.schema()),
-                table.coreOptions().manifestCompression(),
+                avroOptions.get(IcebergOptions.MANIFEST_COMPRESSION),
                 pathFactory.manifestListFactory());
     }
 }
diff --git a/paimon-core/src/test/java/org/apache/paimon/iceberg/IcebergCompatibilityTest.java b/paimon-core/src/test/java/org/apache/paimon/iceberg/IcebergCompatibilityTest.java
index 9a27d56184..45cfe109b9 100644
--- a/paimon-core/src/test/java/org/apache/paimon/iceberg/IcebergCompatibilityTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/iceberg/IcebergCompatibilityTest.java
@@ -30,6 +30,7 @@ import org.apache.paimon.data.Timestamp;
 import org.apache.paimon.disk.IOManagerImpl;
 import org.apache.paimon.fs.Path;
 import org.apache.paimon.fs.local.LocalFileIO;
+import org.apache.paimon.iceberg.manifest.IcebergManifestFile;
 import org.apache.paimon.iceberg.manifest.IcebergManifestFileMeta;
 import org.apache.paimon.iceberg.manifest.IcebergManifestList;
 import org.apache.paimon.iceberg.metadata.IcebergMetadata;
@@ -302,6 +303,11 @@ public class IcebergCompatibilityTest {
         IcebergPathFactory pathFactory =
                 new IcebergPathFactory(new Path(table.location(), "metadata"));
         IcebergManifestList manifestList = IcebergManifestList.create(table, pathFactory);
+        assertThat(manifestList.compression()).isEqualTo("gzip");
+
+        IcebergManifestFile manifestFile = IcebergManifestFile.create(table, pathFactory);
+        assertThat(manifestFile.compression()).isEqualTo("gzip");
+
         Set<String> usingManifests = new HashSet<>();
         for (IcebergManifestFileMeta fileMeta :
                 manifestList.read(new Path(metadata.currentSnapshot().manifestList()).getName())) {

Reply via email to