(doris) branch master updated: [bug](iceberg) fix can't get migrated Iceberg tables format type (#64134)

zhangstar333 Mon, 08 Jun 2026 23:13:45 -0700

This is an automated email from the ASF dual-hosted git repository.

zhangstar333 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/master by this push:
     new 2366edffcc6 [bug](iceberg) fix can't get migrated Iceberg tables 
format type (#64134)
2366edffcc6 is described below

commit 2366edffcc638ef8ca2a49ca8694763bc6453b54
Author: zhangstar333 <[email protected]>
AuthorDate: Tue Jun 9 14:13:27 2026 +0800

    [bug](iceberg) fix can't get migrated Iceberg tables format type (#64134)
    
    ### What problem does this PR solve?
    Problem Summary:
    
    ```
    [CORRUPTION]Invalid magic number in parquet file, bytes read: 253, file 
size: 253,
    path: /user/hive/warehouse/test_migrate_managed_...,
    read magic: ORC .
    ```
    
    The properties of a migrated Iceberg table contain neither
    "write-format" nor "write.format.default", so Doris falls back to
    Parquet as the table's file format even though the data files are
    actually ORC.
    This change adds a further check that infers the file format from the
    table's data files when neither property is set.
---
 .../create_preinstalled_scripts/iceberg/run10.sql  |  4 +--
 .../doris/datasource/iceberg/IcebergUtils.java     | 40 ++++++++++++++++++----
 2 files changed, 36 insertions(+), 8 deletions(-)

diff --git 
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run10.sql
 
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run10.sql
index 0d2b2240de4..650ffdec575 100644
--- 
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run10.sql
+++ 
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run10.sql
@@ -8,7 +8,7 @@ CREATE TABLE sc_drop_add_orc (
 )
 USING iceberg
 PARTITIONED BY (id)
-TBLPROPERTIES ('format'='orc');
+TBLPROPERTIES ('write.format.default' = 'orc');
 
 INSERT INTO sc_drop_add_orc VALUES (1, 'Alice', 25);
 INSERT INTO sc_drop_add_orc VALUES (2, 'Bob', 30);
@@ -32,7 +32,7 @@ CREATE TABLE sc_drop_add_parquet (
 )
 USING iceberg
 PARTITIONED BY (id)
-TBLPROPERTIES ('format'='parquet');
+TBLPROPERTIES ('write.format.default' = 'parquet');
 
 INSERT INTO sc_drop_add_parquet VALUES (1, 'Alice', 25);
 INSERT INTO sc_drop_add_parquet VALUES (2, 'Bob', 30);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
index 7a3be773cf0..932bb8b86da 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
@@ -1141,12 +1141,7 @@ public class IcebergUtils {
 
     public static FileFormat getFileFormat(Table icebergTable) {
         Map<String, String> properties = icebergTable.properties();
-        String fileFormatName;
-        if (properties.containsKey(WRITE_FORMAT)) {
-            fileFormatName = properties.get(WRITE_FORMAT);
-        } else {
-            fileFormatName = 
properties.getOrDefault(TableProperties.DEFAULT_FILE_FORMAT, PARQUET_NAME);
-        }
+        String fileFormatName = resolveFileFormatName(icebergTable, 
properties);
         FileFormat fileFormat;
         if (fileFormatName.toLowerCase().contains(ORC_NAME)) {
             fileFormat = FileFormat.ORC;
@@ -1158,6 +1153,39 @@ public class IcebergUtils {
         return fileFormat;
     }
 
+    private static String resolveFileFormatName(Table icebergTable, 
Map<String, String> properties) {
+        // 1. Check "write-format" (nickname in Flink and Spark)
+        if (properties.containsKey(WRITE_FORMAT)) {
+            return properties.get(WRITE_FORMAT);
+        }
+        // 2. Check "write.format.default" (standard Iceberg property)
+        if (properties.containsKey(TableProperties.DEFAULT_FILE_FORMAT)) {
+            return properties.get(TableProperties.DEFAULT_FILE_FORMAT);
+        }
+        // 3. Last resort: infer from the actual data files in the current 
snapshot.
+        //    This handles migrated tables where none of the above properties 
are set.
+        return inferFileFormatFromDataFiles(icebergTable);
+    }
+
+    private static String inferFileFormatFromDataFiles(Table icebergTable) {
+        if (icebergTable.currentSnapshot() == null) {
+            LOG.info("Iceberg table {} has no snapshot, defaulting to {}", 
icebergTable.name(), PARQUET_NAME);
+            return PARQUET_NAME;
+        }
+        try (CloseableIterable<FileScanTask> files = 
icebergTable.newScan().planFiles()) {
+            java.util.Iterator<FileScanTask> it = files.iterator();
+            if (it.hasNext()) {
+                String format = it.next().file().format().name().toLowerCase();
+                LOG.info("Iceberg table {} inferred file format {} from data 
files", icebergTable.name(), format);
+                return format;
+            }
+        } catch (Exception e) {
+            LOG.warn("Failed to infer file format from data files for table 
{}, defaulting to {}",
+                    icebergTable.name(), PARQUET_NAME, e);
+        }
+        return PARQUET_NAME;
+    }
+
 
     public static String getFileCompress(Table table) {
         Map<String, String> properties = table.properties();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(doris) branch master updated: [bug](iceberg) fix can't get migrated Iceberg tables format type (#64134)

Reply via email to