This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit df3a38096e6f230718792b384a241b0e747108fe Author: Peter Rozsa <[email protected]> AuthorDate: Thu Jun 27 15:59:08 2024 +0200 IMPALA-12861: Fix mixed file format listing for Iceberg tables This change fixes file format information collection for Iceberg tables. Previously, all file descriptors' file formats were collected from getSampledOrRawPartitions() in HdfsScanNode for Iceberg tables; now the collection part is extracted as a method and it's overridden in IcebergScanNode. Now, only the to-be-scanned file descriptor's file format is recorded, showing the correct file formats for each SCAN node in the plans. Tests: - Planner tests added for mixed file format table with partitioning. Change-Id: Ifae900914a0d255f5a4d9b8539361247dfeaad7b Reviewed-on: http://gerrit.cloudera.org:8080/21871 Reviewed-by: Daniel Becker <[email protected]> Reviewed-by: Zoltan Borok-Nagy <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- common/fbs/IcebergObjects.fbs | 3 - .../org/apache/impala/planner/HdfsScanNode.java | 18 +- .../org/apache/impala/planner/IcebergScanNode.java | 50 +++-- .../org/apache/impala/planner/PlannerTest.java | 11 + .../functional/functional_schema_template.sql | 23 ++ .../datasets/functional/schema_constraints.csv | 1 + .../PlannerTest/iceberg-mixed-file-format.test | 245 +++++++++++++++++++++ 7 files changed, 317 insertions(+), 34 deletions(-) diff --git a/common/fbs/IcebergObjects.fbs b/common/fbs/IcebergObjects.fbs index 20729f313..9de67707f 100644 --- a/common/fbs/IcebergObjects.fbs +++ b/common/fbs/IcebergObjects.fbs @@ -20,9 +20,6 @@ namespace org.apache.impala.fb; enum FbIcebergDataFileFormat: byte { PARQUET, ORC, - // We add AVRO here as a future possibility. - // The Iceberg spec allows AVRO data files, but currently Impala - // cannot read such Iceberg tables. See IMPALA-11158. 
AVRO } diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java index f5a52f3ac..a72eb3df8 100644 --- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java +++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java @@ -495,7 +495,8 @@ public class HdfsScanNode extends ScanNode { * Initialize sampledFiles_, sampledPartitions_, fileFormats_, and countStarSlot_. * @param analyzer Analyzer object used to init this class. */ - private void checkSamplingAndCountStar(Analyzer analyzer) { + private void checkSamplingAndCountStar(Analyzer analyzer) + throws ImpalaRuntimeException { if (sampleParams_ != null) { long percentBytes = sampleParams_.getPercentBytes(); long randomSeed; @@ -521,12 +522,7 @@ public class HdfsScanNode extends ScanNode { } } - // Populate fileFormats_. - for (FeFsPartition partition : getSampledOrRawPartitions()) { - if (partition.getFileFormat() != HdfsFileFormat.ICEBERG) { - fileFormats_.add(partition.getFileFormat()); - } - } + populateFileFormats(); // Initialize countStarSlot_. if (canApplyCountStarOptimization(analyzer, fileFormats_)) { @@ -536,6 +532,14 @@ public class HdfsScanNode extends ScanNode { } } + protected void populateFileFormats() throws ImpalaRuntimeException { + for (FeFsPartition partition : getSampledOrRawPartitions()) { + if (partition.getFileFormat() != HdfsFileFormat.ICEBERG) { + fileFormats_.add(partition.getFileFormat()); + } + } + } + /** * Throws NotImplementedException if we do not support scanning the partition. 
* Specifically: diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java index 62c65070c..d859835b3 100644 --- a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java +++ b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java @@ -82,8 +82,7 @@ public class IcebergScanNode extends HdfsScanNode { public IcebergScanNode(PlanNodeId id, TableRef tblRef, List<Expr> conjuncts, MultiAggregateInfo aggInfo, List<FileDescriptor> fileDescs, - List<Expr> nonIdentityConjuncts, List<Expr> skippedConjuncts, long snapshotId) - throws ImpalaRuntimeException { + List<Expr> nonIdentityConjuncts, List<Expr> skippedConjuncts, long snapshotId) { this(id, tblRef, conjuncts, aggInfo, fileDescs, nonIdentityConjuncts, skippedConjuncts, null, snapshotId); } @@ -91,8 +90,7 @@ public class IcebergScanNode extends HdfsScanNode { public IcebergScanNode(PlanNodeId id, TableRef tblRef, List<Expr> conjuncts, MultiAggregateInfo aggInfo, List<FileDescriptor> fileDescs, List<Expr> nonIdentityConjuncts, List<Expr> skippedConjuncts, PlanNodeId deleteId, - long snapshotId) - throws ImpalaRuntimeException { + long snapshotId) { super(id, tblRef.getDesc(), conjuncts, getIcebergPartition(((FeIcebergTable)tblRef.getTable()).getFeFsTable()), tblRef, aggInfo, null, false); @@ -111,26 +109,6 @@ public class IcebergScanNode extends HdfsScanNode { } nonIdentityConjuncts_ = nonIdentityConjuncts; snapshotId_ = snapshotId; - //TODO IMPALA-11577: optimize file format counting - boolean hasParquet = false; - boolean hasOrc = false; - boolean hasAvro = false; - for (FileDescriptor fileDesc : fileDescs_) { - byte fileFormat = fileDesc.getFbFileMetadata().icebergMetadata().fileFormat(); - if (fileFormat == FbIcebergDataFileFormat.PARQUET) { - hasParquet = true; - } else if (fileFormat == FbIcebergDataFileFormat.ORC) { - hasOrc = true; - } else if (fileFormat == FbIcebergDataFileFormat.AVRO) { - hasAvro = true; - } 
else { - throw new ImpalaRuntimeException(String.format( - "Invalid Iceberg file format of file: %s", fileDesc.getAbsolutePath())); - } - } - if (hasParquet) fileFormats_.add(HdfsFileFormat.PARQUET); - if (hasOrc) fileFormats_.add(HdfsFileFormat.ORC); - if (hasAvro) fileFormats_.add(HdfsFileFormat.AVRO); this.skippedConjuncts_ = skippedConjuncts; this.deleteFileScanNodeId = deleteId; } @@ -282,4 +260,28 @@ public class IcebergScanNode extends HdfsScanNode { } return output.toString(); } + + @Override + protected void populateFileFormats() throws ImpalaRuntimeException { + //TODO IMPALA-11577: optimize file format counting + boolean hasParquet = false; + boolean hasOrc = false; + boolean hasAvro = false; + for (FileDescriptor fileDesc : fileDescs_) { + byte fileFormat = fileDesc.getFbFileMetadata().icebergMetadata().fileFormat(); + if (fileFormat == FbIcebergDataFileFormat.PARQUET) { + hasParquet = true; + } else if (fileFormat == FbIcebergDataFileFormat.ORC) { + hasOrc = true; + } else if (fileFormat == FbIcebergDataFileFormat.AVRO) { + hasAvro = true; + } else { + throw new ImpalaRuntimeException(String.format( + "Invalid Iceberg file format of file: %s", fileDesc.getAbsolutePath())); + } + } + if (hasParquet) fileFormats_.add(HdfsFileFormat.PARQUET); + if (hasOrc) fileFormats_.add(HdfsFileFormat.ORC); + if (hasAvro) fileFormats_.add(HdfsFileFormat.AVRO); + } } diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java index 18dc11639..756025de4 100644 --- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java +++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java @@ -1296,6 +1296,17 @@ public class PlannerTest extends PlannerTestBase { ImmutableSet.of(PlannerTestOption.VALIDATE_CARDINALITY)); } + /** + * Checks file format listing for Iceberg tables. 
+ */ + @Test + public void testIcebergMixedFileFormat() { + TQueryOptions options = defaultQueryOptions(); + options.setExplain_level(TExplainLevel.VERBOSE); + runPlannerTestFile("iceberg-mixed-file-format", "functional_parquet", options, + ImmutableSet.of(PlannerTestOption.VALIDATE_CARDINALITY)); + } + /** * Checks exercising predicate pushdown with Iceberg tables, without predicate * subsetting. diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql index a8541728c..641045ea7 100644 --- a/testdata/datasets/functional/functional_schema_template.sql +++ b/testdata/datasets/functional/functional_schema_template.sql @@ -3872,6 +3872,29 @@ INSERT INTO TABLE {db_name}{db_suffix}.{table_name} values(3, 'parquet', 2.5, fa ---- DATASET functional ---- BASE_TABLE_NAME +iceberg_mixed_file_format_part +---- CREATE_HIVE +CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} ( + string_col string, + double_col double, + bool_col boolean +) +PARTITIONED BY (int_col int) +STORED BY ICEBERG +LOCATION '/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_mixed_file_format_part'; +---- DEPENDENT_LOAD_HIVE +-- This INSERT must run in Hive, because Impala doesn't support inserting into tables +-- with avro and orc file formats. 
+ALTER TABLE {db_name}{db_suffix}.{table_name} SET TBLPROPERTIES('write.format.default'='avro'); +INSERT INTO TABLE {db_name}{db_suffix}.{table_name} values('avro', 0.5, true, 1); +ALTER TABLE {db_name}{db_suffix}.{table_name} SET TBLPROPERTIES('write.format.default'='orc'); +INSERT INTO TABLE {db_name}{db_suffix}.{table_name} values('orc', 1.5, false, 2); +ALTER TABLE {db_name}{db_suffix}.{table_name} SET TBLPROPERTIES('write.format.default'='parquet'); +INSERT INTO TABLE {db_name}{db_suffix}.{table_name} values('parquet', 2.5, false, 3); +==== +---- DATASET +functional +---- BASE_TABLE_NAME iceberg_query_metadata ---- CREATE CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} ( diff --git a/testdata/datasets/functional/schema_constraints.csv b/testdata/datasets/functional/schema_constraints.csv index 4b2668ec9..a3d279df3 100644 --- a/testdata/datasets/functional/schema_constraints.csv +++ b/testdata/datasets/functional/schema_constraints.csv @@ -103,6 +103,7 @@ table_name:iceberg_v2_partitioned_position_deletes_orc, constraint:restrict_to, table_name:iceberg_multiple_storage_locations, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_avro_format, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_mixed_file_format, constraint:restrict_to, table_format:parquet/none/none +table_name:iceberg_mixed_file_format_part, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_test_metadata, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_with_key_metadata, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_lineitem_multiblock, constraint:restrict_to, table_format:parquet/none/none diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-mixed-file-format.test b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-mixed-file-format.test new file mode 100644 index 000000000..58d490248 --- /dev/null +++ 
b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-mixed-file-format.test @@ -0,0 +1,245 @@ +select * from iceberg_mixed_file_format_part where int_col = 1; +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +Per-Host Resources: mem-estimate=36.00MB mem-reservation=4.01MB thread-reservation=2 + PLAN-ROOT SINK + | output exprs: functional_parquet.iceberg_mixed_file_format_part.string_col, functional_parquet.iceberg_mixed_file_format_part.double_col, functional_parquet.iceberg_mixed_file_format_part.bool_col, functional_parquet.iceberg_mixed_file_format_part.int_col + | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 + | + 00:SCAN HDFS [functional_parquet.iceberg_mixed_file_format_part] + HDFS partitions=1/1 files=1 size=726B + Iceberg snapshot id: 1843610873488300188 + skipped Iceberg predicates: int_col = CAST(1 AS INT) + stored statistics: + table: rows=3 size=2.37KB + columns: unavailable + extrapolated-rows=disabled max-scan-range-rows=3 + file formats: [AVRO] + mem-estimate=32.00MB mem-reservation=8.00KB thread-reservation=1 + tuple-ids=0 row-size=25B cardinality=1 + in pipelines: 00(GETNEXT) +---- DISTRIBUTEDPLAN +F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +Per-Host Resources: mem-estimate=4.02MB mem-reservation=4.00MB thread-reservation=1 + PLAN-ROOT SINK + | output exprs: functional_parquet.iceberg_mixed_file_format_part.string_col, functional_parquet.iceberg_mixed_file_format_part.double_col, functional_parquet.iceberg_mixed_file_format_part.bool_col, functional_parquet.iceberg_mixed_file_format_part.int_col + | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 + | + 01:EXCHANGE [UNPARTITIONED] + mem-estimate=16.00KB mem-reservation=0B thread-reservation=0 + tuple-ids=0 row-size=25B cardinality=1 + in pipelines: 00(GETNEXT) + +F00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1 +Per-Host Resources: mem-estimate=32.11MB mem-reservation=8.00KB 
thread-reservation=2 + DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, UNPARTITIONED] + | mem-estimate=116.00KB mem-reservation=0B thread-reservation=0 + 00:SCAN HDFS [functional_parquet.iceberg_mixed_file_format_part, RANDOM] + HDFS partitions=1/1 files=1 size=726B + Iceberg snapshot id: 1843610873488300188 + skipped Iceberg predicates: int_col = CAST(1 AS INT) + stored statistics: + table: rows=3 size=2.37KB + columns: unavailable + extrapolated-rows=disabled max-scan-range-rows=3 + file formats: [AVRO] + mem-estimate=32.00MB mem-reservation=8.00KB thread-reservation=1 + tuple-ids=0 row-size=25B cardinality=1 + in pipelines: 00(GETNEXT) +==== +select * from iceberg_mixed_file_format_part where int_col = 2; +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +Per-Host Resources: mem-estimate=132.00MB mem-reservation=4.08MB thread-reservation=2 + PLAN-ROOT SINK + | output exprs: functional_parquet.iceberg_mixed_file_format_part.string_col, functional_parquet.iceberg_mixed_file_format_part.double_col, functional_parquet.iceberg_mixed_file_format_part.bool_col, functional_parquet.iceberg_mixed_file_format_part.int_col + | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 + | + 00:SCAN HDFS [functional_parquet.iceberg_mixed_file_format_part] + HDFS partitions=1/1 files=1 size=542B + Iceberg snapshot id: 1843610873488300188 + skipped Iceberg predicates: int_col = CAST(2 AS INT) + stored statistics: + table: rows=3 size=2.37KB + columns: unavailable + extrapolated-rows=disabled max-scan-range-rows=3 + file formats: [ORC] + mem-estimate=128.00MB mem-reservation=80.00KB thread-reservation=1 + tuple-ids=0 row-size=25B cardinality=1 + in pipelines: 00(GETNEXT) +---- DISTRIBUTEDPLAN +F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +Per-Host Resources: mem-estimate=4.02MB mem-reservation=4.00MB thread-reservation=1 + PLAN-ROOT SINK + | output exprs: functional_parquet.iceberg_mixed_file_format_part.string_col, 
functional_parquet.iceberg_mixed_file_format_part.double_col, functional_parquet.iceberg_mixed_file_format_part.bool_col, functional_parquet.iceberg_mixed_file_format_part.int_col + | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 + | + 01:EXCHANGE [UNPARTITIONED] + mem-estimate=16.00KB mem-reservation=0B thread-reservation=0 + tuple-ids=0 row-size=25B cardinality=1 + in pipelines: 00(GETNEXT) + +F00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1 +Per-Host Resources: mem-estimate=128.11MB mem-reservation=80.00KB thread-reservation=2 + DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, UNPARTITIONED] + | mem-estimate=116.00KB mem-reservation=0B thread-reservation=0 + 00:SCAN HDFS [functional_parquet.iceberg_mixed_file_format_part, RANDOM] + HDFS partitions=1/1 files=1 size=542B + Iceberg snapshot id: 1843610873488300188 + skipped Iceberg predicates: int_col = CAST(2 AS INT) + stored statistics: + table: rows=3 size=2.37KB + columns: unavailable + extrapolated-rows=disabled max-scan-range-rows=3 + file formats: [ORC] + mem-estimate=128.00MB mem-reservation=80.00KB thread-reservation=1 + tuple-ids=0 row-size=25B cardinality=1 + in pipelines: 00(GETNEXT) +==== +select * from iceberg_mixed_file_format_part where int_col = 3; +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +Per-Host Resources: mem-estimate=68.00MB mem-reservation=4.03MB thread-reservation=2 + PLAN-ROOT SINK + | output exprs: functional_parquet.iceberg_mixed_file_format_part.string_col, functional_parquet.iceberg_mixed_file_format_part.double_col, functional_parquet.iceberg_mixed_file_format_part.bool_col, functional_parquet.iceberg_mixed_file_format_part.int_col + | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 + | + 00:SCAN HDFS [functional_parquet.iceberg_mixed_file_format_part] + HDFS partitions=1/1 files=1 size=1.14KB + Iceberg snapshot id: 1843610873488300188 + skipped Iceberg predicates: int_col = CAST(3 AS INT) + 
stored statistics: + table: rows=3 size=2.37KB + columns: unavailable + extrapolated-rows=disabled max-scan-range-rows=3 + file formats: [PARQUET] + mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 + tuple-ids=0 row-size=25B cardinality=1 + in pipelines: 00(GETNEXT) +---- DISTRIBUTEDPLAN +F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +Per-Host Resources: mem-estimate=4.02MB mem-reservation=4.00MB thread-reservation=1 + PLAN-ROOT SINK + | output exprs: functional_parquet.iceberg_mixed_file_format_part.string_col, functional_parquet.iceberg_mixed_file_format_part.double_col, functional_parquet.iceberg_mixed_file_format_part.bool_col, functional_parquet.iceberg_mixed_file_format_part.int_col + | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 + | + 01:EXCHANGE [UNPARTITIONED] + mem-estimate=16.00KB mem-reservation=0B thread-reservation=0 + tuple-ids=0 row-size=25B cardinality=1 + in pipelines: 00(GETNEXT) + +F00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1 +Per-Host Resources: mem-estimate=64.11MB mem-reservation=32.00KB thread-reservation=2 + DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, UNPARTITIONED] + | mem-estimate=116.00KB mem-reservation=0B thread-reservation=0 + 00:SCAN HDFS [functional_parquet.iceberg_mixed_file_format_part, RANDOM] + HDFS partitions=1/1 files=1 size=1.14KB + Iceberg snapshot id: 1843610873488300188 + skipped Iceberg predicates: int_col = CAST(3 AS INT) + stored statistics: + table: rows=3 size=2.37KB + columns: unavailable + extrapolated-rows=disabled max-scan-range-rows=3 + file formats: [PARQUET] + mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 + tuple-ids=0 row-size=25B cardinality=1 + in pipelines: 00(GETNEXT) +==== +select * from iceberg_mixed_file_format_part where int_col in (1, 3); +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +Per-Host Resources: mem-estimate=68.00MB mem-reservation=4.03MB thread-reservation=2 + PLAN-ROOT SINK + | output 
exprs: functional_parquet.iceberg_mixed_file_format_part.string_col, functional_parquet.iceberg_mixed_file_format_part.double_col, functional_parquet.iceberg_mixed_file_format_part.bool_col, functional_parquet.iceberg_mixed_file_format_part.int_col + | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 + | + 00:SCAN HDFS [functional_parquet.iceberg_mixed_file_format_part] + HDFS partitions=1/1 files=2 size=1.84KB + Iceberg snapshot id: 1843610873488300188 + skipped Iceberg predicates: int_col IN (CAST(1 AS INT), CAST(3 AS INT)) + stored statistics: + table: rows=3 size=2.37KB + columns: unavailable + extrapolated-rows=disabled max-scan-range-rows=1 + file formats: [AVRO, PARQUET] + mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 + tuple-ids=0 row-size=25B cardinality=2 + in pipelines: 00(GETNEXT) +---- DISTRIBUTEDPLAN +F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +Per-Host Resources: mem-estimate=4.02MB mem-reservation=4.00MB thread-reservation=1 + PLAN-ROOT SINK + | output exprs: functional_parquet.iceberg_mixed_file_format_part.string_col, functional_parquet.iceberg_mixed_file_format_part.double_col, functional_parquet.iceberg_mixed_file_format_part.bool_col, functional_parquet.iceberg_mixed_file_format_part.int_col + | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 + | + 01:EXCHANGE [UNPARTITIONED] + mem-estimate=16.00KB mem-reservation=0B thread-reservation=0 + tuple-ids=0 row-size=25B cardinality=2 + in pipelines: 00(GETNEXT) + +F00:PLAN FRAGMENT [RANDOM] hosts=2 instances=2 +Per-Host Resources: mem-estimate=64.11MB mem-reservation=32.00KB thread-reservation=2 + DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, UNPARTITIONED] + | mem-estimate=116.00KB mem-reservation=0B thread-reservation=0 + 00:SCAN HDFS [functional_parquet.iceberg_mixed_file_format_part, RANDOM] + HDFS partitions=1/1 files=2 size=1.84KB + Iceberg snapshot id: 1843610873488300188 + skipped Iceberg 
predicates: int_col IN (CAST(1 AS INT), CAST(3 AS INT)) + stored statistics: + table: rows=3 size=2.37KB + columns: unavailable + extrapolated-rows=disabled max-scan-range-rows=1 + file formats: [AVRO, PARQUET] + mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 + tuple-ids=0 row-size=25B cardinality=2 + in pipelines: 00(GETNEXT) +==== +select * from iceberg_mixed_file_format_part where int_col in (2, 3); +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +Per-Host Resources: mem-estimate=132.00MB mem-reservation=4.08MB thread-reservation=2 + PLAN-ROOT SINK + | output exprs: functional_parquet.iceberg_mixed_file_format_part.string_col, functional_parquet.iceberg_mixed_file_format_part.double_col, functional_parquet.iceberg_mixed_file_format_part.bool_col, functional_parquet.iceberg_mixed_file_format_part.int_col + | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 + | + 00:SCAN HDFS [functional_parquet.iceberg_mixed_file_format_part] + HDFS partitions=1/1 files=2 size=1.67KB + Iceberg snapshot id: 1843610873488300188 + skipped Iceberg predicates: int_col IN (CAST(2 AS INT), CAST(3 AS INT)) + stored statistics: + table: rows=3 size=2.37KB + columns: unavailable + extrapolated-rows=disabled max-scan-range-rows=2 + file formats: [ORC, PARQUET] + mem-estimate=128.00MB mem-reservation=80.00KB thread-reservation=1 + tuple-ids=0 row-size=25B cardinality=2 + in pipelines: 00(GETNEXT) +---- DISTRIBUTEDPLAN +F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +Per-Host Resources: mem-estimate=4.02MB mem-reservation=4.00MB thread-reservation=1 + PLAN-ROOT SINK + | output exprs: functional_parquet.iceberg_mixed_file_format_part.string_col, functional_parquet.iceberg_mixed_file_format_part.double_col, functional_parquet.iceberg_mixed_file_format_part.bool_col, functional_parquet.iceberg_mixed_file_format_part.int_col + | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 + | + 
01:EXCHANGE [UNPARTITIONED] + mem-estimate=16.00KB mem-reservation=0B thread-reservation=0 + tuple-ids=0 row-size=25B cardinality=2 + in pipelines: 00(GETNEXT) + +F00:PLAN FRAGMENT [RANDOM] hosts=2 instances=2 +Per-Host Resources: mem-estimate=128.11MB mem-reservation=80.00KB thread-reservation=2 + DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, UNPARTITIONED] + | mem-estimate=116.00KB mem-reservation=0B thread-reservation=0 + 00:SCAN HDFS [functional_parquet.iceberg_mixed_file_format_part, RANDOM] + HDFS partitions=1/1 files=2 size=1.67KB + Iceberg snapshot id: 1843610873488300188 + skipped Iceberg predicates: int_col IN (CAST(2 AS INT), CAST(3 AS INT)) + stored statistics: + table: rows=3 size=2.37KB + columns: unavailable + extrapolated-rows=disabled max-scan-range-rows=2 + file formats: [ORC, PARQUET] + mem-estimate=128.00MB mem-reservation=80.00KB thread-reservation=1 + tuple-ids=0 row-size=25B cardinality=2 + in pipelines: 00(GETNEXT) +====
