This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit df3a38096e6f230718792b384a241b0e747108fe Author: Peter Rozsa <[email protected]> AuthorDate: Thu Jun 27 15:59:08 2024 +0200 IMPALA-12861: Fix mixed file format listing for Iceberg tables This change fixes file format information collection for Iceberg tables. Previously, all file descriptors' file formats were collected from getSampledOrRawPartitions() in HdfsScanNode for Iceberg tables; now the collection part is extracted as a method and it's overridden in IcebergScanNode. Now, only the to-be-scanned file descriptor's file format is recorded, showing the correct file formats for each SCAN node in the plans. Tests: - Planner tests added for mixed file format table with partitioning. Change-Id: Ifae900914a0d255f5a4d9b8539361247dfeaad7b Reviewed-on: http://gerrit.cloudera.org:8080/21871 Reviewed-by: Daniel Becker <[email protected]> Reviewed-by: Zoltan Borok-Nagy <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- common/fbs/IcebergObjects.fbs | 3 - .../org/apache/impala/planner/HdfsScanNode.java | 18 +- .../org/apache/impala/planner/IcebergScanNode.java | 50 +++-- .../org/apache/impala/planner/PlannerTest.java | 11 + .../functional/functional_schema_template.sql | 23 ++ .../datasets/functional/schema_constraints.csv | 1 + .../PlannerTest/iceberg-mixed-file-format.test | 245 +++++++++++++++++++++ 7 files changed, 317 insertions(+), 34 deletions(-) diff --git a/common/fbs/IcebergObjects.fbs b/common/fbs/IcebergObjects.fbs index 20729f313..9de67707f 100644 --- a/common/fbs/IcebergObjects.fbs +++ b/common/fbs/IcebergObjects.fbs @@ -20,9 +20,6 @@ namespace org.apache.impala.fb; enum FbIcebergDataFileFormat: byte { PARQUET, ORC, - // We add AVRO here as a future possibility. - // The Iceberg spec allows AVRO data files, but currently Impala - // cannot read such Iceberg tables. See IMPALA-11158. 
AVRO } diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java index f5a52f3ac..a72eb3df8 100644 --- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java +++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java @@ -495,7 +495,8 @@ public class HdfsScanNode extends ScanNode { * Initialize sampledFiles_, sampledPartitions_, fileFormats_, and countStarSlot_. * @param analyzer Analyzer object used to init this class. */ - private void checkSamplingAndCountStar(Analyzer analyzer) { + private void checkSamplingAndCountStar(Analyzer analyzer) + throws ImpalaRuntimeException { if (sampleParams_ != null) { long percentBytes = sampleParams_.getPercentBytes(); long randomSeed; @@ -521,12 +522,7 @@ public class HdfsScanNode extends ScanNode { } } - // Populate fileFormats_. - for (FeFsPartition partition : getSampledOrRawPartitions()) { - if (partition.getFileFormat() != HdfsFileFormat.ICEBERG) { - fileFormats_.add(partition.getFileFormat()); - } - } + populateFileFormats(); // Initialize countStarSlot_. if (canApplyCountStarOptimization(analyzer, fileFormats_)) { @@ -536,6 +532,14 @@ public class HdfsScanNode extends ScanNode { } } + protected void populateFileFormats() throws ImpalaRuntimeException { + for (FeFsPartition partition : getSampledOrRawPartitions()) { + if (partition.getFileFormat() != HdfsFileFormat.ICEBERG) { + fileFormats_.add(partition.getFileFormat()); + } + } + } + /** * Throws NotImplementedException if we do not support scanning the partition. 
* Specifically: diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java index 62c65070c..d859835b3 100644 --- a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java +++ b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java @@ -82,8 +82,7 @@ public class IcebergScanNode extends HdfsScanNode { public IcebergScanNode(PlanNodeId id, TableRef tblRef, List<Expr> conjuncts, MultiAggregateInfo aggInfo, List<FileDescriptor> fileDescs, - List<Expr> nonIdentityConjuncts, List<Expr> skippedConjuncts, long snapshotId) - throws ImpalaRuntimeException { + List<Expr> nonIdentityConjuncts, List<Expr> skippedConjuncts, long snapshotId) { this(id, tblRef, conjuncts, aggInfo, fileDescs, nonIdentityConjuncts, skippedConjuncts, null, snapshotId); } @@ -91,8 +90,7 @@ public class IcebergScanNode extends HdfsScanNode { public IcebergScanNode(PlanNodeId id, TableRef tblRef, List<Expr> conjuncts, MultiAggregateInfo aggInfo, List<FileDescriptor> fileDescs, List<Expr> nonIdentityConjuncts, List<Expr> skippedConjuncts, PlanNodeId deleteId, - long snapshotId) - throws ImpalaRuntimeException { + long snapshotId) { super(id, tblRef.getDesc(), conjuncts, getIcebergPartition(((FeIcebergTable)tblRef.getTable()).getFeFsTable()), tblRef, aggInfo, null, false); @@ -111,26 +109,6 @@ public class IcebergScanNode extends HdfsScanNode { } nonIdentityConjuncts_ = nonIdentityConjuncts; snapshotId_ = snapshotId; - //TODO IMPALA-11577: optimize file format counting - boolean hasParquet = false; - boolean hasOrc = false; - boolean hasAvro = false; - for (FileDescriptor fileDesc : fileDescs_) { - byte fileFormat = fileDesc.getFbFileMetadata().icebergMetadata().fileFormat(); - if (fileFormat == FbIcebergDataFileFormat.PARQUET) { - hasParquet = true; - } else if (fileFormat == FbIcebergDataFileFormat.ORC) { - hasOrc = true; - } else if (fileFormat == FbIcebergDataFileFormat.AVRO) { - hasAvro = true; - } 
else { - throw new ImpalaRuntimeException(String.format( - "Invalid Iceberg file format of file: %s", fileDesc.getAbsolutePath())); - } - } - if (hasParquet) fileFormats_.add(HdfsFileFormat.PARQUET); - if (hasOrc) fileFormats_.add(HdfsFileFormat.ORC); - if (hasAvro) fileFormats_.add(HdfsFileFormat.AVRO); this.skippedConjuncts_ = skippedConjuncts; this.deleteFileScanNodeId = deleteId; } @@ -282,4 +260,28 @@ public class IcebergScanNode extends HdfsScanNode { } return output.toString(); } + + @Override + protected void populateFileFormats() throws ImpalaRuntimeException { + //TODO IMPALA-11577: optimize file format counting + boolean hasParquet = false; + boolean hasOrc = false; + boolean hasAvro = false; + for (FileDescriptor fileDesc : fileDescs_) { + byte fileFormat = fileDesc.getFbFileMetadata().icebergMetadata().fileFormat(); + if (fileFormat == FbIcebergDataFileFormat.PARQUET) { + hasParquet = true; + } else if (fileFormat == FbIcebergDataFileFormat.ORC) { + hasOrc = true; + } else if (fileFormat == FbIcebergDataFileFormat.AVRO) { + hasAvro = true; + } else { + throw new ImpalaRuntimeException(String.format( + "Invalid Iceberg file format of file: %s", fileDesc.getAbsolutePath())); + } + } + if (hasParquet) fileFormats_.add(HdfsFileFormat.PARQUET); + if (hasOrc) fileFormats_.add(HdfsFileFormat.ORC); + if (hasAvro) fileFormats_.add(HdfsFileFormat.AVRO); + } } diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java index 18dc11639..756025de4 100644 --- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java +++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java @@ -1296,6 +1296,17 @@ public class PlannerTest extends PlannerTestBase { ImmutableSet.of(PlannerTestOption.VALIDATE_CARDINALITY)); } + /** + * Checks file format listing for Iceberg tables. 
+ */ + @Test + public void testIcebergMixedFileFormat() { + TQueryOptions options = defaultQueryOptions(); + options.setExplain_level(TExplainLevel.VERBOSE); + runPlannerTestFile("iceberg-mixed-file-format", "functional_parquet", options, + ImmutableSet.of(PlannerTestOption.VALIDATE_CARDINALITY)); + } + /** * Checks exercising predicate pushdown with Iceberg tables, without predicate * subsetting. diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql index a8541728c..641045ea7 100644 --- a/testdata/datasets/functional/functional_schema_template.sql +++ b/testdata/datasets/functional/functional_schema_template.sql @@ -3872,6 +3872,29 @@ INSERT INTO TABLE {db_name}{db_suffix}.{table_name} values(3, 'parquet', 2.5, fa ---- DATASET functional ---- BASE_TABLE_NAME +iceberg_mixed_file_format_part +---- CREATE_HIVE +CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} ( + string_col string, + double_col double, + bool_col boolean +) +PARTITIONED BY (int_col int) +STORED BY ICEBERG +LOCATION '/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_mixed_file_format_part'; +---- DEPENDENT_LOAD_HIVE +-- This INSERT must run in Hive, because Impala doesn't support inserting into tables +-- with avro and orc file formats. 
+ALTER TABLE {db_name}{db_suffix}.{table_name} SET TBLPROPERTIES('write.format.default'='avro'); +INSERT INTO TABLE {db_name}{db_suffix}.{table_name} values('avro', 0.5, true, 1); +ALTER TABLE {db_name}{db_suffix}.{table_name} SET TBLPROPERTIES('write.format.default'='orc'); +INSERT INTO TABLE {db_name}{db_suffix}.{table_name} values('orc', 1.5, false, 2); +ALTER TABLE {db_name}{db_suffix}.{table_name} SET TBLPROPERTIES('write.format.default'='parquet'); +INSERT INTO TABLE {db_name}{db_suffix}.{table_name} values('parquet', 2.5, false, 3); +==== +---- DATASET +functional +---- BASE_TABLE_NAME iceberg_query_metadata ---- CREATE CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} ( diff --git a/testdata/datasets/functional/schema_constraints.csv b/testdata/datasets/functional/schema_constraints.csv index 4b2668ec9..a3d279df3 100644 --- a/testdata/datasets/functional/schema_constraints.csv +++ b/testdata/datasets/functional/schema_constraints.csv @@ -103,6 +103,7 @@ table_name:iceberg_v2_partitioned_position_deletes_orc, constraint:restrict_to, table_name:iceberg_multiple_storage_locations, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_avro_format, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_mixed_file_format, constraint:restrict_to, table_format:parquet/none/none +table_name:iceberg_mixed_file_format_part, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_test_metadata, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_with_key_metadata, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_lineitem_multiblock, constraint:restrict_to, table_format:parquet/none/none diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-mixed-file-format.test b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-mixed-file-format.test new file mode 100644 index 000000000..58d490248 --- /dev/null +++ 
b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-mixed-file-format.test @@ -0,0 +1,245 @@ +select * from iceberg_mixed_file_format_part where int_col = 1; +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +Per-Host Resources: mem-estimate=36.00MB mem-reservation=4.01MB thread-reservation=2 + PLAN-ROOT SINK + | output exprs: functional_parquet.iceberg_mixed_file_format_part.string_col, functional_parquet.iceberg_mixed_file_format_part.double_col, functional_parquet.iceberg_mixed_file_format_part.bool_col, functional_parquet.iceberg_mixed_file_format_part.int_col + | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 + | + 00:SCAN HDFS [functional_parquet.iceberg_mixed_file_format_part] + HDFS partitions=1/1 files=1 size=726B + Iceberg snapshot id: 1843610873488300188 + skipped Iceberg predicates: int_col = CAST(1 AS INT) + stored statistics: + table: rows=3 size=2.37KB + columns: unavailable + extrapolated-rows=disabled max-scan-range-rows=3 + file formats: [AVRO] + mem-estimate=32.00MB mem-reservation=8.00KB thread-reservation=1 + tuple-ids=0 row-size=25B cardinality=1 + in pipelines: 00(GETNEXT) +---- DISTRIBUTEDPLAN +F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +Per-Host Resources: mem-estimate=4.02MB mem-reservation=4.00MB thread-reservation=1 + PLAN-ROOT SINK + | output exprs: functional_parquet.iceberg_mixed_file_format_part.string_col, functional_parquet.iceberg_mixed_file_format_part.double_col, functional_parquet.iceberg_mixed_file_format_part.bool_col, functional_parquet.iceberg_mixed_file_format_part.int_col + | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 + | + 01:EXCHANGE [UNPARTITIONED] + mem-estimate=16.00KB mem-reservation=0B thread-reservation=0 + tuple-ids=0 row-size=25B cardinality=1 + in pipelines: 00(GETNEXT) + +F00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1 +Per-Host Resources: mem-estimate=32.11MB mem-reservation=8.00KB 
thread-reservation=2 + DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, UNPARTITIONED] + | mem-estimate=116.00KB mem-reservation=0B thread-reservation=0 + 00:SCAN HDFS [functional_parquet.iceberg_mixed_file_format_part, RANDOM] + HDFS partitions=1/1 files=1 size=726B + Iceberg snapshot id: 1843610873488300188 + skipped Iceberg predicates: int_col = CAST(1 AS INT) + stored statistics: + table: rows=3 size=2.37KB + columns: unavailable + extrapolated-rows=disabled max-scan-range-rows=3 + file formats: [AVRO] + mem-estimate=32.00MB mem-reservation=8.00KB thread-reservation=1 + tuple-ids=0 row-size=25B cardinality=1 + in pipelines: 00(GETNEXT) +==== +select * from iceberg_mixed_file_format_part where int_col = 2; +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +Per-Host Resources: mem-estimate=132.00MB mem-reservation=4.08MB thread-reservation=2 + PLAN-ROOT SINK + | output exprs: functional_parquet.iceberg_mixed_file_format_part.string_col, functional_parquet.iceberg_mixed_file_format_part.double_col, functional_parquet.iceberg_mixed_file_format_part.bool_col, functional_parquet.iceberg_mixed_file_format_part.int_col + | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 + | + 00:SCAN HDFS [functional_parquet.iceberg_mixed_file_format_part] + HDFS partitions=1/1 files=1 size=542B + Iceberg snapshot id: 1843610873488300188 + skipped Iceberg predicates: int_col = CAST(2 AS INT) + stored statistics: + table: rows=3 size=2.37KB + columns: unavailable + extrapolated-rows=disabled max-scan-range-rows=3 + file formats: [ORC] + mem-estimate=128.00MB mem-reservation=80.00KB thread-reservation=1 + tuple-ids=0 row-size=25B cardinality=1 + in pipelines: 00(GETNEXT) +---- DISTRIBUTEDPLAN +F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +Per-Host Resources: mem-estimate=4.02MB mem-reservation=4.00MB thread-reservation=1 + PLAN-ROOT SINK + | output exprs: functional_parquet.iceberg_mixed_file_format_part.string_col, 
functional_parquet.iceberg_mixed_file_format_part.double_col, functional_parquet.iceberg_mixed_file_format_part.bool_col, functional_parquet.iceberg_mixed_file_format_part.int_col + | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 + | + 01:EXCHANGE [UNPARTITIONED] + mem-estimate=16.00KB mem-reservation=0B thread-reservation=0 + tuple-ids=0 row-size=25B cardinality=1 + in pipelines: 00(GETNEXT) + +F00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1 +Per-Host Resources: mem-estimate=128.11MB mem-reservation=80.00KB thread-reservation=2 + DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, UNPARTITIONED] + | mem-estimate=116.00KB mem-reservation=0B thread-reservation=0 + 00:SCAN HDFS [functional_parquet.iceberg_mixed_file_format_part, RANDOM] + HDFS partitions=1/1 files=1 size=542B + Iceberg snapshot id: 1843610873488300188 + skipped Iceberg predicates: int_col = CAST(2 AS INT) + stored statistics: + table: rows=3 size=2.37KB + columns: unavailable + extrapolated-rows=disabled max-scan-range-rows=3 + file formats: [ORC] + mem-estimate=128.00MB mem-reservation=80.00KB thread-reservation=1 + tuple-ids=0 row-size=25B cardinality=1 + in pipelines: 00(GETNEXT) +==== +select * from iceberg_mixed_file_format_part where int_col = 3; +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +Per-Host Resources: mem-estimate=68.00MB mem-reservation=4.03MB thread-reservation=2 + PLAN-ROOT SINK + | output exprs: functional_parquet.iceberg_mixed_file_format_part.string_col, functional_parquet.iceberg_mixed_file_format_part.double_col, functional_parquet.iceberg_mixed_file_format_part.bool_col, functional_parquet.iceberg_mixed_file_format_part.int_col + | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 + | + 00:SCAN HDFS [functional_parquet.iceberg_mixed_file_format_part] + HDFS partitions=1/1 files=1 size=1.14KB + Iceberg snapshot id: 1843610873488300188 + skipped Iceberg predicates: int_col = CAST(3 AS INT) + 
stored statistics: + table: rows=3 size=2.37KB + columns: unavailable + extrapolated-rows=disabled max-scan-range-rows=3 + file formats: [PARQUET] + mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 + tuple-ids=0 row-size=25B cardinality=1 + in pipelines: 00(GETNEXT) +---- DISTRIBUTEDPLAN +F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +Per-Host Resources: mem-estimate=4.02MB mem-reservation=4.00MB thread-reservation=1 + PLAN-ROOT SINK + | output exprs: functional_parquet.iceberg_mixed_file_format_part.string_col, functional_parquet.iceberg_mixed_file_format_part.double_col, functional_parquet.iceberg_mixed_file_format_part.bool_col, functional_parquet.iceberg_mixed_file_format_part.int_col + | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 + | + 01:EXCHANGE [UNPARTITIONED] + mem-estimate=16.00KB mem-reservation=0B thread-reservation=0 + tuple-ids=0 row-size=25B cardinality=1 + in pipelines: 00(GETNEXT) + +F00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1 +Per-Host Resources: mem-estimate=64.11MB mem-reservation=32.00KB thread-reservation=2 + DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, UNPARTITIONED] + | mem-estimate=116.00KB mem-reservation=0B thread-reservation=0 + 00:SCAN HDFS [functional_parquet.iceberg_mixed_file_format_part, RANDOM] + HDFS partitions=1/1 files=1 size=1.14KB + Iceberg snapshot id: 1843610873488300188 + skipped Iceberg predicates: int_col = CAST(3 AS INT) + stored statistics: + table: rows=3 size=2.37KB + columns: unavailable + extrapolated-rows=disabled max-scan-range-rows=3 + file formats: [PARQUET] + mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 + tuple-ids=0 row-size=25B cardinality=1 + in pipelines: 00(GETNEXT) +==== +select * from iceberg_mixed_file_format_part where int_col in (1, 3); +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +Per-Host Resources: mem-estimate=68.00MB mem-reservation=4.03MB thread-reservation=2 + PLAN-ROOT SINK + | output 
exprs: functional_parquet.iceberg_mixed_file_format_part.string_col, functional_parquet.iceberg_mixed_file_format_part.double_col, functional_parquet.iceberg_mixed_file_format_part.bool_col, functional_parquet.iceberg_mixed_file_format_part.int_col + | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 + | + 00:SCAN HDFS [functional_parquet.iceberg_mixed_file_format_part] + HDFS partitions=1/1 files=2 size=1.84KB + Iceberg snapshot id: 1843610873488300188 + skipped Iceberg predicates: int_col IN (CAST(1 AS INT), CAST(3 AS INT)) + stored statistics: + table: rows=3 size=2.37KB + columns: unavailable + extrapolated-rows=disabled max-scan-range-rows=1 + file formats: [AVRO, PARQUET] + mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 + tuple-ids=0 row-size=25B cardinality=2 + in pipelines: 00(GETNEXT) +---- DISTRIBUTEDPLAN +F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +Per-Host Resources: mem-estimate=4.02MB mem-reservation=4.00MB thread-reservation=1 + PLAN-ROOT SINK + | output exprs: functional_parquet.iceberg_mixed_file_format_part.string_col, functional_parquet.iceberg_mixed_file_format_part.double_col, functional_parquet.iceberg_mixed_file_format_part.bool_col, functional_parquet.iceberg_mixed_file_format_part.int_col + | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 + | + 01:EXCHANGE [UNPARTITIONED] + mem-estimate=16.00KB mem-reservation=0B thread-reservation=0 + tuple-ids=0 row-size=25B cardinality=2 + in pipelines: 00(GETNEXT) + +F00:PLAN FRAGMENT [RANDOM] hosts=2 instances=2 +Per-Host Resources: mem-estimate=64.11MB mem-reservation=32.00KB thread-reservation=2 + DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, UNPARTITIONED] + | mem-estimate=116.00KB mem-reservation=0B thread-reservation=0 + 00:SCAN HDFS [functional_parquet.iceberg_mixed_file_format_part, RANDOM] + HDFS partitions=1/1 files=2 size=1.84KB + Iceberg snapshot id: 1843610873488300188 + skipped Iceberg 
predicates: int_col IN (CAST(1 AS INT), CAST(3 AS INT)) + stored statistics: + table: rows=3 size=2.37KB + columns: unavailable + extrapolated-rows=disabled max-scan-range-rows=1 + file formats: [AVRO, PARQUET] + mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 + tuple-ids=0 row-size=25B cardinality=2 + in pipelines: 00(GETNEXT) +==== +select * from iceberg_mixed_file_format_part where int_col in (2, 3); +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +Per-Host Resources: mem-estimate=132.00MB mem-reservation=4.08MB thread-reservation=2 + PLAN-ROOT SINK + | output exprs: functional_parquet.iceberg_mixed_file_format_part.string_col, functional_parquet.iceberg_mixed_file_format_part.double_col, functional_parquet.iceberg_mixed_file_format_part.bool_col, functional_parquet.iceberg_mixed_file_format_part.int_col + | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 + | + 00:SCAN HDFS [functional_parquet.iceberg_mixed_file_format_part] + HDFS partitions=1/1 files=2 size=1.67KB + Iceberg snapshot id: 1843610873488300188 + skipped Iceberg predicates: int_col IN (CAST(2 AS INT), CAST(3 AS INT)) + stored statistics: + table: rows=3 size=2.37KB + columns: unavailable + extrapolated-rows=disabled max-scan-range-rows=2 + file formats: [ORC, PARQUET] + mem-estimate=128.00MB mem-reservation=80.00KB thread-reservation=1 + tuple-ids=0 row-size=25B cardinality=2 + in pipelines: 00(GETNEXT) +---- DISTRIBUTEDPLAN +F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +Per-Host Resources: mem-estimate=4.02MB mem-reservation=4.00MB thread-reservation=1 + PLAN-ROOT SINK + | output exprs: functional_parquet.iceberg_mixed_file_format_part.string_col, functional_parquet.iceberg_mixed_file_format_part.double_col, functional_parquet.iceberg_mixed_file_format_part.bool_col, functional_parquet.iceberg_mixed_file_format_part.int_col + | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 + | + 
01:EXCHANGE [UNPARTITIONED] + mem-estimate=16.00KB mem-reservation=0B thread-reservation=0 + tuple-ids=0 row-size=25B cardinality=2 + in pipelines: 00(GETNEXT) + +F00:PLAN FRAGMENT [RANDOM] hosts=2 instances=2 +Per-Host Resources: mem-estimate=128.11MB mem-reservation=80.00KB thread-reservation=2 + DATASTREAM SINK [FRAGMENT=F01, EXCHANGE=01, UNPARTITIONED] + | mem-estimate=116.00KB mem-reservation=0B thread-reservation=0 + 00:SCAN HDFS [functional_parquet.iceberg_mixed_file_format_part, RANDOM] + HDFS partitions=1/1 files=2 size=1.67KB + Iceberg snapshot id: 1843610873488300188 + skipped Iceberg predicates: int_col IN (CAST(2 AS INT), CAST(3 AS INT)) + stored statistics: + table: rows=3 size=2.37KB + columns: unavailable + extrapolated-rows=disabled max-scan-range-rows=2 + file formats: [ORC, PARQUET] + mem-estimate=128.00MB mem-reservation=80.00KB thread-reservation=1 + tuple-ids=0 row-size=25B cardinality=2 + in pipelines: 00(GETNEXT) +====
