This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit f3504566fb97719eec81771a61785cedc85ba6fa Author: LPL <[email protected]> AuthorDate: Tue Oct 25 12:22:12 2022 +0800 IMPALA-11681: Set table stats for the Iceberg table by its partition stats For the Iceberg tables, table-level statistics such as numRows can be computed according to iceberg partition stats, which is more accurate and real-time. Obtaining these statistics is independent of StatsSetupConst.ROW_COUNT and StatsSetupConst.TOTAL_SIZE in HMS. This is an improvement for estimating the cardinality of the Iceberg tables. But now the calculation of V2 Iceberg table is not accurate, maybe after IMPALA-11516(Return better partition stats for V2 tables) is ready, they can be considered to replace those HMS statistics. Testing: - Existing tests - Test on 'On-demand Metadata' mode - For 'select * from iceberg_v2_positional_not_all_data_files_have_delete_files where i = (select max(i) from iceberg_v2_positional_update_all_rows)', the 'Join Order' and 'Distribution Mode' are the same as when table stats are present Change-Id: I3e92d3f25e2a57a64556249410d0af3522598c00 Reviewed-on: http://gerrit.cloudera.org:8080/19168 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- common/fbs/CatalogObjects.fbs | 2 +- common/protobuf/planner.proto | 2 +- common/thrift/PlanNodes.thrift | 2 +- .../org/apache/impala/catalog/FeIcebergTable.java | 48 +++- .../impala/catalog/IcebergContentFileStore.java | 2 + .../org/apache/impala/catalog/IcebergTable.java | 11 +- .../impala/catalog/local/LocalIcebergTable.java | 1 + .../queries/PlannerTest/iceberg-v2-tables.test | 250 +++++++++++---------- .../queries/PlannerTest/tablesample.test | 36 +-- 9 files changed, 198 insertions(+), 156 deletions(-) diff --git a/common/fbs/CatalogObjects.fbs b/common/fbs/CatalogObjects.fbs index 973007d2c..8ecfb2f11 100644 --- a/common/fbs/CatalogObjects.fbs +++ b/common/fbs/CatalogObjects.fbs @@ -80,7 +80,7 @@ table FbFileDesc { // Whether this 
file is erasure-coded is_ec: bool = false (id: 5); - // The absolute path of the file, it`s used only when data files are outside of + // The absolute path of the file, it's used only when data files are outside of // the Iceberg table location (IMPALA-11507). absolute_path: string (id: 6); } diff --git a/common/protobuf/planner.proto b/common/protobuf/planner.proto index 208ebecd4..4e7c8ac63 100644 --- a/common/protobuf/planner.proto +++ b/common/protobuf/planner.proto @@ -55,7 +55,7 @@ message HdfsFileSplitPB { // any consistent hash. optional int32 partition_path_hash = 9; - // The absolute path of the file, it`s used only when data files are outside of + // The absolute path of the file, it's used only when data files are outside of // the Iceberg table location (IMPALA-11507). optional string absolute_path = 10; } diff --git a/common/thrift/PlanNodes.thrift b/common/thrift/PlanNodes.thrift index 70e208637..46e8a5a30 100644 --- a/common/thrift/PlanNodes.thrift +++ b/common/thrift/PlanNodes.thrift @@ -224,7 +224,7 @@ struct THdfsFileSplit { // any consistent hash. 9: required i32 partition_path_hash - // The absolute path of the file, it`s used only when data files are outside of + // The absolute path of the file, it's used only when data files are outside of // the Iceberg table location (IMPALA-11507). 10: optional string absolute_path } diff --git a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java index 0ed334a31..eaf4d24a2 100644 --- a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java @@ -311,6 +311,22 @@ public interface FeIcebergTable extends FeFsTable { return -1; } + /** + * Sets 'tableStats_' for the Iceberg table by its partition stats. + * TODO: Now the calculation of V2 Iceberg table is not accurate. 
After + * IMPALA-11516(Return better partition stats for V2 tables) is ready, this method can + * be considered to replace + * {@link Table#setTableStats(org.apache.hadoop.hive.metastore.api.Table)}. + */ + default void setIcebergTableStats() { + Preconditions.checkState(getTTableStats() != null); + Preconditions.checkState(getIcebergPartitionStats() != null); + if (getTTableStats().getNum_rows() < 0) { + getTTableStats().setNum_rows(Utils.calculateNumRows(this)); + } + getTTableStats().setTotal_file_bytes(Utils.calculateFileSizeInBytes(this)); + } + /** * Utility functions */ @@ -397,17 +413,9 @@ public interface FeIcebergTable extends FeFsTable { result.setSchema(resultSchema); TResultRowBuilder rowBuilder = new TResultRowBuilder(); - Map<String, TIcebergPartitionStats> nameToStats = table.getIcebergPartitionStats(); - if (table.getNumRows() >= 0) { - rowBuilder.add(table.getNumRows()); - } else { - rowBuilder.add(nameToStats.values().stream().mapToLong( - TIcebergPartitionStats::getNum_rows).sum()); - } - rowBuilder.add(nameToStats.values().stream().mapToLong( - TIcebergPartitionStats::getNum_files).sum()); - rowBuilder.addBytes(nameToStats.values().stream().mapToLong( - TIcebergPartitionStats::getFile_size_in_bytes).sum()); + rowBuilder.add(table.getNumRows()); + rowBuilder.add(table.getContentFileStore().getNumFiles()); + rowBuilder.addBytes(table.getTTableStats().getTotal_file_bytes()); if (!table.isMarkedCached()) { rowBuilder.add("NOT CACHED"); rowBuilder.add("NOT CACHED"); @@ -436,6 +444,24 @@ public interface FeIcebergTable extends FeFsTable { return result; } + /** + * Calculate num rows for the given iceberg table by its partition stats. + * The result is computed by all DataFiles without any DeleteFile. 
+ */ + public static long calculateNumRows(FeIcebergTable table) { + return table.getIcebergPartitionStats().values().stream() + .mapToLong(TIcebergPartitionStats::getNum_rows).sum(); + } + + /** + * Calculate file size in bytes for the given iceberg table by its partition stats. + * The result is computed by all ContentFiles, including DataFile and DeleteFile. + */ + public static long calculateFileSizeInBytes(FeIcebergTable table) { + return table.getIcebergPartitionStats().values().stream() + .mapToLong(TIcebergPartitionStats::getFile_size_in_bytes).sum(); + } + /** * Get the field schema list of the current PartitionSpec from Iceberg table. * diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java b/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java index 5e84f3227..ee7cc0974 100644 --- a/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java +++ b/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java @@ -97,6 +97,8 @@ public class IcebergContentFileStore { public List<FileDescriptor> getDeleteFiles() { return deleteFiles_; } + public long getNumFiles() { return dataFiles_.size() + deleteFiles_.size(); } + public Iterable<FileDescriptor> getAllFiles() { return Iterables.concat(dataFiles_, deleteFiles_); } diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java index 2f1a81174..6d239e8a5 100644 --- a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java @@ -17,16 +17,18 @@ package org.apache.impala.catalog; +import com.codahale.metrics.Timer; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; - import org.apache.hadoop.hive.common.StatsSetupConst; -import 
org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.impala.analysis.IcebergPartitionField; import org.apache.impala.analysis.IcebergPartitionSpec; import org.apache.impala.analysis.IcebergPartitionTransform; @@ -51,10 +53,6 @@ import org.apache.impala.util.IcebergSchemaConverter; import org.apache.impala.util.IcebergUtil; import org.apache.thrift.TException; -import com.codahale.metrics.Timer; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; - /** * Representation of an Iceberg table in the catalog cache. */ @@ -359,6 +357,7 @@ public class IcebergTable extends Table implements FeIcebergTable { .load(false, msClient, msTable_, true, true, false, null, null,null, reason); fileStore_ = Utils.loadAllPartition(this); partitionStats_ = Utils.loadPartitionStats(this); + setIcebergTableStats(); loadAllColumnStats(msClient); } catch (Exception e) { throw new IcebergTableLoadingException("Error loading metadata for Iceberg table " diff --git a/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java index dc1c11ef5..1813ce00e 100644 --- a/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java @@ -127,6 +127,7 @@ public class LocalIcebergTable extends LocalTable implements FeIcebergTable { icebergParquetPlainPageSize_ = Utils.getIcebergParquetPlainPageSize(msTable); icebergParquetDictPageSize_ = Utils.getIcebergParquetDictPageSize(msTable); partitionStats_ = tableInfo.getIceberg_table().getPartition_stats(); + setIcebergTableStats(); addVirtualColumns(ref.getVirtualColumns()); } diff --git 
a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test index 621fd7e4a..d9db7f81f 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test @@ -23,7 +23,7 @@ PLAN-ROOT SINK 00:SCAN HDFS [functional_parquet.iceberg_v2_no_deletes] HDFS partitions=1/1 files=1 size=625B predicates: i > 1 - row-size=4B cardinality=400 + row-size=4B cardinality=1 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | @@ -40,7 +40,7 @@ PLAN-ROOT SINK 00:SCAN HDFS [functional_parquet.iceberg_v2_no_deletes] HDFS partitions=1/1 files=1 size=625B predicates: i > 1 - row-size=4B cardinality=400 + row-size=4B cardinality=1 ==== SELECT count(*) from iceberg_v2_delete_positional; ---- PLAN @@ -59,7 +59,7 @@ PLAN-ROOT SINK | 00:SCAN HDFS [functional_parquet.iceberg_v2_delete_positional] HDFS partitions=1/1 files=1 size=662B - row-size=20B cardinality=4.73K + row-size=20B cardinality=3 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | @@ -74,7 +74,7 @@ PLAN-ROOT SINK | row-size=8B cardinality=1 | 02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST] -| row-size=20B cardinality=4.73K +| row-size=20B cardinality=3 | |--04:EXCHANGE [BROADCAST] | | @@ -84,14 +84,14 @@ PLAN-ROOT SINK | 00:SCAN HDFS [functional_parquet.iceberg_v2_delete_positional] HDFS partitions=1/1 files=1 size=662B - row-size=20B cardinality=4.73K + row-size=20B cardinality=3 ==== SELECT * from iceberg_v2_delete_positional; ---- PLAN PLAN-ROOT SINK | 02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] -| row-size=40B cardinality=4.73K +| row-size=40B cardinality=3 | |--01:SCAN HDFS [functional_parquet.iceberg_v2_delete_positional-POSITION-DELETE-01 functional_parquet.iceberg_v2_delete_positional-position-delete] | HDFS partitions=1/1 files=1 size=1.54KB @@ -99,14 +99,14 @@ PLAN-ROOT SINK | 00:SCAN HDFS 
[functional_parquet.iceberg_v2_delete_positional] HDFS partitions=1/1 files=1 size=662B - row-size=40B cardinality=4.73K + row-size=40B cardinality=3 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | 04:EXCHANGE [UNPARTITIONED] | 02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST] -| row-size=40B cardinality=4.73K +| row-size=40B cardinality=3 | |--03:EXCHANGE [BROADCAST] | | @@ -116,14 +116,14 @@ PLAN-ROOT SINK | 00:SCAN HDFS [functional_parquet.iceberg_v2_delete_positional] HDFS partitions=1/1 files=1 size=662B - row-size=40B cardinality=4.73K + row-size=40B cardinality=3 ==== SELECT * from iceberg_v2_positional_delete_all_rows; ---- PLAN PLAN-ROOT SINK | 02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] -| row-size=36B cardinality=8.93K +| row-size=36B cardinality=3 | |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_delete_all_rows-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_delete_all_rows-position-delete] | HDFS partitions=1/1 files=1 size=2.60KB @@ -131,14 +131,14 @@ PLAN-ROOT SINK | 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_delete_all_rows] HDFS partitions=1/1 files=1 size=625B - row-size=36B cardinality=8.93K + row-size=36B cardinality=3 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | 04:EXCHANGE [UNPARTITIONED] | 02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST] -| row-size=36B cardinality=8.93K +| row-size=36B cardinality=3 | |--03:EXCHANGE [BROADCAST] | | @@ -148,7 +148,7 @@ PLAN-ROOT SINK | 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_delete_all_rows] HDFS partitions=1/1 files=1 size=625B - row-size=36B cardinality=8.93K + row-size=36B cardinality=3 ==== SELECT * from iceberg_v2_no_deletes limit 1 ---- PLAN @@ -183,7 +183,7 @@ PLAN-ROOT SINK | 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_delete_all_rows] HDFS partitions=1/1 files=1 size=625B - row-size=36B cardinality=8.93K + row-size=36B cardinality=3 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | @@ -202,7 +202,7 @@ PLAN-ROOT SINK | 00:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_delete_all_rows] HDFS partitions=1/1 files=1 size=625B - row-size=36B cardinality=8.93K + row-size=36B cardinality=3 ==== SELECT * from iceberg_v2_positional_not_all_data_files_have_delete_files limit 1 ---- PLAN @@ -214,7 +214,7 @@ PLAN-ROOT SINK | row-size=36B cardinality=1 | |--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] -| | row-size=36B cardinality=34.20K +| | row-size=36B cardinality=10 | | | |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] | | HDFS partitions=1/1 files=2 size=5.33KB @@ -222,15 +222,15 @@ PLAN-ROOT SINK | | | 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=2 size=1.22KB -| row-size=36B cardinality=34.20K +| row-size=36B cardinality=10 | 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB - row-size=36B cardinality=34.20K + row-size=36B cardinality=1 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -06:EXCHANGE [UNPARTITIONED] +07:EXCHANGE [UNPARTITIONED] | limit: 1 | 04:UNION @@ -238,22 +238,24 @@ PLAN-ROOT SINK | limit: 1 | row-size=36B cardinality=1 | -|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST] -| | row-size=36B cardinality=34.20K +|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED] +| | row-size=36B cardinality=10 | | -| |--05:EXCHANGE [BROADCAST] +| |--06:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)] | | | | | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] | | HDFS partitions=1/1 files=2 size=5.33KB | | row-size=245B cardinality=4 | | +| 05:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)] +| | | 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=2 size=1.22KB -| row-size=36B cardinality=34.20K +| row-size=36B cardinality=10 | 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB - row-size=36B cardinality=34.20K + row-size=36B cardinality=10 ==== SELECT * from iceberg_v2_positional_not_all_data_files_have_delete_files ---- PLAN @@ -261,10 +263,10 @@ PLAN-ROOT SINK | 04:UNION | pass-through-operands: all -| row-size=36B cardinality=68.39K +| row-size=36B cardinality=20 | |--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] -| | row-size=36B cardinality=34.20K +| | row-size=36B cardinality=10 | | | |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] | | HDFS partitions=1/1 files=2 size=5.33KB @@ -272,36 +274,38 @@ PLAN-ROOT SINK | | | 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=2 size=1.22KB -| row-size=36B cardinality=34.20K +| row-size=36B cardinality=10 | 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB - row-size=36B cardinality=34.20K + row-size=36B cardinality=10 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -06:EXCHANGE [UNPARTITIONED] +07:EXCHANGE [UNPARTITIONED] | 04:UNION | pass-through-operands: all -| row-size=36B 
cardinality=68.39K +| row-size=36B cardinality=20 | -|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST] -| | row-size=36B cardinality=34.20K +|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED] +| | row-size=36B cardinality=10 | | -| |--05:EXCHANGE [BROADCAST] +| |--06:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)] | | | | | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] | | HDFS partitions=1/1 files=2 size=5.33KB | | row-size=245B cardinality=4 | | +| 05:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)] +| | | 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=2 size=1.22KB -| row-size=36B cardinality=34.20K +| row-size=36B cardinality=10 | 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB - row-size=36B cardinality=34.20K + row-size=36B cardinality=10 ==== SELECT * from iceberg_v2_positional_update_all_rows ---- PLAN @@ -309,10 +313,10 @@ PLAN-ROOT SINK | 04:UNION | pass-through-operands: all -| row-size=36B cardinality=27.47K +| row-size=36B cardinality=12 | |--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] -| | row-size=36B cardinality=13.74K +| | row-size=36B cardinality=6 | | | |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_update_all_rows-position-delete] | | HDFS partitions=1/1 files=1 size=2.60KB @@ -320,11 +324,11 @@ PLAN-ROOT 
SINK | | | 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows] | HDFS partitions=1/1 files=1 size=625B -| row-size=36B cardinality=13.74K +| row-size=36B cardinality=6 | 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows] HDFS partitions=1/1 files=1 size=625B - row-size=36B cardinality=13.74K + row-size=36B cardinality=6 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | @@ -332,10 +336,10 @@ PLAN-ROOT SINK | 04:UNION | pass-through-operands: all -| row-size=36B cardinality=27.47K +| row-size=36B cardinality=12 | |--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST] -| | row-size=36B cardinality=13.74K +| | row-size=36B cardinality=6 | | | |--05:EXCHANGE [BROADCAST] | | | @@ -345,18 +349,18 @@ PLAN-ROOT SINK | | | 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows] | HDFS partitions=1/1 files=1 size=625B -| row-size=36B cardinality=13.74K +| row-size=36B cardinality=6 | 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows] HDFS partitions=1/1 files=1 size=625B - row-size=36B cardinality=13.74K + row-size=36B cardinality=6 ==== SELECT * from iceberg_v2_partitioned_position_deletes ---- PLAN PLAN-ROOT SINK | 02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] -| row-size=64B cardinality=4.96K +| row-size=64B cardinality=20 | |--01:SCAN HDFS [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE-01 functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete] | HDFS partitions=1/1 files=3 size=9.47KB @@ -364,24 +368,26 @@ PLAN-ROOT SINK | 00:SCAN HDFS [functional_parquet.iceberg_v2_partitioned_position_deletes] HDFS partitions=1/1 files=3 size=3.48KB - row-size=64B cardinality=4.96K + row-size=64B cardinality=20 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -04:EXCHANGE [UNPARTITIONED] +05:EXCHANGE [UNPARTITIONED] | -02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST] -| row-size=64B cardinality=4.96K +02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED] +| row-size=64B 
cardinality=20 | -|--03:EXCHANGE [BROADCAST] +|--04:EXCHANGE [HASH(functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete.pos,functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete.file_path)] | | | 01:SCAN HDFS [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE-01 functional_parquet.iceberg_v2_partitioned_position_deletes-position-delete] | HDFS partitions=1/1 files=3 size=9.47KB | row-size=182B cardinality=10 | +03:EXCHANGE [HASH(functional_parquet.iceberg_v2_partitioned_position_deletes.file__position,functional_parquet.iceberg_v2_partitioned_position_deletes.input__file__name)] +| 00:SCAN HDFS [functional_parquet.iceberg_v2_partitioned_position_deletes] HDFS partitions=1/1 files=3 size=3.48KB - row-size=64B cardinality=4.96K + row-size=64B cardinality=20 ==== SELECT * from iceberg_v2_positional_not_all_data_files_have_delete_files WHERE i > 2 @@ -390,10 +396,10 @@ PLAN-ROOT SINK | 04:UNION | pass-through-operands: all -| row-size=36B cardinality=6.84K +| row-size=36B cardinality=2 | |--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] -| | row-size=36B cardinality=3.42K +| | row-size=36B cardinality=1 | | | |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] | | HDFS partitions=1/1 files=2 size=5.33KB @@ -402,39 +408,41 @@ PLAN-ROOT SINK | 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=2 size=1.22KB | predicates: i > 2 -| row-size=36B cardinality=3.42K +| row-size=36B cardinality=1 | 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB predicates: i > 2 - row-size=36B cardinality=3.42K + row-size=36B cardinality=1 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -06:EXCHANGE [UNPARTITIONED] +07:EXCHANGE 
[UNPARTITIONED] | 04:UNION | pass-through-operands: all -| row-size=36B cardinality=6.84K +| row-size=36B cardinality=2 | -|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST] -| | row-size=36B cardinality=3.42K +|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED] +| | row-size=36B cardinality=1 | | -| |--05:EXCHANGE [BROADCAST] +| |--06:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)] | | | | | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] | | HDFS partitions=1/1 files=2 size=5.33KB | | row-size=245B cardinality=4 | | +| 05:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)] +| | | 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=2 size=1.22KB | predicates: i > 2 -| row-size=36B cardinality=3.42K +| row-size=36B cardinality=1 | 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB predicates: i > 2 - row-size=36B cardinality=3.42K + row-size=36B cardinality=1 ==== select * from iceberg_v2_positional_not_all_data_files_have_delete_files for system_version as of 1497619269847778439 minus @@ -444,22 +452,22 @@ PLAN-ROOT SINK | 07:HASH JOIN [LEFT ANTI JOIN] | hash predicates: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i IS NOT DISTINCT FROM functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s 
IS NOT DISTINCT FROM functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s -| row-size=16B cardinality=68.39K +| row-size=16B cardinality=20 | |--06:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=1 size=625B -| row-size=16B cardinality=34.20K +| row-size=16B cardinality=10 | 05:AGGREGATE [FINALIZE] | group by: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s -| row-size=16B cardinality=68.39K +| row-size=16B cardinality=20 | 04:UNION | pass-through-operands: all -| row-size=36B cardinality=68.39K +| row-size=36B cardinality=20 | |--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] -| | row-size=36B cardinality=34.20K +| | row-size=36B cardinality=10 | | | |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] | | HDFS partitions=1/1 files=2 size=5.33KB @@ -467,56 +475,58 @@ PLAN-ROOT SINK | | | 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=2 size=1.22KB -| row-size=36B cardinality=34.20K +| row-size=36B cardinality=10 | 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB - row-size=36B cardinality=34.20K + row-size=36B cardinality=10 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -12:EXCHANGE [UNPARTITIONED] +13:EXCHANGE [UNPARTITIONED] | 07:HASH JOIN [LEFT ANTI JOIN, PARTITIONED] | hash predicates: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i IS NOT DISTINCT FROM functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s IS NOT 
DISTINCT FROM functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s -| row-size=16B cardinality=68.39K +| row-size=16B cardinality=20 | -|--11:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s)] +|--12:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s)] | | | 06:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=1 size=625B -| row-size=16B cardinality=34.20K +| row-size=16B cardinality=10 | -10:AGGREGATE [FINALIZE] +11:AGGREGATE [FINALIZE] | group by: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s -| row-size=16B cardinality=68.39K +| row-size=16B cardinality=20 | -09:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s)] +10:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s)] | 05:AGGREGATE [STREAMING] | group by: functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.i, functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.s -| row-size=16B cardinality=68.39K +| row-size=16B cardinality=20 | 04:UNION | pass-through-operands: all -| row-size=36B cardinality=68.39K +| row-size=36B cardinality=20 | -|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST] -| | row-size=36B cardinality=34.20K +|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED] +| | row-size=36B cardinality=10 | | -| |--08:EXCHANGE [BROADCAST] +| 
|--09:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)] | | | | | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] | | HDFS partitions=1/1 files=2 size=5.33KB | | row-size=245B cardinality=4 | | +| 08:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)] +| | | 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=2 size=1.22KB -| row-size=36B cardinality=34.20K +| row-size=36B cardinality=10 | 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB - row-size=36B cardinality=34.20K + row-size=36B cardinality=10 ==== with v as (select i + 1000 as ii, upper(s) as ss from iceberg_v2_positional_not_all_data_files_have_delete_files) select * from v where ii > 1003; @@ -525,10 +535,10 @@ PLAN-ROOT SINK | 04:UNION | pass-through-operands: all -| row-size=36B cardinality=6.84K +| row-size=36B cardinality=2 | |--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] -| | row-size=36B cardinality=3.42K +| | row-size=36B cardinality=1 | | | |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] | | HDFS partitions=1/1 files=2 size=5.33KB @@ -537,39 +547,41 @@ PLAN-ROOT SINK | 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=2 size=1.22KB | predicates: i 
+ 1000 > 1003 -| row-size=36B cardinality=3.42K +| row-size=36B cardinality=1 | 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB predicates: i + 1000 > 1003 - row-size=36B cardinality=3.42K + row-size=36B cardinality=1 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -06:EXCHANGE [UNPARTITIONED] +07:EXCHANGE [UNPARTITIONED] | 04:UNION | pass-through-operands: all -| row-size=36B cardinality=6.84K +| row-size=36B cardinality=2 | -|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST] -| | row-size=36B cardinality=3.42K +|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED] +| | row-size=36B cardinality=1 | | -| |--05:EXCHANGE [BROADCAST] +| |--06:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)] | | | | | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] | | HDFS partitions=1/1 files=2 size=5.33KB | | row-size=245B cardinality=4 | | +| 05:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)] +| | | 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=2 size=1.22KB | predicates: i + 1000 > 1003 -| row-size=36B cardinality=3.42K +| row-size=36B cardinality=1 | 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB predicates: i + 1000 > 1003 - row-size=36B cardinality=3.42K + row-size=36B cardinality=1 ==== select * from 
iceberg_v2_positional_not_all_data_files_have_delete_files @@ -580,7 +592,7 @@ PLAN-ROOT SINK 11:HASH JOIN [LEFT SEMI JOIN] | hash predicates: i = max(i) | runtime filters: RF000 <- max(i) -| row-size=36B cardinality=68.39K +| row-size=36B cardinality=20 | |--10:AGGREGATE [FINALIZE] | | output: max(i) @@ -588,10 +600,10 @@ PLAN-ROOT SINK | | | 09:UNION | | pass-through-operands: all -| | row-size=24B cardinality=27.47K +| | row-size=24B cardinality=12 | | | |--07:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] -| | | row-size=24B cardinality=13.74K +| | | row-size=24B cardinality=6 | | | | | |--06:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows-POSITION-DELETE-06 functional_parquet.iceberg_v2_positional_update_all_rows-position-delete] | | | HDFS partitions=1/1 files=1 size=2.60KB @@ -599,18 +611,18 @@ PLAN-ROOT SINK | | | | | 05:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows] | | HDFS partitions=1/1 files=1 size=625B -| | row-size=24B cardinality=13.74K +| | row-size=24B cardinality=6 | | | 08:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows] | HDFS partitions=1/1 files=1 size=625B -| row-size=24B cardinality=13.74K +| row-size=24B cardinality=6 | 04:UNION | pass-through-operands: all -| row-size=36B cardinality=68.39K +| row-size=36B cardinality=20 | |--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] -| | row-size=36B cardinality=34.20K +| | row-size=36B cardinality=10 | | | |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] | | HDFS partitions=1/1 files=2 size=5.33KB @@ -619,29 +631,29 @@ PLAN-ROOT SINK | 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=2 size=1.22KB | runtime filters: RF000 -> i -| row-size=36B cardinality=34.20K +| row-size=36B cardinality=10 | 03:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB runtime filters: RF000 -> i - row-size=36B cardinality=34.20K + row-size=36B cardinality=10 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -17:EXCHANGE [UNPARTITIONED] +18:EXCHANGE [UNPARTITIONED] | 11:HASH JOIN [LEFT SEMI JOIN, BROADCAST] | hash predicates: i = max(i) | runtime filters: RF000 <- max(i) -| row-size=36B cardinality=68.39K +| row-size=36B cardinality=20 | -|--16:EXCHANGE [BROADCAST] +|--17:EXCHANGE [BROADCAST] | | -| 15:AGGREGATE [FINALIZE] +| 16:AGGREGATE [FINALIZE] | | output: max:merge(i) | | row-size=4B cardinality=1 | | -| 14:EXCHANGE [UNPARTITIONED] +| 15:EXCHANGE [UNPARTITIONED] | | | 10:AGGREGATE | | output: max(i) @@ -649,12 +661,12 @@ PLAN-ROOT SINK | | | 09:UNION | | pass-through-operands: all -| | row-size=24B cardinality=27.47K +| | row-size=24B cardinality=12 | | | |--07:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST] -| | | row-size=24B cardinality=13.74K +| | | row-size=24B cardinality=6 | | | -| | |--13:EXCHANGE [BROADCAST] +| | |--14:EXCHANGE [BROADCAST] | | | | | | | 06:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows-POSITION-DELETE-06 functional_parquet.iceberg_v2_positional_update_all_rows-position-delete] | | | HDFS partitions=1/1 files=1 size=2.60KB @@ -662,32 +674,34 @@ PLAN-ROOT SINK | | | | | 05:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows] | | HDFS partitions=1/1 files=1 size=625B -| | row-size=24B cardinality=13.74K +| | row-size=24B cardinality=6 | | | 08:SCAN HDFS [functional_parquet.iceberg_v2_positional_update_all_rows] | HDFS partitions=1/1 files=1 size=625B -| row-size=24B cardinality=13.74K +| row-size=24B cardinality=6 | 04:UNION | pass-through-operands: all -| row-size=36B cardinality=68.39K +| row-size=36B cardinality=20 | -|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST] -| | row-size=36B cardinality=34.20K +|--02:DELETE EVENTS HASH JOIN [LEFT ANTI 
JOIN, PARTITIONED] +| | row-size=36B cardinality=10 | | -| |--12:EXCHANGE [BROADCAST] +| |--13:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)] | | | | | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] | | HDFS partitions=1/1 files=2 size=5.33KB | | row-size=245B cardinality=4 | | +| 12:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)] +| | | 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=2 size=1.22KB | runtime filters: RF000 -> i -| row-size=36B cardinality=34.20K +| row-size=36B cardinality=10 | 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB runtime filters: RF000 -> i - row-size=36B cardinality=34.20K + row-size=36B cardinality=10 ==== diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test b/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test index ec8b7f7a1..56b9ae535 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test @@ -259,11 +259,11 @@ PLAN-ROOT SINK 00:SCAN HDFS [functional_parquet.iceberg_non_partitioned] HDFS partitions=1/1 files=3 size=3.41KB stored statistics: - table: rows=unavailable size=unavailable + table: rows=20 size=22.90KB columns: unavailable - extrapolated-rows=disabled max-scan-range-rows=unavailable + extrapolated-rows=disabled 
max-scan-range-rows=6 mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 - tuple-ids=0 row-size=44B cardinality=380 + tuple-ids=0 row-size=44B cardinality=2 in pipelines: 00(GETNEXT) ==== # Sampling Iceberg tables. Count(*) is not optimized. @@ -284,11 +284,11 @@ PLAN-ROOT SINK 00:SCAN HDFS [functional_parquet.iceberg_non_partitioned] HDFS partitions=1/1 files=3 size=3.41KB stored statistics: - table: rows=unavailable size=unavailable + table: rows=20 size=22.90KB columns: all - extrapolated-rows=disabled max-scan-range-rows=unavailable + extrapolated-rows=disabled max-scan-range-rows=6 mem-estimate=32.00MB mem-reservation=8.00KB thread-reservation=1 - tuple-ids=0 row-size=0B cardinality=380 + tuple-ids=0 row-size=0B cardinality=2 in pipelines: 00(GETNEXT) ==== # Sampling partitioned Iceberg tables. @@ -303,11 +303,11 @@ PLAN-ROOT SINK 00:SCAN HDFS [functional_parquet.iceberg_partitioned] HDFS partitions=1/1 files=10 size=11.46KB stored statistics: - table: rows=unavailable size=unavailable + table: rows=20 size=22.90KB columns: unavailable - extrapolated-rows=disabled max-scan-range-rows=unavailable + extrapolated-rows=disabled max-scan-range-rows=2 mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 - tuple-ids=0 row-size=44B cardinality=1.98K + tuple-ids=0 row-size=44B cardinality=10 in pipelines: 00(GETNEXT) ==== # Sampling Iceberg tables with predicates. 
Predicate pushdown to Iceberg happens @@ -325,13 +325,13 @@ PLAN-ROOT SINK HDFS partitions=1/1 files=4 size=4.57KB predicates: action = 'click' stored statistics: - table: rows=unavailable size=unavailable + table: rows=20 size=22.90KB columns: unavailable - extrapolated-rows=disabled max-scan-range-rows=unavailable + extrapolated-rows=disabled max-scan-range-rows=5 parquet statistics predicates: action = 'click' parquet dictionary predicates: action = 'click' mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 - tuple-ids=0 row-size=44B cardinality=198 + tuple-ids=0 row-size=44B cardinality=1 in pipelines: 00(GETNEXT) ==== # Sampling Iceberg V2 tables. Delete files are not sampled, only the data files. So we @@ -370,20 +370,20 @@ PLAN-ROOT SINK | 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=1 size=625B | stored statistics: -| table: rows=unavailable size=unavailable +| table: rows=10 size=7.77KB | columns missing stats: i, s -| extrapolated-rows=disabled max-scan-range-rows=unavailable +| extrapolated-rows=disabled max-scan-range-rows=10 | mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 -| tuple-ids=0 row-size=36B cardinality=2.42K +| tuple-ids=0 row-size=36B cardinality=1 | in pipelines: 00(GETNEXT) | 03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=1 size=620B stored statistics: - table: rows=unavailable size=unavailable + table: rows=10 size=7.77KB columns missing stats: i, s - extrapolated-rows=disabled max-scan-range-rows=unavailable + extrapolated-rows=disabled max-scan-range-rows=10 mem-estimate=64.00MB mem-reservation=32.00KB thread-reservation=1 - tuple-ids=0 row-size=36B cardinality=2.42K + tuple-ids=0 row-size=36B cardinality=1 in pipelines: 03(GETNEXT) ====
