This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit b03cfcf2ade6dea7fed10f4a3db5c58ddf2c6bd2
Author: Zoltan Borok-Nagy <[email protected]>
AuthorDate: Fri Mar 22 19:16:07 2024 +0100

    IMPALA-12894: (part 2) Fix optimized count(*) for Iceberg tables with dangling delete files

    Impala can return incorrect results if a table has dangling delete
    files. Dangling delete files are delete files that are part of the
    snapshot but are not applicable to any of the data files. We can
    have such delete files after Spark's rewrite_data_files action.

    During analysis we check the existence of delete files based on the
    snapshot summary. If there are no delete files in the table, we just
    replace the count(*) expression with NumericLiteral($record_count).
    If there are delete files in the table (based on the summary), we set
    optimize_count_star_for_iceberg_v2 in the query context.

    Without optimize_count_star_for_iceberg_v2 in the query context,
    the IcebergScanPlanner would create the following plan:

        AGGREGATE
        COUNT(*)
           |
        UNION ALL
       /        \
      /          \
     /            \
    SCAN all  ANTI JOIN
    datafiles  /      \
    without   /        \
    deletes  SCAN      SCAN
             datafiles deletes
             with deletes

    With optimize_count_star_for_iceberg_v2 the final plan looks like
    the following:

          ArithmeticExpr(ADD)
          /             \
         /               \
        /                 \
    record_count       AGGREGATE
    of all             COUNT(*)
    datafiles              |
    without            ANTI JOIN
    deletes           /         \
                     /           \
                    SCAN        SCAN
                    datafiles   deletes
                    with deletes

    The ArithmeticExpr(ADD) and its left child (record_count) are created
    by the analyzer; IcebergScanPlanner is responsible for creating the
    plan under AGGREGATE COUNT(*). And if the table has delete files and
    optimize_count_star_for_iceberg_v2 is true, it knows it can omit the
    original UNION ALL and its left child.

    However, IcebergScanPlanner checks delete file existence based on the
    result of planFiles(), hence dangling delete files are eliminated.
    And if there are no delete files, IcebergScanPlanner assumes that
    case is already handled by the Analyzer (i.e. it replaced count(*)
    with NumericLiteral($record_count)).
    So it will incorrectly create a normal SCAN plan of the table under
    COUNT(*), i.e. we end up with this:

          ArithmeticExpr(ADD)
          /             \
         /               \
        /                 \
    record_count       AGGREGATE
    of all             COUNT(*)
    datafiles              |
    without              SCAN
    deletes            datafiles
                       without
                       deletes

    This means Impala will yield $record_count * 2 as a result.

    This patch fixes the FeIcebergTable.hasDeleteFiles() method, so it
    also ignores dangling delete files. Therefore, the analyzer will just
    substitute count(*) with NumericLiteral($record_count) if all deletes
    are dangling, i.e. no need to involve the IcebergScanPlanner at all.

    The patch also introduces a new query option,
    "iceberg_disable_count_star_optimization", so users can completely
    disable the statistics-based count(*) optimization if necessary.

    Testing:
     * e2e tests
     * planner tests

    Change-Id: Ie3aca0b0a104f9ca4589cde9643f3f341d4ff99f
    Reviewed-on: http://gerrit.cloudera.org:8080/21190
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
--- be/src/service/query-options.cc | 4 + be/src/service/query-options.h | 4 +- common/thrift/ImpalaService.thrift | 3 + common/thrift/Query.thrift | 3 + .../org/apache/impala/analysis/SelectStmt.java | 11 +- .../org/apache/impala/catalog/FeIcebergTable.java | 27 +- .../apache/impala/planner/IcebergScanPlanner.java | 5 +- .../PlannerTest/iceberg-v2-tables-hash-join.test | 256 ++++++---------- .../queries/PlannerTest/iceberg-v2-tables.test | 340 +++++++++------------ .../iceberg-v2-read-position-deletes-orc.test | 4 +- .../iceberg-v2-read-position-deletes.test | 24 +- 11 files changed, 292 insertions(+), 389 deletions(-) diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc index 468aaf36a..dae5e36fb 100644 --- a/be/src/service/query-options.cc +++ b/be/src/service/query-options.cc @@ -1225,6 +1225,10 @@ Status impala::SetQueryOption(const string& key, const string& value, query_options->__set_enable_tuple_cache(enable_tuple_cache); break; } + case
TImpalaQueryOptions::ICEBERG_DISABLE_COUNT_STAR_OPTIMIZATION: { + query_options->__set_iceberg_disable_count_star_optimization(IsTrue(value)); + break; + } default: if (IsRemovedQueryOption(key)) { LOG(WARNING) << "Ignoring attempt to set removed query option '" << key << "'"; diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h index 254a7ccea..3d5a752b4 100644 --- a/be/src/service/query-options.h +++ b/be/src/service/query-options.h @@ -50,7 +50,7 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type> // time we add or remove a query option to/from the enum TImpalaQueryOptions. #define QUERY_OPTS_TABLE \ DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(), \ - TImpalaQueryOptions::ENABLE_TUPLE_CACHE + 1); \ + TImpalaQueryOptions::ICEBERG_DISABLE_COUNT_STAR_OPTIMIZATION + 1); \ REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED) \ QUERY_OPT_FN(abort_on_error, ABORT_ON_ERROR, TQueryOptionLevel::REGULAR) \ REMOVED_QUERY_OPT_FN(allow_unsupported_formats, ALLOW_UNSUPPORTED_FORMATS) \ @@ -324,6 +324,8 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type> QUERY_OPT_FN(query_cpu_count_divisor, \ QUERY_CPU_COUNT_DIVISOR, TQueryOptionLevel::ADVANCED) \ QUERY_OPT_FN(enable_tuple_cache, ENABLE_TUPLE_CACHE, TQueryOptionLevel::ADVANCED) \ + QUERY_OPT_FN(iceberg_disable_count_star_optimization, \ + ICEBERG_DISABLE_COUNT_STAR_OPTIMIZATION, TQueryOptionLevel::ADVANCED) \ ; /// Enforce practical limits on some query options to avoid undesired query state. diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift index 6d74339ef..6dc9efbc1 100644 --- a/common/thrift/ImpalaService.thrift +++ b/common/thrift/ImpalaService.thrift @@ -921,6 +921,9 @@ enum TImpalaQueryOptions { // potentially insert tuple cache nodes into the plan. This can only be set if the // allow_tuple_caching feature startup flag is set to true. 
ENABLE_TUPLE_CACHE = 174 + + // Disables statistic-based count(*)-optimization for Iceberg tables. + ICEBERG_DISABLE_COUNT_STAR_OPTIMIZATION = 175 } // The summary of a DML statement. diff --git a/common/thrift/Query.thrift b/common/thrift/Query.thrift index 60122ad40..e1f64f6a9 100644 --- a/common/thrift/Query.thrift +++ b/common/thrift/Query.thrift @@ -699,6 +699,9 @@ struct TQueryOptions { // See comment in ImpalaService.thrift 175: optional bool enable_tuple_cache = false; + + // See comment in ImpalaService.thrift + 176: optional bool iceberg_disable_count_star_optimization = false; } // Impala currently has three types of sessions: Beeswax, HiveServer2 and external diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java index fb503ba97..04039279e 100644 --- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java @@ -1465,14 +1465,15 @@ public class SelectStmt extends QueryStmt { Analyzer.DB_DOES_NOT_EXIST_ERROR_MSG + tableName.getDb(), e); } if (!(table instanceof FeIcebergTable)) return; - + if (analyzer_.getQueryOptions().iceberg_disable_count_star_optimization) { + return; + } analyzer_.checkStmtExprLimit(); - Table iceTable = ((FeIcebergTable) table).getIcebergApiTable(); + FeIcebergTable iceTable = ((FeIcebergTable) table); if (Utils.hasDeleteFiles(iceTable, tableRef.getTimeTravelSpec())) { - // IMPALA-12894 Part1: turn off the optimisation for count(*) queries. 
- // optimizePlainCountStarQueryV2(tableRef, (FeIcebergTable)table); + optimizePlainCountStarQueryV2(tableRef, iceTable); } else { - optimizePlainCountStarQueryV1(tableRef, iceTable); + optimizePlainCountStarQueryV1(tableRef, iceTable.getIcebergApiTable()); } } diff --git a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java index 61768cd5c..98a13865c 100644 --- a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java @@ -985,17 +985,26 @@ public interface FeIcebergTable extends FeFsTable { } /** - * Return true if the Iceberg has DeleteFiles. + * Return true if the Iceberg has DeleteFiles. Only non-dangling delete files count + * so we don't use the snapshot summary for this. */ - public static boolean hasDeleteFiles(Table icebergTable, TimeTravelSpec travelSpec) { - Map<String, String> summary = getSnapshotSummary(icebergTable, travelSpec); - if (summary == null) return false; - String totalDeleteFilesStr = summary.get(SnapshotSummary.TOTAL_DELETE_FILES_PROP); - if (!Strings.isNullOrEmpty(totalDeleteFilesStr)) { - long totalDeleteFiles = Long.parseLong(totalDeleteFilesStr); - return totalDeleteFiles > 0; + public static boolean hasDeleteFiles(FeIcebergTable table, + TimeTravelSpec travelSpec) throws AnalysisException { + if (travelSpec == null) { + IcebergContentFileStore fileStore = table.getContentFileStore(); + return !fileStore.getPositionDeleteFiles().isEmpty() + || !fileStore.getEqualityDeleteFiles().isEmpty(); + } else { + try { + GroupedContentFiles groupedFiles = + IcebergUtil.getIcebergFiles(table, Lists.newArrayList(), travelSpec); + return !groupedFiles.positionDeleteFiles.isEmpty() + || !groupedFiles.equalityDeleteFiles.isEmpty(); + } catch (TableLoadingException e) { + throw new AnalysisException("Failed to get record count of Iceberg V2 table: " + + table.getFullName(), e); + } } - return false; } 
/** diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java b/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java index ad383d9de..e059c221d 100644 --- a/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java +++ b/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java @@ -32,7 +32,6 @@ import java.util.Set; import java.util.TreeSet; import java.util.function.Predicate; import java.util.stream.Collectors; -import java.util.stream.Stream; import org.apache.curator.shaded.com.google.common.collect.Lists; import org.apache.iceberg.ContentFile; @@ -42,8 +41,6 @@ import org.apache.iceberg.FileScanTask; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.ExpressionUtil; import org.apache.iceberg.expressions.ExpressionVisitors; -import org.apache.iceberg.expressions.Expressions; -import org.apache.iceberg.expressions.Expression.Operation; import org.apache.iceberg.expressions.True; import org.apache.iceberg.io.CloseableIterable; import org.apache.impala.analysis.Analyzer; @@ -193,6 +190,8 @@ public class IcebergScanPlanner { private PlanNode createIcebergScanPlanImpl() throws ImpalaException { if (noDeleteFiles()) { + Preconditions.checkState( + !ctx_.getQueryCtx().isOptimize_count_star_for_iceberg_v2()); // If there are no delete files we can just create a single SCAN node. 
Preconditions.checkState(dataFilesWithDeletes_.isEmpty()); PlanNode ret = new IcebergScanNode(ctx_.getNextNodeId(), tblRef_, conjuncts_, diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables-hash-join.test b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables-hash-join.test index 741373461..a43b98c5b 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables-hash-join.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables-hash-join.test @@ -357,135 +357,99 @@ SELECT count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files ---- PLAN PLAN-ROOT SINK | -05:AGGREGATE [FINALIZE] +03:AGGREGATE [FINALIZE] | output: count(*) | row-size=8B cardinality=1 | -04:UNION -| pass-through-operands: all -| row-size=20B cardinality=10 +02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] +| row-size=20B cardinality=3 | -|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] -| | row-size=20B cardinality=3 -| | -| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | HDFS partitions=1/1 files=1 size=2.63KB -| | Iceberg snapshot id: 752781918366351945 -| | row-size=267B cardinality=1 -| | -| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] -| HDFS partitions=1/1 files=1 size=625B +|--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| HDFS partitions=1/1 files=1 size=2.63KB | Iceberg snapshot id: 752781918366351945 -| row-size=20B cardinality=3 +| row-size=267B cardinality=1 | -03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] - HDFS partitions=1/1 files=3 size=1.83KB 
+00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] + HDFS partitions=1/1 files=1 size=625B Iceberg snapshot id: 752781918366351945 - row-size=20B cardinality=7 + row-size=20B cardinality=3 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -08:AGGREGATE [FINALIZE] +06:AGGREGATE [FINALIZE] | output: count:merge(*) | row-size=8B cardinality=1 | -07:EXCHANGE [UNPARTITIONED] +05:EXCHANGE [UNPARTITIONED] | -05:AGGREGATE +03:AGGREGATE | output: count(*) | row-size=8B cardinality=1 | -04:UNION -| pass-through-operands: all -| row-size=20B cardinality=10 +02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST] +| row-size=20B cardinality=3 | -|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST] -| | row-size=20B cardinality=3 -| | -| |--06:EXCHANGE [BROADCAST] -| | | -| | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | HDFS partitions=1/1 files=1 size=2.63KB -| | Iceberg snapshot id: 752781918366351945 -| | row-size=267B cardinality=1 +|--04:EXCHANGE [BROADCAST] | | -| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] -| HDFS partitions=1/1 files=1 size=625B +| 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| HDFS partitions=1/1 files=1 size=2.63KB | Iceberg snapshot id: 752781918366351945 -| row-size=20B cardinality=3 +| row-size=267B cardinality=1 | -03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] - HDFS partitions=1/1 files=3 size=1.83KB +00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] + HDFS partitions=1/1 files=1 size=625B Iceberg snapshot id: 752781918366351945 - row-size=20B 
cardinality=7 + row-size=20B cardinality=3 ==== SELECT count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files; ---- PLAN PLAN-ROOT SINK | -05:AGGREGATE [FINALIZE] +03:AGGREGATE [FINALIZE] | output: count(*) | row-size=8B cardinality=1 | -04:UNION -| pass-through-operands: all -| row-size=20B cardinality=10 +02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] +| row-size=20B cardinality=6 | -|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] -| | row-size=20B cardinality=6 -| | -| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | HDFS partitions=1/1 files=2 size=5.33KB -| | Iceberg snapshot id: 1497619269847778439 -| | row-size=267B cardinality=4 -| | -| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] -| HDFS partitions=1/1 files=2 size=1.22KB +|--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| HDFS partitions=1/1 files=2 size=5.33KB | Iceberg snapshot id: 1497619269847778439 -| row-size=20B cardinality=6 +| row-size=267B cardinality=4 | -03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB Iceberg snapshot id: 1497619269847778439 - row-size=20B cardinality=4 + row-size=20B cardinality=6 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -09:AGGREGATE [FINALIZE] +07:AGGREGATE [FINALIZE] | output: count:merge(*) | row-size=8B cardinality=1 | -08:EXCHANGE [UNPARTITIONED] +06:EXCHANGE [UNPARTITIONED] | -05:AGGREGATE +03:AGGREGATE | output: count(*) | row-size=8B cardinality=1 | -04:UNION -| pass-through-operands: all -| row-size=20B 
cardinality=10 +02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED] +| row-size=20B cardinality=6 | -|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED] -| | row-size=20B cardinality=6 +|--05:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)] | | -| |--07:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)] -| | | -| | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | HDFS partitions=1/1 files=2 size=5.33KB -| | Iceberg snapshot id: 1497619269847778439 -| | row-size=267B cardinality=4 -| | -| 06:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)] -| | -| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] -| HDFS partitions=1/1 files=2 size=1.22KB +| 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| HDFS partitions=1/1 files=2 size=5.33KB | Iceberg snapshot id: 1497619269847778439 -| row-size=20B cardinality=6 +| row-size=267B cardinality=4 | -03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +04:EXCHANGE 
[HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)] +| +00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB Iceberg snapshot id: 1497619269847778439 - row-size=20B cardinality=4 + row-size=20B cardinality=6 ==== SELECT * from iceberg_v2_positional_update_all_rows ---- PLAN @@ -1206,153 +1170,117 @@ u3 as (select count(*) from iceberg_v2_positional_not_all_data_files_have_delete ---- PLAN PLAN-ROOT SINK | -14:NESTED LOOP JOIN [CROSS JOIN] +10:NESTED LOOP JOIN [CROSS JOIN] | row-size=17B cardinality=1 | -|--12:AGGREGATE [FINALIZE] +|--08:AGGREGATE [FINALIZE] | | output: count(*) | | row-size=8B cardinality=1 | | -| 11:UNION -| | pass-through-operands: all -| | row-size=20B cardinality=10 +| 07:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] +| | row-size=20B cardinality=6 | | -| |--09:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] -| | | row-size=20B cardinality=6 -| | | -| | |--08:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-08 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | | HDFS partitions=1/1 files=2 size=5.33KB -| | | Iceberg snapshot id: 1497619269847778439 -| | | row-size=267B cardinality=4 -| | | -| | 07:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] -| | HDFS partitions=1/1 files=2 size=1.22KB +| |--06:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-06 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | HDFS partitions=1/1 files=2 size=5.33KB | | Iceberg snapshot id: 1497619269847778439 -| | row-size=20B cardinality=6 +| | row-size=267B cardinality=4 | | -| 10:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| 05:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=2 size=1.22KB | Iceberg snapshot id: 1497619269847778439 -| row-size=20B cardinality=4 +| row-size=20B cardinality=6 | -13:NESTED LOOP JOIN [CROSS JOIN] +09:NESTED LOOP JOIN [CROSS JOIN] | row-size=9B cardinality=1 | -|--06:UNION +|--04:UNION | constant-operands=1 | row-size=1B cardinality=1 | -05:AGGREGATE [FINALIZE] +03:AGGREGATE [FINALIZE] | output: count(*) | row-size=8B cardinality=1 | -04:UNION -| pass-through-operands: all -| row-size=20B cardinality=10 +02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] +| row-size=20B cardinality=6 | -|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] -| | row-size=20B cardinality=6 -| | -| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | HDFS partitions=1/1 files=2 size=5.33KB -| | Iceberg snapshot id: 1497619269847778439 -| | row-size=267B cardinality=4 -| | -| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] -| HDFS partitions=1/1 files=2 size=1.22KB +|--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| HDFS partitions=1/1 files=2 size=5.33KB | Iceberg snapshot id: 1497619269847778439 -| row-size=20B cardinality=6 +| row-size=267B cardinality=4 | -03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB Iceberg snapshot id: 1497619269847778439 - row-size=20B cardinality=4 + row-size=20B cardinality=6 
---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -14:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +10:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] | row-size=17B cardinality=1 | -|--24:EXCHANGE [UNPARTITIONED] +|--20:EXCHANGE [UNPARTITIONED] | | -| 23:AGGREGATE [FINALIZE] +| 19:AGGREGATE [FINALIZE] | | output: count:merge(*) | | row-size=8B cardinality=1 | | -| 22:EXCHANGE [UNPARTITIONED] +| 18:EXCHANGE [UNPARTITIONED] | | -| 12:AGGREGATE +| 08:AGGREGATE | | output: count(*) | | row-size=8B cardinality=1 | | -| 11:UNION -| | pass-through-operands: all -| | row-size=20B cardinality=10 +| 07:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED] +| | row-size=20B cardinality=6 | | -| |--09:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED] -| | | row-size=20B cardinality=6 +| |--17:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)] | | | -| | |--21:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)] -| | | | -| | | 08:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-08 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | | HDFS partitions=1/1 files=2 size=5.33KB -| | | Iceberg snapshot id: 1497619269847778439 -| | | row-size=267B cardinality=4 -| | | -| | 20:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)] -| | | -| | 07:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] -| | HDFS partitions=1/1 files=2 size=1.22KB +| | 06:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-06 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | HDFS partitions=1/1 files=2 size=5.33KB | | Iceberg snapshot id: 1497619269847778439 -| | row-size=20B cardinality=6 +| | row-size=267B cardinality=4 +| | +| 16:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)] | | -| 10:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| 05:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=2 size=1.22KB | Iceberg snapshot id: 1497619269847778439 -| row-size=20B cardinality=4 +| row-size=20B cardinality=6 | -13:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +09:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] | row-size=9B cardinality=1 | -|--19:EXCHANGE [UNPARTITIONED] +|--15:EXCHANGE [UNPARTITIONED] | | -| 06:UNION +| 04:UNION | constant-operands=1 | row-size=1B cardinality=1 | -18:AGGREGATE [FINALIZE] +14:AGGREGATE [FINALIZE] | output: count:merge(*) | row-size=8B cardinality=1 | -17:EXCHANGE [UNPARTITIONED] +13:EXCHANGE [UNPARTITIONED] | -05:AGGREGATE +03:AGGREGATE | output: count(*) | row-size=8B cardinality=1 | -04:UNION -| pass-through-operands: all -| row-size=20B cardinality=10 +02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED] +| row-size=20B cardinality=6 | -|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED] -| | row-size=20B cardinality=6 +|--12:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)] | | -| |--16:EXCHANGE 
[HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)] -| | | -| | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | HDFS partitions=1/1 files=2 size=5.33KB -| | Iceberg snapshot id: 1497619269847778439 -| | row-size=267B cardinality=4 -| | -| 15:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)] -| | -| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] -| HDFS partitions=1/1 files=2 size=1.22KB +| 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| HDFS partitions=1/1 files=2 size=5.33KB | Iceberg snapshot id: 1497619269847778439 -| row-size=20B cardinality=6 +| row-size=267B cardinality=4 | -03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +11:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)] +| +00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB Iceberg snapshot id: 1497619269847778439 - row-size=20B cardinality=4 + row-size=20B cardinality=6 ==== diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test index 
a463dcd97..57750d661 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test @@ -353,199 +353,145 @@ SELECT count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files ---- PLAN PLAN-ROOT SINK | -05:AGGREGATE [FINALIZE] +03:AGGREGATE [FINALIZE] | output: count(*) | row-size=8B cardinality=1 | -04:UNION -| pass-through-operands: all -| row-size=20B cardinality=9 +02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] +| row-size=20B cardinality=2 | -|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] -| | row-size=20B cardinality=2 -| | -| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | HDFS partitions=1/1 files=1 size=2.63KB -| | Iceberg snapshot id: 752781918366351945 -| | row-size=267B cardinality=1 -| | -| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] -| HDFS partitions=1/1 files=1 size=625B +|--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| HDFS partitions=1/1 files=1 size=2.63KB | Iceberg snapshot id: 752781918366351945 -| row-size=20B cardinality=3 +| row-size=267B cardinality=1 | -03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] - HDFS partitions=1/1 files=3 size=1.83KB +00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] + HDFS partitions=1/1 files=1 size=625B Iceberg snapshot id: 752781918366351945 - row-size=20B cardinality=7 + row-size=20B cardinality=3 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -08:AGGREGATE [FINALIZE] +06:AGGREGATE [FINALIZE] | output: count:merge(*) 
| row-size=8B cardinality=1 | -07:EXCHANGE [UNPARTITIONED] +05:EXCHANGE [UNPARTITIONED] | -05:AGGREGATE +03:AGGREGATE | output: count(*) | row-size=8B cardinality=1 | -04:UNION -| pass-through-operands: all -| row-size=20B cardinality=9 +02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] +| row-size=20B cardinality=2 | -|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] -| | row-size=20B cardinality=2 -| | -| |--06:EXCHANGE [DIRECTED] -| | | -| | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | HDFS partitions=1/1 files=1 size=2.63KB -| | Iceberg snapshot id: 752781918366351945 -| | row-size=267B cardinality=1 +|--04:EXCHANGE [DIRECTED] | | -| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] -| HDFS partitions=1/1 files=1 size=625B +| 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| HDFS partitions=1/1 files=1 size=2.63KB | Iceberg snapshot id: 752781918366351945 -| row-size=20B cardinality=3 +| row-size=267B cardinality=1 | -03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] - HDFS partitions=1/1 files=3 size=1.83KB +00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] + HDFS partitions=1/1 files=1 size=625B Iceberg snapshot id: 752781918366351945 - row-size=20B cardinality=7 + row-size=20B cardinality=3 ==== SELECT count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files for system_time as of "2022-08-19 13:50:00"; ---- PLAN PLAN-ROOT SINK | -05:AGGREGATE [FINALIZE] +03:AGGREGATE [FINALIZE] | output: count(*) | row-size=8B cardinality=1 | -04:UNION -| pass-through-operands: all 
-| row-size=20B cardinality=9 +02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] +| row-size=20B cardinality=2 | -|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] -| | row-size=20B cardinality=2 -| | -| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | HDFS partitions=1/1 files=1 size=2.63KB -| | Iceberg snapshot id: 752781918366351945 -| | row-size=267B cardinality=1 -| | -| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] -| HDFS partitions=1/1 files=1 size=625B +|--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| HDFS partitions=1/1 files=1 size=2.63KB | Iceberg snapshot id: 752781918366351945 -| row-size=20B cardinality=3 +| row-size=267B cardinality=1 | -03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] - HDFS partitions=1/1 files=3 size=1.83KB +00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] + HDFS partitions=1/1 files=1 size=625B Iceberg snapshot id: 752781918366351945 - row-size=20B cardinality=7 + row-size=20B cardinality=3 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -08:AGGREGATE [FINALIZE] +06:AGGREGATE [FINALIZE] | output: count:merge(*) | row-size=8B cardinality=1 | -07:EXCHANGE [UNPARTITIONED] +05:EXCHANGE [UNPARTITIONED] | -05:AGGREGATE +03:AGGREGATE | output: count(*) | row-size=8B cardinality=1 | -04:UNION -| pass-through-operands: all -| row-size=20B cardinality=9 +02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] +| row-size=20B cardinality=2 | -|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] -| | row-size=20B cardinality=2 -| | -| |--06:EXCHANGE [DIRECTED] -| 
| | -| | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | HDFS partitions=1/1 files=1 size=2.63KB -| | Iceberg snapshot id: 752781918366351945 -| | row-size=267B cardinality=1 +|--04:EXCHANGE [DIRECTED] | | -| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] -| HDFS partitions=1/1 files=1 size=625B +| 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| HDFS partitions=1/1 files=1 size=2.63KB | Iceberg snapshot id: 752781918366351945 -| row-size=20B cardinality=3 +| row-size=267B cardinality=1 | -03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] - HDFS partitions=1/1 files=3 size=1.83KB +00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] + HDFS partitions=1/1 files=1 size=625B Iceberg snapshot id: 752781918366351945 - row-size=20B cardinality=7 + row-size=20B cardinality=3 ==== SELECT count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files; ---- PLAN PLAN-ROOT SINK | -05:AGGREGATE [FINALIZE] +03:AGGREGATE [FINALIZE] | output: count(*) | row-size=8B cardinality=1 | -04:UNION -| pass-through-operands: all -| row-size=20B cardinality=6 +02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] +| row-size=20B cardinality=2 | -|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] -| | row-size=20B cardinality=2 -| | -| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | HDFS partitions=1/1 files=2 size=5.33KB -| | Iceberg snapshot id: 1497619269847778439 
-| | row-size=267B cardinality=4 -| | -| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] -| HDFS partitions=1/1 files=2 size=1.22KB +|--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| HDFS partitions=1/1 files=2 size=5.33KB | Iceberg snapshot id: 1497619269847778439 -| row-size=20B cardinality=6 +| row-size=267B cardinality=4 | -03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB Iceberg snapshot id: 1497619269847778439 - row-size=20B cardinality=4 + row-size=20B cardinality=6 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -08:AGGREGATE [FINALIZE] +06:AGGREGATE [FINALIZE] | output: count:merge(*) | row-size=8B cardinality=1 | -07:EXCHANGE [UNPARTITIONED] +05:EXCHANGE [UNPARTITIONED] | -05:AGGREGATE +03:AGGREGATE | output: count(*) | row-size=8B cardinality=1 | -04:UNION -| pass-through-operands: all -| row-size=20B cardinality=6 +02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] +| row-size=20B cardinality=2 | -|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] -| | row-size=20B cardinality=2 -| | -| |--06:EXCHANGE [DIRECTED] -| | | -| | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | HDFS partitions=1/1 files=2 size=5.33KB -| | Iceberg snapshot id: 1497619269847778439 -| | row-size=267B cardinality=4 +|--04:EXCHANGE [DIRECTED] | | -| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] -| HDFS partitions=1/1 files=2 size=1.22KB +| 01:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| HDFS partitions=1/1 files=2 size=5.33KB | Iceberg snapshot id: 1497619269847778439 -| row-size=20B cardinality=6 +| row-size=267B cardinality=4 | -03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB Iceberg snapshot id: 1497619269847778439 - row-size=20B cardinality=4 + row-size=20B cardinality=6 ==== SELECT * from iceberg_v2_positional_update_all_rows ---- PLAN @@ -1256,151 +1202,115 @@ u3 as (select count(*) from iceberg_v2_positional_not_all_data_files_have_delete ---- PLAN PLAN-ROOT SINK | -14:NESTED LOOP JOIN [CROSS JOIN] +10:NESTED LOOP JOIN [CROSS JOIN] | row-size=17B cardinality=1 | -|--12:AGGREGATE [FINALIZE] +|--08:AGGREGATE [FINALIZE] | | output: count(*) | | row-size=8B cardinality=1 | | -| 11:UNION -| | pass-through-operands: all -| | row-size=20B cardinality=6 +| 07:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] +| | row-size=20B cardinality=2 | | -| |--09:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] -| | | row-size=20B cardinality=2 -| | | -| | |--08:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-08 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | | HDFS partitions=1/1 files=2 size=5.33KB -| | | Iceberg snapshot id: 1497619269847778439 -| | | row-size=267B cardinality=4 -| | | -| | 07:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] -| | HDFS partitions=1/1 files=2 size=1.22KB +| |--06:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-06 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | HDFS partitions=1/1 files=2 size=5.33KB | | Iceberg snapshot id: 1497619269847778439 -| | row-size=20B cardinality=6 +| | row-size=267B cardinality=4 | | -| 10:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| 05:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=2 size=1.22KB | Iceberg snapshot id: 1497619269847778439 -| row-size=20B cardinality=4 +| row-size=20B cardinality=6 | -13:NESTED LOOP JOIN [CROSS JOIN] +09:NESTED LOOP JOIN [CROSS JOIN] | row-size=9B cardinality=1 | -|--06:UNION +|--04:UNION | constant-operands=1 | row-size=1B cardinality=1 | -05:AGGREGATE [FINALIZE] +03:AGGREGATE [FINALIZE] | output: count(*) | row-size=8B cardinality=1 | -04:UNION -| pass-through-operands: all -| row-size=20B cardinality=6 +02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] +| row-size=20B cardinality=2 | -|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] -| | row-size=20B cardinality=2 -| | -| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | HDFS partitions=1/1 files=2 size=5.33KB -| | Iceberg snapshot id: 1497619269847778439 -| | row-size=267B cardinality=4 -| | -| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] -| HDFS partitions=1/1 files=2 size=1.22KB +|--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| HDFS partitions=1/1 files=2 size=5.33KB | Iceberg snapshot id: 1497619269847778439 -| row-size=20B cardinality=6 +| row-size=267B cardinality=4 | -03:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB Iceberg snapshot id: 1497619269847778439 - row-size=20B cardinality=4 + row-size=20B cardinality=6 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -14:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +10:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] | row-size=17B cardinality=1 | -|--22:EXCHANGE [UNPARTITIONED] +|--18:EXCHANGE [UNPARTITIONED] | | -| 21:AGGREGATE [FINALIZE] +| 17:AGGREGATE [FINALIZE] | | output: count:merge(*) | | row-size=8B cardinality=1 | | -| 20:EXCHANGE [UNPARTITIONED] +| 16:EXCHANGE [UNPARTITIONED] | | -| 12:AGGREGATE +| 08:AGGREGATE | | output: count(*) | | row-size=8B cardinality=1 | | -| 11:UNION -| | pass-through-operands: all -| | row-size=20B cardinality=6 +| 07:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] +| | row-size=20B cardinality=2 | | -| |--09:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] -| | | row-size=20B cardinality=2 -| | | -| | |--19:EXCHANGE [DIRECTED] -| | | | -| | | 08:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-08 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | | HDFS partitions=1/1 files=2 size=5.33KB -| | | Iceberg snapshot id: 1497619269847778439 -| | | row-size=267B cardinality=4 +| |--15:EXCHANGE [DIRECTED] | | | -| | 07:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] -| | HDFS partitions=1/1 files=2 size=1.22KB +| | 06:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-06 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | HDFS partitions=1/1 files=2 size=5.33KB | | Iceberg snapshot id: 1497619269847778439 -| | row-size=20B cardinality=6 +| | 
row-size=267B cardinality=4 | | -| 10:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| 05:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=2 size=1.22KB | Iceberg snapshot id: 1497619269847778439 -| row-size=20B cardinality=4 +| row-size=20B cardinality=6 | -13:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +09:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] | row-size=9B cardinality=1 | -|--18:EXCHANGE [UNPARTITIONED] +|--14:EXCHANGE [UNPARTITIONED] | | -| 06:UNION +| 04:UNION | constant-operands=1 | row-size=1B cardinality=1 | -17:AGGREGATE [FINALIZE] +13:AGGREGATE [FINALIZE] | output: count:merge(*) | row-size=8B cardinality=1 | -16:EXCHANGE [UNPARTITIONED] +12:EXCHANGE [UNPARTITIONED] | -05:AGGREGATE +03:AGGREGATE | output: count(*) | row-size=8B cardinality=1 | -04:UNION -| pass-through-operands: all -| row-size=20B cardinality=6 +02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] +| row-size=20B cardinality=2 | -|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] -| | row-size=20B cardinality=2 +|--11:EXCHANGE [DIRECTED] | | -| |--15:EXCHANGE [DIRECTED] -| | | -| | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | HDFS partitions=1/1 files=2 size=5.33KB -| | Iceberg snapshot id: 1497619269847778439 -| | row-size=267B cardinality=4 -| | -| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] -| HDFS partitions=1/1 files=2 size=1.22KB +| 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| HDFS partitions=1/1 files=2 size=5.33KB | Iceberg snapshot id: 1497619269847778439 -| 
row-size=20B cardinality=6 +| row-size=267B cardinality=4 | -03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB Iceberg snapshot id: 1497619269847778439 - row-size=20B cardinality=4 + row-size=20B cardinality=6 ==== select * from functional_parquet.iceberg_v2_delete_equality; ---- PLAN @@ -1860,7 +1770,7 @@ PLAN-ROOT SINK | | row-size=48B cardinality=4 | | | |--05:SCAN HDFS [functional_parquet.iceberg_v2_delete_pos_and_multi_eq_ids-EQUALITY-DELETE-05 functional_parquet.iceberg_v2_delete_pos_and_multi_eq_ids-equality-delete-05] -| | HDFS partitions=1/1 files=1 size=656B +| | HDFS partitions=1/1 files=1 size=663B | | Iceberg snapshot id: 152862018760071153 | | row-size=24B cardinality=2 | | @@ -1870,7 +1780,7 @@ PLAN-ROOT SINK | | row-size=48B cardinality=4 | | | |--03:SCAN HDFS [functional_parquet.iceberg_v2_delete_pos_and_multi_eq_ids-EQUALITY-DELETE-03 functional_parquet.iceberg_v2_delete_pos_and_multi_eq_ids-equality-delete-03] -| | HDFS partitions=1/1 files=1 size=663B +| | HDFS partitions=1/1 files=1 size=656B | | Iceberg snapshot id: 152862018760071153 | | row-size=24B cardinality=2 | | @@ -1908,7 +1818,7 @@ PLAN-ROOT SINK | |--11:EXCHANGE [BROADCAST] | | | | | 05:SCAN HDFS [functional_parquet.iceberg_v2_delete_pos_and_multi_eq_ids-EQUALITY-DELETE-05 functional_parquet.iceberg_v2_delete_pos_and_multi_eq_ids-equality-delete-05] -| | HDFS partitions=1/1 files=1 size=656B +| | HDFS partitions=1/1 files=1 size=663B | | Iceberg snapshot id: 152862018760071153 | | row-size=24B cardinality=2 | | @@ -1920,7 +1830,7 @@ PLAN-ROOT SINK | |--10:EXCHANGE [BROADCAST] | | | | | 03:SCAN HDFS [functional_parquet.iceberg_v2_delete_pos_and_multi_eq_ids-EQUALITY-DELETE-03 functional_parquet.iceberg_v2_delete_pos_and_multi_eq_ids-equality-delete-03] -| | HDFS partitions=1/1 files=1 size=663B +| | HDFS 
partitions=1/1 files=1 size=656B | | Iceberg snapshot id: 152862018760071153 | | row-size=24B cardinality=2 | | @@ -1944,3 +1854,31 @@ PLAN-ROOT SINK Iceberg snapshot id: 152862018760071153 row-size=48B cardinality=2 ==== +select count(*) from functional_parquet.iceberg_spark_compaction_with_dangling_delete; +---- PLAN +PLAN-ROOT SINK +| +00:UNION + constant-operands=1 + row-size=8B cardinality=1 +---- DISTRIBUTEDPLAN +PLAN-ROOT SINK +| +00:UNION + constant-operands=1 + row-size=8B cardinality=1 +==== +select count(*) from functional_parquet.iceberg_spark_compaction_with_dangling_delete for system_time as of "2024-03-22 19:04:00"; +---- PLAN +PLAN-ROOT SINK +| +00:UNION + constant-operands=1 + row-size=8B cardinality=1 +---- DISTRIBUTEDPLAN +PLAN-ROOT SINK +| +00:UNION + constant-operands=1 + row-size=8B cardinality=1 +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes-orc.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes-orc.test index e6b3021b0..a2af39918 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes-orc.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes-orc.test @@ -154,7 +154,7 @@ SELECT count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files_ ---- TYPES bigint ---- RUNTIME_PROFILE -aggregation(SUM, NumOrcStripes): 5 +aggregation(SUM, NumOrcStripes): 2 aggregation(SUM, NumFileMetadataRead): 0 ==== ---- QUERY @@ -164,7 +164,7 @@ SELECT count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files_ ---- TYPES bigint ---- RUNTIME_PROFILE -aggregation(SUM, NumOrcStripes): 6 +aggregation(SUM, NumOrcStripes): 4 aggregation(SUM, NumFileMetadataRead): 0 ==== ---- QUERY diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes.test 
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes.test index 80e5cdad0..3b51b0e49 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes.test @@ -206,7 +206,7 @@ SELECT count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files ---- TYPES bigint ---- RUNTIME_PROFILE -aggregation(SUM, NumRowGroups): 5 +aggregation(SUM, NumRowGroups): 2 aggregation(SUM, NumFileMetadataRead): 0 ==== ---- QUERY @@ -216,7 +216,7 @@ SELECT count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files ---- TYPES bigint ---- RUNTIME_PROFILE -aggregation(SUM, NumRowGroups): 6 +aggregation(SUM, NumRowGroups): 4 aggregation(SUM, NumFileMetadataRead): 0 ==== ---- QUERY @@ -267,7 +267,7 @@ SELECT count(*) from iceberg_v2_positional_update_all_rows for system_version as ---- TYPES bigint ---- RUNTIME_PROFILE -aggregation(SUM, NumRowGroups): 3 +aggregation(SUM, NumRowGroups): 2 aggregation(SUM, NumFileMetadataRead): 0 ==== ---- QUERY @@ -277,7 +277,7 @@ SELECT count(*) from iceberg_v2_positional_update_all_rows ---- TYPES bigint ---- RUNTIME_PROFILE -aggregation(SUM, NumRowGroups): 3 +aggregation(SUM, NumRowGroups): 2 aggregation(SUM, NumFileMetadataRead): 0 ==== ---- QUERY @@ -750,4 +750,20 @@ select count(*) from functional_parquet.iceberg_spark_compaction_with_dangling_d 4 ---- TYPES bigint +---- RUNTIME_PROFILE +aggregation(SUM, NumRowGroups): 0 +aggregation(SUM, NumFileMetadataRead): 0 +==== +---- QUERY +# IMPALA-12984: Let's see that we can disable Iceberg statistic-based +# count(*) optimization. +set iceberg_disable_count_star_optimization=true; +select count(*) from functional_parquet.iceberg_spark_compaction_with_dangling_delete; +---- RESULTS +4 +---- TYPES +bigint +---- RUNTIME_PROFILE +aggregation(SUM, NumRowGroups): 0 +aggregation(SUM, NumFileMetadataRead): 1 ====
