This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 23edbde7c73c089f5409f6feb574320d767a8eda Author: Riza Suminto <[email protected]> AuthorDate: Thu Jan 2 16:39:46 2025 -0800 IMPALA-13637: Add ENABLE_TUPLE_ANALYSIS_IN_AGGREGATE option IMPALA-13405 adds a new tuple-analysis algorithm in AggregationNode to lower cardinality estimation when planning multi-column grouping. This patch adds a query option ENABLE_TUPLE_ANALYSIS_IN_AGGREGATE that allows users to enable/disable the algorithm if necessary. Default is True. Testing: - Add testAggregationNoTupleAnalysis. This test is based on TpcdsPlannerTest#testQ19 but with ENABLE_TUPLE_ANALYSIS_IN_AGGREGATE set to false. Change-Id: Iabd8daa3d9414fc33d232643014042dc20530514 Reviewed-on: http://gerrit.cloudera.org:8080/22294 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/service/query-options.cc | 4 + be/src/service/query-options.h | 4 +- common/thrift/ImpalaService.thrift | 4 + common/thrift/Query.thrift | 3 + .../org/apache/impala/planner/AggregationNode.java | 8 +- .../org/apache/impala/planner/PlannerTest.java | 8 + testdata/bin/restore-stats-on-planner-tests.py | 1 + .../PlannerTest/aggregation-no-tuple-analysis.test | 619 +++++++++++++++++++++ 8 files changed, 647 insertions(+), 4 deletions(-) diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc index 72b4f39c3..88446e4de 100644 --- a/be/src/service/query-options.cc +++ b/be/src/service/query-options.cc @@ -1322,6 +1322,10 @@ Status impala::SetQueryOption(TImpalaQueryOptions::type option, const string& va query_options->__set_long_polling_time_ms(int32_t_val); break; } + case TImpalaQueryOptions::ENABLE_TUPLE_ANALYSIS_IN_AGGREGATE: { + query_options->__set_enable_tuple_analysis_in_aggregate(IsTrue(value)); + break; + } default: string key = to_string(option); if (IsRemovedQueryOption(key)) { diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h index 44ddd6f48..5dd8bafd0 100644 --- a/be/src/service/query-options.h +++ b/be/src/service/query-options.h @@ -51,7 +51,7 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type> // plus one. Thus, the second argument to the DCHECK has to be updated every // time we add or remove a query option to/from the enum TImpalaQueryOptions. constexpr unsigned NUM_QUERY_OPTIONS = - TImpalaQueryOptions::ENABLE_TUPLE_CACHE_VERIFICATION + 1; + TImpalaQueryOptions::ENABLE_TUPLE_ANALYSIS_IN_AGGREGATE + 1; #define QUERY_OPTS_TABLE \ DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(), NUM_QUERY_OPTIONS); \ REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED) \ @@ -360,6 +360,8 @@ constexpr unsigned NUM_QUERY_OPTIONS = TQueryOptionLevel::REGULAR) \ QUERY_OPT_FN(enable_tuple_cache_verification, ENABLE_TUPLE_CACHE_VERIFICATION, \ TQueryOptionLevel::ADVANCED) \ + QUERY_OPT_FN(enable_tuple_analysis_in_aggregate, \ + ENABLE_TUPLE_ANALYSIS_IN_AGGREGATE, TQueryOptionLevel::ADVANCED) \ ; /// Enforce practical limits on some query options to avoid undesired query state. diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift index ca7b7f25b..df00545ff 100644 --- a/common/thrift/ImpalaService.thrift +++ b/common/thrift/ImpalaService.thrift @@ -969,6 +969,10 @@ enum TImpalaQueryOptions { // tuple_cache_debug_dump_dir is specified and enable_tuple_cache_verification is set // to true. ENABLE_TUPLE_CACHE_VERIFICATION = 183 + + // If True, enable tuple analysis for both preaggregation and final aggregation node. + // Enabling this feature can lower cardinality estimate of multi-column grouping. + ENABLE_TUPLE_ANALYSIS_IN_AGGREGATE = 184 } // The summary of a DML statement. diff --git a/common/thrift/Query.thrift b/common/thrift/Query.thrift index e21babc2c..118b10956 100644 --- a/common/thrift/Query.thrift +++ b/common/thrift/Query.thrift @@ -753,6 +753,9 @@ struct TQueryOptions { // See comment in ImpalaService.thrift 184: optional bool enable_tuple_cache_verification = false; + + // See comment in ImpalaService.thrift + 185: optional bool enable_tuple_analysis_in_aggregate = true } // Impala currently has three types of sessions: Beeswax, HiveServer2 and external diff --git a/fe/src/main/java/org/apache/impala/planner/AggregationNode.java b/fe/src/main/java/org/apache/impala/planner/AggregationNode.java index 0535f60da..ca25592ef 100644 --- a/fe/src/main/java/org/apache/impala/planner/AggregationNode.java +++ b/fe/src/main/java/org/apache/impala/planner/AggregationNode.java @@ -120,7 +120,7 @@ public class AggregationNode extends PlanNode { // May set to true in computeStats() and will stay true during lifetime of this // AggregationNode. // TODO: IMPALA-13542 - private boolean skipTupleBasedAnalysis_ = false; + private boolean skipTupleAnalysis_ = false; public AggregationNode( PlanNodeId id, PlanNode input, MultiAggregateInfo multiAggInfo, AggPhase aggPhase) { @@ -276,7 +276,9 @@ public class AggregationNode extends PlanNode { // DistributedPlanner.java may transfer conjunct to merge phase aggregation later. // Keep skipping tuple-based analysis to maintain same number as single node plan. // TODO: IMPALA-13542 - skipTupleBasedAnalysis_ |= !conjuncts_.isEmpty(); + skipTupleAnalysis_ |= !conjuncts_.isEmpty(); + skipTupleAnalysis_ |= + !analyzer.getQueryOptions().isEnable_tuple_analysis_in_aggregate(); boolean unknownEstimate = false; aggClassNumGroups_ = Lists.newArrayList(); @@ -354,7 +356,7 @@ public class AggregationNode extends PlanNode { Preconditions.checkArgument(aggInputCardinality >= -1, aggInputCardinality); if (groupingExprs.isEmpty()) return NON_GROUPING_AGG_NUM_GROUPS; if (planNode instanceof AggregationNode - && ((AggregationNode) planNode).skipTupleBasedAnalysis_) { + && ((AggregationNode) planNode).skipTupleAnalysis_) { // This AggregationNode has been planned with non-empty conjunct before. // Skip tuple based to avoid severe underestimation. // TODO: IMPALA-13542 diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java index 54f0ffbf3..f9d414e59 100644 --- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java +++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java @@ -192,6 +192,14 @@ public class PlannerTest extends PlannerTestBase { runCardinalityVerifyingTest("aggregation"); } + @Test + public void testAggregationNoTupleAnalysis() { + Set<PlannerTestOption> testOptions = tpcdsParquetTestOptions(); + TQueryOptions options = tpcdsParquetQueryOptions(); + runPlannerTestFile( + "aggregation-no-tuple-analysis", "tpcds_parquet", options, testOptions); + } + @Test public void testGroupingSets() { runPlannerTestFile("grouping-sets"); diff --git a/testdata/bin/restore-stats-on-planner-tests.py b/testdata/bin/restore-stats-on-planner-tests.py index 77001bd49..d040d2a92 100755 --- a/testdata/bin/restore-stats-on-planner-tests.py +++ b/testdata/bin/restore-stats-on-planner-tests.py @@ -47,6 +47,7 @@ PATH_TO_REPLACE = { "agg-node-high-mem-estimate.test", "agg-node-low-mem-estimate.test", "agg-node-max-mem-estimate.test", + "aggregation-no-tuple-analysis.test", "processing-cost-plan-admission-slots.test", "tpcds-processing-cost.test", ] diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/aggregation-no-tuple-analysis.test b/testdata/workloads/functional-planner/queries/PlannerTest/aggregation-no-tuple-analysis.test new file mode 100644 index 000000000..7ef23c73c --- /dev/null +++ b/testdata/workloads/functional-planner/queries/PlannerTest/aggregation-no-tuple-analysis.test @@ -0,0 +1,619 @@ +# TPCDS-Q19, with ENABLE_TUPLE_ANALYSIS_IN_AGGREGATE=false +select + i_brand_id brand_id, + i_brand brand, + i_manufact_id, + i_manufact, + sum(ss_ext_sales_price) ext_price +from + date_dim, + store_sales, + item, + customer, + customer_address, + store +where + d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id = 7 + and d_moy = 11 + and d_year = 1999 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5) + and ss_store_sk = s_store_sk +group by + i_brand, + i_brand_id, + i_manufact_id, + i_manufact +order by + ext_price desc, + i_brand, + i_brand_id, + i_manufact_id, + i_manufact +limit 100 +---- QUERYOPTIONS +ENABLE_TUPLE_ANALYSIS_IN_AGGREGATE=false +---- PLAN +Max Per-Host Resource Reservation: Memory=16.27MB Threads=7 +Per-Host Resource Estimates: Memory=315MB +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +| Per-Host Resources: mem-estimate=314.81MB mem-reservation=16.27MB thread-reservation=7 runtime-filters-memory=5.00MB +PLAN-ROOT SINK +| output exprs: i_brand_id, i_brand, i_manufact_id, i_manufact, sum(ss_ext_sales_price) +| mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 +| +12:TOP-N [LIMIT=100] +| order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC +| mem-estimate=7.38KB mem-reservation=0B thread-reservation=0 +| tuple-ids=7 row-size=76B cardinality=100 +| in pipelines: 12(GETNEXT), 11(OPEN) +| +11:AGGREGATE [FINALIZE] +| output: sum(ss_ext_sales_price) +| group by: i_brand, i_brand_id, i_manufact_id, i_manufact +| mem-estimate=10.00MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0 +| tuple-ids=6 row-size=76B cardinality=1.72K +| in pipelines: 11(GETNEXT), 04(OPEN) +| +10:HASH JOIN [INNER JOIN] +| hash predicates: ss_store_sk = s_store_sk +| fk/pk conjuncts: ss_store_sk = s_store_sk +| other predicates: substr(ca_zip, CAST(1 AS BIGINT), CAST(5 AS BIGINT)) != substr(s_zip, CAST(1 AS BIGINT), CAST(5 AS BIGINT)) +| runtime filters: RF000[bloom] <- s_store_sk, RF001[min_max] <- s_store_sk +| mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0 +| tuple-ids=4,3,1,2,0,5 row-size=158B cardinality=1.72K +| in pipelines: 04(GETNEXT), 05(OPEN) +| +|--05:SCAN HDFS [tpcds_parquet.store] +| HDFS partitions=1/1 files=1 size=9.93KB +| stored statistics: +| table: rows=12 size=9.93KB +| columns: all +| extrapolated-rows=disabled max-scan-range-rows=12 +| mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 +| tuple-ids=5 row-size=21B cardinality=12 +| in pipelines: 05(GETNEXT) +| +09:HASH JOIN [INNER JOIN] +| hash predicates: ca_address_sk = c_current_addr_sk +| fk/pk conjuncts: ca_address_sk = c_current_addr_sk +| runtime filters: RF002[bloom] <- c_current_addr_sk, RF003[min_max] <- c_current_addr_sk +| mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0 +| tuple-ids=4,3,1,2,0 row-size=137B cardinality=1.72K +| in pipelines: 04(GETNEXT), 03(OPEN) +| +|--08:HASH JOIN [INNER JOIN] +| | hash predicates: c_customer_sk = ss_customer_sk +| | fk/pk conjuncts: c_customer_sk = ss_customer_sk +| | runtime filters: RF004[bloom] <- ss_customer_sk, RF005[min_max] <- ss_customer_sk +| | mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0 +| | tuple-ids=3,1,2,0 row-size=116B cardinality=1.72K +| | in pipelines: 03(GETNEXT), 01(OPEN) +| | +| |--07:HASH JOIN [INNER JOIN] +| | | hash predicates: ss_sold_date_sk = d_date_sk +| | | fk/pk conjuncts: ss_sold_date_sk = d_date_sk +| | | runtime filters: RF006[bloom] <- d_date_sk +| | | mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0 +| | | tuple-ids=1,2,0 row-size=108B cardinality=1.72K +| | | in pipelines: 01(GETNEXT), 00(OPEN) +| | | +| | |--00:SCAN HDFS [tpcds_parquet.date_dim] +| | | HDFS partitions=1/1 files=1 size=2.15MB +| | | predicates: d_year = CAST(1999 AS INT), d_moy = CAST(11 AS INT) +| | | stored statistics: +| | | table: rows=73.05K size=2.15MB +| | | columns: all +| | | extrapolated-rows=disabled max-scan-range-rows=73.05K +| | | parquet statistics predicates: d_year = CAST(1999 AS INT), d_moy = CAST(11 AS INT) +| | | parquet dictionary predicates: d_year = CAST(1999 AS INT), d_moy = CAST(11 AS INT) +| | | mem-estimate=48.00MB mem-reservation=512.00KB thread-reservation=1 +| | | tuple-ids=0 row-size=12B cardinality=108 +| | | in pipelines: 00(GETNEXT) +| | | +| | 06:HASH JOIN [INNER JOIN] +| | | hash predicates: ss_item_sk = i_item_sk +| | | fk/pk conjuncts: ss_item_sk = i_item_sk +| | | runtime filters: RF008[bloom] <- i_item_sk, RF009[min_max] <- i_item_sk +| | | mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0 +| | | tuple-ids=1,2 row-size=96B cardinality=1.72K(filtered from 28.96K) +| | | in pipelines: 01(GETNEXT), 02(OPEN) +| | | +| | |--02:SCAN HDFS [tpcds_parquet.item] +| | | HDFS partitions=1/1 files=1 size=1.73MB +| | | predicates: i_manager_id = CAST(7 AS INT) +| | | stored statistics: +| | | table: rows=18.00K size=1.73MB +| | | columns: all +| | | extrapolated-rows=disabled max-scan-range-rows=18.00K +| | | parquet statistics predicates: i_manager_id = CAST(7 AS INT) +| | | parquet dictionary predicates: i_manager_id = CAST(7 AS INT) +| | | mem-estimate=96.00MB mem-reservation=512.00KB thread-reservation=1 +| | | tuple-ids=2 row-size=72B cardinality=181 +| | | in pipelines: 02(GETNEXT) +| | | +| | 01:SCAN HDFS [tpcds_parquet.store_sales] +| | HDFS partitions=1824/1824 files=1824 size=200.96MB +| | runtime filters: RF001[min_max] -> ss_store_sk, RF009[min_max] -> ss_item_sk, RF000[bloom] -> ss_store_sk, RF006[bloom] -> ss_sold_date_sk, RF008[bloom] -> ss_item_sk +| | stored statistics: +| | table: rows=2.88M size=200.96MB +| | partitions: 1824/1824 rows=2.88M +| | columns: all +| | extrapolated-rows=disabled max-scan-range-rows=130.09K est-scan-range=109(filtered from 1824) +| | mem-estimate=64.00MB mem-reservation=2.00MB thread-reservation=1 +| | tuple-ids=1 row-size=24B cardinality=1.72K(filtered from 2.88M) +| | in pipelines: 01(GETNEXT) +| | +| 03:SCAN HDFS [tpcds_parquet.customer] +| HDFS partitions=1/1 files=1 size=5.49MB +| runtime filters: RF005[min_max] -> c_customer_sk, RF004[bloom] -> c_customer_sk +| stored statistics: +| table: rows=100.00K size=5.49MB +| columns: all +| extrapolated-rows=disabled max-scan-range-rows=100.00K +| mem-estimate=32.00MB mem-reservation=1.00MB thread-reservation=1 +| tuple-ids=3 row-size=8B cardinality=1.72K(filtered from 100.00K) +| in pipelines: 03(GETNEXT) +| +04:SCAN HDFS [tpcds_parquet.customer_address] + HDFS partitions=1/1 files=1 size=1.16MB + runtime filters: RF003[min_max] -> ca_address_sk, RF002[bloom] -> ca_address_sk + stored statistics: + table: rows=50.00K size=1.16MB + columns: all + extrapolated-rows=disabled max-scan-range-rows=50.00K + mem-estimate=32.00MB mem-reservation=512.00KB thread-reservation=1 + tuple-ids=4 row-size=21B cardinality=1.76K(filtered from 50.00K) + in pipelines: 04(GETNEXT) +---- DISTRIBUTEDPLAN +Max Per-Host Resource Reservation: Memory=31.08MB Threads=16 +Per-Host Resource Estimates: Memory=353MB +F09:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +| Per-Host Resources: mem-estimate=4.03MB mem-reservation=4.00MB thread-reservation=1 +PLAN-ROOT SINK +| output exprs: i_brand_id, i_brand, i_manufact_id, i_manufact, sum(ss_ext_sales_price) +| mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 +| +22:MERGING-EXCHANGE [UNPARTITIONED] +| order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC +| limit: 100 +| mem-estimate=25.76KB mem-reservation=0B thread-reservation=0 +| tuple-ids=7 row-size=76B cardinality=100 +| in pipelines: 12(GETNEXT) +| +F08:PLAN FRAGMENT [HASH(i_brand,i_brand_id,i_manufact_id,i_manufact)] hosts=3 instances=3 +Per-Host Resources: mem-estimate=10.27MB mem-reservation=1.94MB thread-reservation=1 +12:TOP-N [LIMIT=100] +| order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC +| mem-estimate=7.38KB mem-reservation=0B thread-reservation=0 +| tuple-ids=7 row-size=76B cardinality=100 +| in pipelines: 12(GETNEXT), 21(OPEN) +| +21:AGGREGATE [FINALIZE] +| output: sum:merge(ss_ext_sales_price) +| group by: i_brand, i_brand_id, i_manufact_id, i_manufact +| mem-estimate=10.00MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0 +| tuple-ids=6 row-size=76B cardinality=1.72K +| in pipelines: 21(GETNEXT), 01(OPEN) +| +20:EXCHANGE [HASH(i_brand,i_brand_id,i_manufact_id,i_manufact)] +| mem-estimate=280.82KB mem-reservation=0B thread-reservation=0 +| tuple-ids=6 row-size=76B cardinality=1.72K +| in pipelines: 01(GETNEXT) +| +F06:PLAN FRAGMENT [HASH(c_current_addr_sk)] hosts=3 instances=3 +Per-Host Resources: mem-estimate=17.36MB mem-reservation=7.88MB thread-reservation=1 runtime-filters-memory=2.00MB +11:AGGREGATE [STREAMING] +| output: sum(ss_ext_sales_price) +| group by: i_brand, i_brand_id, i_manufact_id, i_manufact +| mem-estimate=10.00MB mem-reservation=2.00MB spill-buffer=64.00KB thread-reservation=0 +| tuple-ids=6 row-size=76B cardinality=1.72K +| in pipelines: 01(GETNEXT) +| +10:HASH JOIN [INNER JOIN, BROADCAST] +| hash predicates: ss_store_sk = s_store_sk +| fk/pk conjuncts: ss_store_sk = s_store_sk +| other predicates: substr(ca_zip, CAST(1 AS BIGINT), CAST(5 AS BIGINT)) != substr(s_zip, CAST(1 AS BIGINT), CAST(5 AS BIGINT)) +| runtime filters: RF000[bloom] <- s_store_sk, RF001[min_max] <- s_store_sk +| mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0 +| tuple-ids=1,2,0,3,4,5 row-size=158B cardinality=1.72K +| in pipelines: 01(GETNEXT), 05(OPEN) +| +|--19:EXCHANGE [BROADCAST] +| | mem-estimate=16.00KB mem-reservation=0B thread-reservation=0 +| | tuple-ids=5 row-size=21B cardinality=12 +| | in pipelines: 05(GETNEXT) +| | +| F07:PLAN FRAGMENT [RANDOM] hosts=1 instances=1 +| Per-Host Resources: mem-estimate=32.10MB mem-reservation=16.00KB thread-reservation=2 +| 05:SCAN HDFS [tpcds_parquet.store, RANDOM] +| HDFS partitions=1/1 files=1 size=9.93KB +| stored statistics: +| table: rows=12 size=9.93KB +| columns: all +| extrapolated-rows=disabled max-scan-range-rows=12 +| mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1 +| tuple-ids=5 row-size=21B cardinality=12 +| in pipelines: 05(GETNEXT) +| +09:HASH JOIN [INNER JOIN, PARTITIONED] +| hash predicates: c_current_addr_sk = ca_address_sk +| fk/pk conjuncts: c_current_addr_sk = ca_address_sk +| runtime filters: RF002[bloom] <- ca_address_sk, RF003[min_max] <- ca_address_sk +| mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0 +| tuple-ids=1,2,0,3,4 row-size=137B cardinality=1.72K +| in pipelines: 01(GETNEXT), 04(OPEN) +| +|--18:EXCHANGE [HASH(ca_address_sk)] +| | mem-estimate=1.03MB mem-reservation=0B thread-reservation=0 +| | tuple-ids=4 row-size=21B cardinality=50.00K +| | in pipelines: 04(GETNEXT) +| | +| F05:PLAN FRAGMENT [RANDOM] hosts=1 instances=1 +| Per-Host Resources: mem-estimate=32.29MB mem-reservation=512.00KB thread-reservation=2 +| 04:SCAN HDFS [tpcds_parquet.customer_address, RANDOM] +| HDFS partitions=1/1 files=1 size=1.16MB +| stored statistics: +| table: rows=50.00K size=1.16MB +| columns: all +| extrapolated-rows=disabled max-scan-range-rows=50.00K +| mem-estimate=32.00MB mem-reservation=512.00KB thread-reservation=1 +| tuple-ids=4 row-size=21B cardinality=50.00K +| in pipelines: 04(GETNEXT) +| +17:EXCHANGE [HASH(c_current_addr_sk)] +| mem-estimate=459.16KB mem-reservation=0B thread-reservation=0 +| tuple-ids=1,2,0,3 row-size=116B cardinality=1.72K +| in pipelines: 01(GETNEXT) +| +F04:PLAN FRAGMENT [HASH(ss_customer_sk)] hosts=3 instances=3 +Per-Host Resources: mem-estimate=5.83MB mem-reservation=3.88MB thread-reservation=1 runtime-filters-memory=1.00MB +08:HASH JOIN [INNER JOIN, PARTITIONED] +| hash predicates: ss_customer_sk = c_customer_sk +| fk/pk conjuncts: ss_customer_sk = c_customer_sk +| runtime filters: RF004[bloom] <- c_customer_sk, RF005[min_max] <- c_customer_sk +| mem-estimate=2.88MB mem-reservation=2.88MB spill-buffer=128.00KB thread-reservation=0 +| tuple-ids=1,2,0,3 row-size=116B cardinality=1.72K +| in pipelines: 01(GETNEXT), 03(OPEN) +| +|--16:EXCHANGE [HASH(c_customer_sk)] +| | mem-estimate=793.25KB mem-reservation=0B thread-reservation=0 +| | tuple-ids=3 row-size=8B cardinality=100.00K +| | in pipelines: 03(GETNEXT) +| | +| F03:PLAN FRAGMENT [RANDOM] hosts=1 instances=1 +| Per-Host Resources: mem-estimate=33.14MB mem-reservation=2.00MB thread-reservation=2 runtime-filters-memory=1.00MB +| 03:SCAN HDFS [tpcds_parquet.customer, RANDOM] +| HDFS partitions=1/1 files=1 size=5.49MB +| runtime filters: RF003[min_max] -> c_current_addr_sk, RF002[bloom] -> c_current_addr_sk +| stored statistics: +| table: rows=100.00K size=5.49MB +| columns: all +| extrapolated-rows=disabled max-scan-range-rows=100.00K +| mem-estimate=32.00MB mem-reservation=1.00MB thread-reservation=1 +| tuple-ids=3 row-size=8B cardinality=100.00K +| in pipelines: 03(GETNEXT) +| +15:EXCHANGE [HASH(ss_customer_sk)] +| mem-estimate=418.69KB mem-reservation=0B thread-reservation=0 +| tuple-ids=1,2,0 row-size=108B cardinality=1.72K +| in pipelines: 01(GETNEXT) +| +F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3 +Per-Host Resources: mem-estimate=73.28MB mem-reservation=9.88MB thread-reservation=2 runtime-filters-memory=4.00MB +07:HASH JOIN [INNER JOIN, BROADCAST] +| hash predicates: ss_sold_date_sk = d_date_sk +| fk/pk conjuncts: ss_sold_date_sk = d_date_sk +| runtime filters: RF006[bloom] <- d_date_sk +| mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0 +| tuple-ids=1,2,0 row-size=108B cardinality=1.72K +| in pipelines: 01(GETNEXT), 00(OPEN) +| +|--14:EXCHANGE [BROADCAST] +| | mem-estimate=16.00KB mem-reservation=0B thread-reservation=0 +| | tuple-ids=0 row-size=12B cardinality=108 +| | in pipelines: 00(GETNEXT) +| | +| F02:PLAN FRAGMENT [RANDOM] hosts=1 instances=1 +| Per-Host Resources: mem-estimate=48.06MB mem-reservation=512.00KB thread-reservation=2 +| 00:SCAN HDFS [tpcds_parquet.date_dim, RANDOM] +| HDFS partitions=1/1 files=1 size=2.15MB +| predicates: d_year = CAST(1999 AS INT), d_moy = CAST(11 AS INT) +| stored statistics: +| table: rows=73.05K size=2.15MB +| columns: all +| extrapolated-rows=disabled max-scan-range-rows=73.05K +| parquet statistics predicates: d_year = CAST(1999 AS INT), d_moy = CAST(11 AS INT) +| parquet dictionary predicates: d_year = CAST(1999 AS INT), d_moy = CAST(11 AS INT) +| mem-estimate=48.00MB mem-reservation=512.00KB thread-reservation=1 +| tuple-ids=0 row-size=12B cardinality=108 +| in pipelines: 00(GETNEXT) +| +06:HASH JOIN [INNER JOIN, BROADCAST] +| hash predicates: ss_item_sk = i_item_sk +| fk/pk conjuncts: ss_item_sk = i_item_sk +| runtime filters: RF008[bloom] <- i_item_sk, RF009[min_max] <- i_item_sk +| mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0 +| tuple-ids=1,2 row-size=96B cardinality=1.72K(filtered from 28.96K) +| in pipelines: 01(GETNEXT), 02(OPEN) +| +|--13:EXCHANGE [BROADCAST] +| | mem-estimate=26.00KB mem-reservation=0B thread-reservation=0 +| | tuple-ids=2 row-size=72B cardinality=181 +| | in pipelines: 02(GETNEXT) +| | +| F01:PLAN FRAGMENT [RANDOM] hosts=1 instances=1 +| Per-Host Resources: mem-estimate=96.30MB mem-reservation=512.00KB thread-reservation=2 +| 02:SCAN HDFS [tpcds_parquet.item, RANDOM] +| HDFS partitions=1/1 files=1 size=1.73MB +| predicates: i_manager_id = CAST(7 AS INT) +| stored statistics: +| table: rows=18.00K size=1.73MB +| columns: all +| extrapolated-rows=disabled max-scan-range-rows=18.00K +| parquet statistics predicates: i_manager_id = CAST(7 AS INT) +| parquet dictionary predicates: i_manager_id = CAST(7 AS INT) +| mem-estimate=96.00MB mem-reservation=512.00KB thread-reservation=1 +| tuple-ids=2 row-size=72B cardinality=181 +| in pipelines: 02(GETNEXT) +| +01:SCAN HDFS [tpcds_parquet.store_sales, RANDOM] + HDFS partitions=1824/1824 files=1824 size=200.96MB + runtime filters: RF001[min_max] -> ss_store_sk, RF005[min_max] -> ss_customer_sk, RF009[min_max] -> ss_item_sk, RF000[bloom] -> ss_store_sk, RF004[bloom] -> ss_customer_sk, RF006[bloom] -> ss_sold_date_sk, RF008[bloom] -> ss_item_sk + stored statistics: + table: rows=2.88M size=200.96MB + partitions: 1824/1824 rows=2.88M + columns: all + extrapolated-rows=disabled max-scan-range-rows=130.09K est-scan-range=109(filtered from 1824) + mem-estimate=64.00MB mem-reservation=2.00MB thread-reservation=1 + tuple-ids=1 row-size=24B cardinality=1.72K(filtered from 2.88M) + in pipelines: 01(GETNEXT) +---- PARALLELPLANS +Max Per-Host Resource Reservation: Memory=49.77MB Threads=21 +Per-Host Resource Estimates: Memory=212MB +F09:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +| Per-Instance Resources: mem-estimate=4.05MB mem-reservation=4.00MB thread-reservation=1 +PLAN-ROOT SINK +| output exprs: i_brand_id, i_brand, i_manufact_id, i_manufact, sum(ss_ext_sales_price) +| mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 +| +22:MERGING-EXCHANGE [UNPARTITIONED] +| order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC +| limit: 100 +| mem-estimate=49.06KB mem-reservation=0B thread-reservation=0 +| tuple-ids=7 row-size=76B cardinality=100 +| in pipelines: 12(GETNEXT) +| +F08:PLAN FRAGMENT [HASH(i_brand,i_brand_id,i_manufact_id,i_manufact)] hosts=3 instances=6 +Per-Instance Resources: mem-estimate=10.51MB mem-reservation=1.94MB thread-reservation=1 +12:TOP-N [LIMIT=100] +| order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC +| mem-estimate=7.38KB mem-reservation=0B thread-reservation=0 +| tuple-ids=7 row-size=76B cardinality=100 +| in pipelines: 12(GETNEXT), 21(OPEN) +| +21:AGGREGATE [FINALIZE] +| output: sum:merge(ss_ext_sales_price) +| group by: i_brand, i_brand_id, i_manufact_id, i_manufact +| mem-estimate=10.00MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0 +| tuple-ids=6 row-size=76B cardinality=1.72K +| in pipelines: 21(GETNEXT), 01(OPEN) +| +20:EXCHANGE [HASH(i_brand,i_brand_id,i_manufact_id,i_manufact)] +| mem-estimate=519.44KB mem-reservation=0B thread-reservation=0 +| tuple-ids=6 row-size=76B cardinality=1.72K +| in pipelines: 01(GETNEXT) +| +F06:PLAN FRAGMENT [HASH(c_current_addr_sk)] hosts=3 instances=6 +Per-Instance Resources: mem-estimate=12.70MB mem-reservation=2.00MB thread-reservation=1 +11:AGGREGATE [STREAMING] +| output: sum(ss_ext_sales_price) +| group by: i_brand, i_brand_id, i_manufact_id, i_manufact +| mem-estimate=10.00MB mem-reservation=2.00MB spill-buffer=64.00KB thread-reservation=0 +| tuple-ids=6 row-size=76B cardinality=1.72K +| in pipelines: 01(GETNEXT) +| +10:HASH JOIN [INNER JOIN, BROADCAST] +| hash-table-id=00 +| hash predicates: ss_store_sk = s_store_sk +| fk/pk conjuncts: ss_store_sk = s_store_sk +| other predicates: substr(ca_zip, CAST(1 AS BIGINT), CAST(5 AS BIGINT)) != substr(s_zip, CAST(1 AS BIGINT), CAST(5 AS BIGINT)) +| mem-estimate=0B mem-reservation=0B spill-buffer=64.00KB thread-reservation=0 +| tuple-ids=1,2,0,3,4,5 row-size=158B cardinality=1.72K +| in pipelines: 01(GETNEXT), 05(OPEN) +| +|--F10:PLAN FRAGMENT [HASH(c_current_addr_sk)] hosts=3 instances=3 +| | Per-Instance Resources: mem-estimate=4.89MB mem-reservation=4.88MB thread-reservation=1 runtime-filters-memory=1.00MB +| JOIN BUILD +| | join-table-id=00 plan-id=01 cohort-id=01 +| | build expressions: s_store_sk +| | runtime filters: RF000[bloom] <- s_store_sk, RF001[min_max] <- s_store_sk +| | mem-estimate=3.88MB mem-reservation=3.88MB spill-buffer=64.00KB thread-reservation=0 +| | +| 19:EXCHANGE [BROADCAST] +| | mem-estimate=16.00KB mem-reservation=0B thread-reservation=0 +| | tuple-ids=5 row-size=21B cardinality=12 +| | in pipelines: 05(GETNEXT) +| | +| F07:PLAN FRAGMENT [RANDOM] hosts=1 instances=1 +| Per-Instance Resources: mem-estimate=16.10MB mem-reservation=16.00KB thread-reservation=1 +| 05:SCAN HDFS [tpcds_parquet.store, RANDOM] +| HDFS partitions=1/1 files=1 size=9.93KB +| stored statistics: +| table: rows=12 size=9.93KB +| columns: all +| extrapolated-rows=disabled max-scan-range-rows=12 +| mem-estimate=16.00MB mem-reservation=16.00KB thread-reservation=0 +| tuple-ids=5 row-size=21B cardinality=12 +| in pipelines: 05(GETNEXT) +| +09:HASH JOIN [INNER JOIN, PARTITIONED] +| hash-table-id=01 +| hash predicates: c_current_addr_sk = ca_address_sk +| fk/pk conjuncts: c_current_addr_sk = ca_address_sk +| mem-estimate=0B mem-reservation=0B spill-buffer=64.00KB thread-reservation=0 +| tuple-ids=1,2,0,3,4 row-size=137B cardinality=1.72K +| in pipelines: 01(GETNEXT), 04(OPEN) +| +|--F11:PLAN FRAGMENT [HASH(c_current_addr_sk)] hosts=3 instances=6 +| | Per-Instance Resources: mem-estimate=3.96MB mem-reservation=2.94MB thread-reservation=1 runtime-filters-memory=1.00MB +| JOIN BUILD +| | join-table-id=01 plan-id=02 cohort-id=01 +| | build expressions: ca_address_sk +| | runtime filters: RF002[bloom] <- ca_address_sk, RF003[min_max] <- ca_address_sk +| | mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0 +| | +| 18:EXCHANGE [HASH(ca_address_sk)] +| | mem-estimate=1.03MB mem-reservation=0B thread-reservation=0 +| | tuple-ids=4 row-size=21B cardinality=50.00K +| | in pipelines: 04(GETNEXT) +| | +| F05:PLAN FRAGMENT [RANDOM] hosts=1 instances=1 +| Per-Instance Resources: mem-estimate=16.59MB mem-reservation=512.00KB thread-reservation=1 +| 04:SCAN HDFS [tpcds_parquet.customer_address, RANDOM] +| HDFS partitions=1/1 files=1 size=1.16MB +| stored statistics: +| table: rows=50.00K size=1.16MB +| columns: all +| extrapolated-rows=disabled max-scan-range-rows=50.00K +| mem-estimate=16.00MB mem-reservation=512.00KB thread-reservation=0 +| tuple-ids=4 row-size=21B cardinality=50.00K +| in pipelines: 04(GETNEXT) +| +17:EXCHANGE [HASH(c_current_addr_sk)] +| mem-estimate=853.78KB mem-reservation=0B thread-reservation=0 +| tuple-ids=1,2,0,3 row-size=116B cardinality=1.72K +| in pipelines: 01(GETNEXT) +| +F04:PLAN FRAGMENT [HASH(ss_customer_sk)] hosts=3 instances=6 +Per-Instance Resources: mem-estimate=3.84MB mem-reservation=0B thread-reservation=1 +08:HASH JOIN [INNER JOIN, PARTITIONED] +| hash-table-id=02 +| hash predicates: ss_customer_sk = c_customer_sk +| fk/pk conjuncts: ss_customer_sk = c_customer_sk +| mem-estimate=0B mem-reservation=0B spill-buffer=64.00KB thread-reservation=0 +| tuple-ids=1,2,0,3 row-size=116B cardinality=1.72K +| in pipelines: 01(GETNEXT), 03(OPEN) +| +|--F12:PLAN FRAGMENT [HASH(ss_customer_sk)] hosts=3 instances=6 +| | Per-Instance Resources: mem-estimate=3.71MB mem-reservation=2.94MB thread-reservation=1 runtime-filters-memory=1.00MB +| JOIN BUILD +| | join-table-id=02 plan-id=03 cohort-id=01 +| | build expressions: c_customer_sk +| | runtime filters: RF004[bloom] <- c_customer_sk, RF005[min_max] <- c_customer_sk +| | mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0 +| | +| 16:EXCHANGE [HASH(c_customer_sk)] +| | mem-estimate=793.25KB mem-reservation=0B thread-reservation=0 +| | tuple-ids=3 row-size=8B cardinality=100.00K +| | in pipelines: 03(GETNEXT) +| | +| F03:PLAN FRAGMENT [RANDOM] hosts=1 instances=1 +| Per-Host Shared Resources: mem-estimate=1.00MB mem-reservation=1.00MB thread-reservation=0 runtime-filters-memory=1.00MB +| Per-Instance Resources: mem-estimate=16.28MB mem-reservation=1.00MB thread-reservation=1 +| 03:SCAN HDFS [tpcds_parquet.customer, RANDOM] +| HDFS partitions=1/1 files=1 size=5.49MB +| runtime filters: RF003[min_max] -> c_current_addr_sk, RF002[bloom] -> c_current_addr_sk +| stored statistics: +| table: rows=100.00K size=5.49MB +| columns: all +| extrapolated-rows=disabled max-scan-range-rows=100.00K +| mem-estimate=16.00MB mem-reservation=1.00MB thread-reservation=0 +| tuple-ids=3 row-size=8B cardinality=100.00K +| in pipelines: 03(GETNEXT) +| +15:EXCHANGE [HASH(ss_customer_sk)] +| mem-estimate=777.31KB mem-reservation=0B thread-reservation=0 +| tuple-ids=1,2,0 row-size=108B cardinality=1.72K +| in pipelines: 01(GETNEXT) +| +F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=6 +Per-Host Shared Resources: mem-estimate=4.00MB mem-reservation=4.00MB thread-reservation=0 runtime-filters-memory=4.00MB +Per-Instance Resources: mem-estimate=18.80MB mem-reservation=2.00MB thread-reservation=1 +07:HASH JOIN [INNER JOIN, BROADCAST] +| hash-table-id=03 +| hash predicates: ss_sold_date_sk = d_date_sk +| fk/pk conjuncts: ss_sold_date_sk = d_date_sk +| mem-estimate=0B mem-reservation=0B spill-buffer=64.00KB thread-reservation=0 +| tuple-ids=1,2,0 row-size=108B cardinality=1.72K +| in pipelines: 01(GETNEXT), 00(OPEN) +| +|--F13:PLAN FRAGMENT [RANDOM] hosts=3 instances=3 +| | Per-Instance Resources: mem-estimate=4.89MB mem-reservation=4.88MB thread-reservation=1 runtime-filters-memory=1.00MB +| JOIN BUILD +| | join-table-id=03 plan-id=04 cohort-id=01 +| | build expressions: d_date_sk +| | runtime filters: RF006[bloom] <- d_date_sk +| | mem-estimate=3.88MB mem-reservation=3.88MB spill-buffer=64.00KB thread-reservation=0 +| | +| 14:EXCHANGE [BROADCAST] +| | mem-estimate=16.00KB mem-reservation=0B thread-reservation=0 +| | tuple-ids=0 row-size=12B cardinality=108 +| | in pipelines: 00(GETNEXT) +| | +| F02:PLAN FRAGMENT [RANDOM] hosts=1 instances=1 +| Per-Instance Resources: mem-estimate=16.06MB mem-reservation=512.00KB thread-reservation=1 +| 00:SCAN HDFS [tpcds_parquet.date_dim, RANDOM] +| HDFS partitions=1/1 files=1 size=2.15MB +| predicates: d_year = CAST(1999 AS INT), d_moy = CAST(11 AS INT) +| stored statistics: +| table: rows=73.05K size=2.15MB +| columns: all +| extrapolated-rows=disabled max-scan-range-rows=73.05K +| parquet statistics predicates: d_year = CAST(1999 AS INT), d_moy = CAST(11 AS INT) +| parquet dictionary predicates: d_year = CAST(1999 AS INT), d_moy = CAST(11 AS INT) +| mem-estimate=16.00MB mem-reservation=512.00KB thread-reservation=0 +| tuple-ids=0 row-size=12B cardinality=108 +| in pipelines: 00(GETNEXT) +| +06:HASH JOIN [INNER JOIN, BROADCAST] +| hash-table-id=04 +| hash predicates: ss_item_sk = i_item_sk +| fk/pk conjuncts: ss_item_sk = i_item_sk +| mem-estimate=0B mem-reservation=0B spill-buffer=64.00KB thread-reservation=0 +| tuple-ids=1,2 row-size=96B cardinality=1.72K(filtered from 28.96K) +| in pipelines: 01(GETNEXT), 02(OPEN) +| +|--F14:PLAN FRAGMENT [RANDOM] hosts=3 instances=3 +| | Per-Instance Resources: mem-estimate=4.90MB mem-reservation=4.88MB thread-reservation=1 runtime-filters-memory=1.00MB +| JOIN BUILD +| | join-table-id=04 plan-id=05 cohort-id=01 +| | build expressions: i_item_sk +| | runtime filters: RF008[bloom] <- i_item_sk, RF009[min_max] <- i_item_sk +| | mem-estimate=3.88MB mem-reservation=3.88MB spill-buffer=64.00KB thread-reservation=0 +| | +| 13:EXCHANGE [BROADCAST] +| | mem-estimate=26.00KB mem-reservation=0B thread-reservation=0 +| | tuple-ids=2 row-size=72B cardinality=181 +| | in pipelines: 02(GETNEXT) +| | +| F01:PLAN FRAGMENT [RANDOM] hosts=1 instances=1 +| Per-Instance Resources: mem-estimate=16.30MB mem-reservation=512.00KB thread-reservation=1 +| 02:SCAN HDFS [tpcds_parquet.item, RANDOM] +| HDFS partitions=1/1 files=1 size=1.73MB +| predicates: i_manager_id = CAST(7 AS INT) +| stored statistics: +| table: rows=18.00K size=1.73MB +| columns: all +| extrapolated-rows=disabled max-scan-range-rows=18.00K +| parquet statistics predicates: i_manager_id = CAST(7 AS INT) +| parquet dictionary predicates: i_manager_id = CAST(7 AS INT) +| mem-estimate=16.00MB mem-reservation=512.00KB thread-reservation=0 +| tuple-ids=2 row-size=72B cardinality=181 +| in pipelines: 02(GETNEXT) +| +01:SCAN HDFS [tpcds_parquet.store_sales, RANDOM] + HDFS partitions=1824/1824 files=1824 size=200.96MB + runtime filters: RF001[min_max] -> ss_store_sk, RF005[min_max] -> ss_customer_sk, RF009[min_max] -> ss_item_sk, RF000[bloom] -> ss_store_sk, RF004[bloom] -> ss_customer_sk, RF006[bloom] -> ss_sold_date_sk, RF008[bloom] -> ss_item_sk + stored statistics: + table: rows=2.88M size=200.96MB + partitions: 1824/1824 rows=2.88M + columns: all + extrapolated-rows=disabled max-scan-range-rows=130.09K est-scan-range=109(filtered from 1824) + mem-estimate=16.00MB mem-reservation=2.00MB thread-reservation=0 + tuple-ids=1 row-size=24B cardinality=1.72K(filtered from 2.88M) + in pipelines: 01(GETNEXT) +====
