This is an automated email from the ASF dual-hosted git repository. yumwang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new d627d8e4f48 [SPARK-40248][SQL] Use larger number of bits to build Bloom filter d627d8e4f48 is described below commit d627d8e4f4802b8200574a1a73c4bebe5d813a5a Author: Yuming Wang <yumw...@ebay.com> AuthorDate: Wed Nov 2 18:05:54 2022 +0800 [SPARK-40248][SQL] Use larger number of bits to build Bloom filter ### What changes were proposed in this pull request? This PR makes Bloom filter join use a larger number of bits to build the Bloom filter when the row count is available. ### Why are the changes needed? To fix the issue that Bloom filter join cannot filter out more data when CBO is enabled. For example: TPC-DS q64: CBO is enabled | CBO is disabled -- | -- <img width="282" height="600" alt="image" src="https://user-images.githubusercontent.com/5399861/187076753-2e9ccc72-0289-4537-a6d9-3a01a37bf6cd.png"> | <img width="373" height="600" alt="image" src="https://user-images.githubusercontent.com/5399861/187076786-c982e711-52e2-4199-ba42-e1100f57287b.png"> <img width="532" height="400" alt="image" src="https://user-images.githubusercontent.com/5399861/187075553-bd6956b7-8f1f-4df5-82b7-d010defb6d21.png"> | <img width="622" height="400" alt="image" src="https://user-images.githubusercontent.com/5399861/187075588-254c3246-b9af-403c-8df7-d8344fd1d2a4.png"> After this PR: Build bloom filter | Filter data -- | -- <img width="262" height="600" alt="image" src="https://user-images.githubusercontent.com/5399861/187075676-85b2afae-03a0-4430-9c4e-2679c6ef62f7.png"> | <img width="509" height="600" alt="image" src="https://user-images.githubusercontent.com/5399861/187075713-41173dc1-d01d-476a-b218-5c67be823e1b.png"> ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #37697 from wangyum/SPARK-40248. 
Lead-authored-by: Yuming Wang <yumw...@ebay.com> Co-authored-by: Yuming Wang <wgy...@gmail.com> Signed-off-by: Yuming Wang <yumw...@ebay.com> --- .../org/apache/spark/util/sketch/BloomFilter.java | 9 ++++++++ .../aggregate/BloomFilterAggregate.scala | 16 +++++++++++---- .../catalyst/optimizer/InjectRuntimeFilter.scala | 3 +-- .../approved-plans-modified/q10.sf100/explain.txt | 8 ++++---- .../q10.sf100/simplified.txt | 2 +- .../approved-plans-modified/q59.sf100/explain.txt | 16 +++++++-------- .../q59.sf100/simplified.txt | 4 ++-- .../approved-plans-v1_4/q10.sf100/explain.txt | 8 ++++---- .../approved-plans-v1_4/q10.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q16.sf100/explain.txt | 24 +++++++++++----------- .../approved-plans-v1_4/q16.sf100/simplified.txt | 6 +++--- .../approved-plans-v1_4/q2.sf100/explain.txt | 16 +++++++-------- .../approved-plans-v1_4/q2.sf100/simplified.txt | 4 ++-- .../approved-plans-v1_4/q32.sf100/explain.txt | 8 ++++---- .../approved-plans-v1_4/q32.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q40.sf100/explain.txt | 8 ++++---- .../approved-plans-v1_4/q40.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q59.sf100/explain.txt | 16 +++++++-------- .../approved-plans-v1_4/q59.sf100/simplified.txt | 4 ++-- .../approved-plans-v1_4/q64.sf100/explain.txt | 8 ++++---- .../approved-plans-v1_4/q64.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q69.sf100/explain.txt | 8 ++++---- .../approved-plans-v1_4/q69.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q80.sf100/explain.txt | 16 +++++++-------- .../approved-plans-v1_4/q80.sf100/simplified.txt | 4 ++-- .../approved-plans-v1_4/q85.sf100/explain.txt | 16 +++++++-------- .../approved-plans-v1_4/q85.sf100/simplified.txt | 4 ++-- .../approved-plans-v1_4/q92.sf100/explain.txt | 8 ++++---- .../approved-plans-v1_4/q92.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q94.sf100/explain.txt | 24 +++++++++++----------- .../approved-plans-v1_4/q94.sf100/simplified.txt | 6 +++--- 
.../approved-plans-v1_4/q95.sf100/explain.txt | 24 +++++++++++----------- .../approved-plans-v1_4/q95.sf100/simplified.txt | 6 +++--- .../approved-plans-v2_7/q10a.sf100/explain.txt | 8 ++++---- .../approved-plans-v2_7/q10a.sf100/simplified.txt | 2 +- .../approved-plans-v2_7/q64.sf100/explain.txt | 8 ++++---- .../approved-plans-v2_7/q64.sf100/simplified.txt | 2 +- .../approved-plans-v2_7/q80a.sf100/explain.txt | 16 +++++++-------- .../approved-plans-v2_7/q80a.sf100/simplified.txt | 4 ++-- .../spark/sql/BloomFilterAggregateQuerySuite.scala | 17 +++++++++++++++ 40 files changed, 189 insertions(+), 156 deletions(-) diff --git a/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilter.java b/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilter.java index 2a6e270a912..5c01841e501 100644 --- a/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilter.java +++ b/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilter.java @@ -207,6 +207,15 @@ public abstract class BloomFilter { static final double DEFAULT_FPP = 0.03; + /** + * Computes m (total bits of Bloom filter) which is expected to achieve. + * The smaller the expectedNumItems, the smaller the fpp. + */ + public static long optimalNumOfBits(long expectedNumItems, long maxNumItems, long maxNumOfBits) { + double fpp = Math.min(expectedNumItems / (maxNumItems / DEFAULT_FPP), DEFAULT_FPP); + return Math.min(optimalNumOfBits(expectedNumItems, fpp), maxNumOfBits); + } + /** * Creates a {@link BloomFilter} with the expected number of insertions and a default expected * false positive probability of 3%. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/BloomFilterAggregate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/BloomFilterAggregate.scala index 5b78c5b5228..980785e764c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/BloomFilterAggregate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/BloomFilterAggregate.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.Cast.{toSQLExpr, toSQLId, toSQLType, toSQLValue} import org.apache.spark.sql.catalyst.trees.TernaryLike import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.{RUNTIME_BLOOM_FILTER_MAX_NUM_BITS, RUNTIME_BLOOM_FILTER_MAX_NUM_ITEMS} import org.apache.spark.sql.types._ import org.apache.spark.util.sketch.BloomFilter @@ -56,6 +57,13 @@ case class BloomFilterAggregate( Multiply(estimatedNumItemsExpression, Literal(8L))) } + def this(child: Expression, estimatedNumItems: Long) = { + this(child, Literal(estimatedNumItems), + Literal(BloomFilter.optimalNumOfBits(estimatedNumItems, + SQLConf.get.getConf(RUNTIME_BLOOM_FILTER_MAX_NUM_ITEMS), + SQLConf.get.getConf(RUNTIME_BLOOM_FILTER_MAX_NUM_BITS)))) + } + def this(child: Expression) = { this(child, Literal(SQLConf.get.getConf(SQLConf.RUNTIME_BLOOM_FILTER_EXPECTED_NUM_ITEMS)), Literal(SQLConf.get.getConf(SQLConf.RUNTIME_BLOOM_FILTER_NUM_BITS))) @@ -109,8 +117,8 @@ case class BloomFilterAggregate( ) } else { require(estimatedNumItems <= - SQLConf.get.getConf(SQLConf.RUNTIME_BLOOM_FILTER_MAX_NUM_ITEMS)) - require(numBits <= SQLConf.get.getConf(SQLConf.RUNTIME_BLOOM_FILTER_MAX_NUM_BITS)) + SQLConf.get.getConf(RUNTIME_BLOOM_FILTER_MAX_NUM_ITEMS)) + require(numBits <= SQLConf.get.getConf(RUNTIME_BLOOM_FILTER_MAX_NUM_BITS)) TypeCheckSuccess } case _ => @@ -135,12 +143,12 @@ case class 
BloomFilterAggregate( // Mark as lazy so that `estimatedNumItems` is not evaluated during tree transformation. private lazy val estimatedNumItems: Long = Math.min(estimatedNumItemsExpression.eval().asInstanceOf[Number].longValue, - SQLConf.get.getConf(SQLConf.RUNTIME_BLOOM_FILTER_MAX_NUM_ITEMS)) + SQLConf.get.getConf(RUNTIME_BLOOM_FILTER_MAX_NUM_ITEMS)) // Mark as lazy so that `numBits` is not evaluated during tree transformation. private lazy val numBits: Long = Math.min(numBitsExpression.eval().asInstanceOf[Number].longValue, - SQLConf.get.getConf(SQLConf.RUNTIME_BLOOM_FILTER_MAX_NUM_BITS)) + SQLConf.get.getConf(RUNTIME_BLOOM_FILTER_MAX_NUM_BITS)) override def first: Expression = child diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala index 8c63012c681..62782f6051b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala @@ -77,8 +77,7 @@ object InjectRuntimeFilter extends Rule[LogicalPlan] with PredicateHelper with J val rowCount = filterCreationSidePlan.stats.rowCount val bloomFilterAgg = if (rowCount.isDefined && rowCount.get.longValue > 0L) { - new BloomFilterAggregate(new XxHash64(Seq(filterCreationSideExp)), - Literal(rowCount.get.longValue)) + new BloomFilterAggregate(new XxHash64(Seq(filterCreationSideExp)), rowCount.get.longValue) } else { new BloomFilterAggregate(new XxHash64(Seq(filterCreationSideExp))) } diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10.sf100/explain.txt index efd0db46b9f..8e472ce0479 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10.sf100/explain.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10.sf100/explain.txt @@ -304,7 +304,7 @@ Input [2]: [ca_address_sk#19, ca_county#20] (53) ObjectHashAggregate Input [1]: [ca_address_sk#19] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(ca_address_sk#19, 42), 2555, 20440, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(ca_address_sk#19, 42), 2555, 57765, 0, 0)] Aggregate Attributes [1]: [buf#39] Results [1]: [buf#40] @@ -315,9 +315,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] (55) ObjectHashAggregate Input [1]: [buf#40] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(ca_address_sk#19, 42), 2555, 20440, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(ca_address_sk#19, 42), 2555, 20440, 0, 0)#41] -Results [1]: [bloom_filter_agg(xxhash64(ca_address_sk#19, 42), 2555, 20440, 0, 0)#41 AS bloomFilter#42] +Functions [1]: [bloom_filter_agg(xxhash64(ca_address_sk#19, 42), 2555, 57765, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(ca_address_sk#19, 42), 2555, 57765, 0, 0)#41] +Results [1]: [bloom_filter_agg(xxhash64(ca_address_sk#19, 42), 2555, 57765, 0, 0)#41 AS bloomFilter#42] Subquery:2 Hosting operator id = 6 Hosting Expression = ws_sold_date_sk#7 IN dynamicpruning#8 BroadcastExchange (60) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10.sf100/simplified.txt index 9adf7d0719d..4ac6e8e08a0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10.sf100/simplified.txt @@ -25,7 +25,7 @@ TakeOrderedAndProject [cd_gender,cd_marital_status,cd_education_status,cd_purcha WholeStageCodegen (1) Filter [c_customer_sk,c_current_addr_sk,c_current_cdemo_sk] Subquery #1 - ObjectHashAggregate [buf] 
[bloom_filter_agg(xxhash64(ca_address_sk, 42), 2555, 20440, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(ca_address_sk, 42), 2555, 57765, 0, 0),bloomFilter,buf] Exchange #4 ObjectHashAggregate [ca_address_sk] [buf,buf] WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/explain.txt index c021a12eca0..5ce802dabc2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/explain.txt @@ -348,7 +348,7 @@ Input [2]: [d_month_seq#40, d_week_seq#41] (59) ObjectHashAggregate Input [1]: [d_week_seq#41] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(d_week_seq#41, 42), 335, 2680, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(d_week_seq#41, 42), 335, 8990, 0, 0)] Aggregate Attributes [1]: [buf#85] Results [1]: [buf#86] @@ -359,9 +359,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=10] (61) ObjectHashAggregate Input [1]: [buf#86] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(d_week_seq#41, 42), 335, 2680, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(d_week_seq#41, 42), 335, 2680, 0, 0)#87] -Results [1]: [bloom_filter_agg(xxhash64(d_week_seq#41, 42), 335, 2680, 0, 0)#87 AS bloomFilter#88] +Functions [1]: [bloom_filter_agg(xxhash64(d_week_seq#41, 42), 335, 8990, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(d_week_seq#41, 42), 335, 8990, 0, 0)#87] +Results [1]: [bloom_filter_agg(xxhash64(d_week_seq#41, 42), 335, 8990, 0, 0)#87 AS bloomFilter#88] Subquery:2 Hosting operator id = 31 Hosting Expression = Subquery scalar-subquery#52, [id=#53] ObjectHashAggregate (68) @@ -394,7 +394,7 @@ Input [2]: [d_month_seq#68, d_week_seq#69] (66) ObjectHashAggregate Input [1]: 
[d_week_seq#69] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(d_week_seq#69, 42), 335, 2680, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(d_week_seq#69, 42), 335, 8990, 0, 0)] Aggregate Attributes [1]: [buf#89] Results [1]: [buf#90] @@ -405,8 +405,8 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=11] (68) ObjectHashAggregate Input [1]: [buf#90] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(d_week_seq#69, 42), 335, 2680, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(d_week_seq#69, 42), 335, 2680, 0, 0)#91] -Results [1]: [bloom_filter_agg(xxhash64(d_week_seq#69, 42), 335, 2680, 0, 0)#91 AS bloomFilter#92] +Functions [1]: [bloom_filter_agg(xxhash64(d_week_seq#69, 42), 335, 8990, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(d_week_seq#69, 42), 335, 8990, 0, 0)#91] +Results [1]: [bloom_filter_agg(xxhash64(d_week_seq#69, 42), 335, 8990, 0, 0)#91 AS bloomFilter#92] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/simplified.txt index 9825affd389..534396577ab 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/simplified.txt @@ -22,7 +22,7 @@ TakeOrderedAndProject [s_store_name1,s_store_id1,d_week_seq1,(sun_sales1 / sun_s WholeStageCodegen (1) Filter [d_date_sk,d_week_seq] Subquery #1 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(d_week_seq, 42), 335, 2680, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(d_week_seq, 42), 335, 8990, 0, 0),bloomFilter,buf] Exchange #3 ObjectHashAggregate [d_week_seq] [buf,buf] WholeStageCodegen (1) @@ -72,7 +72,7 @@ TakeOrderedAndProject [s_store_name1,s_store_id1,d_week_seq1,(sun_sales1 / sun_s WholeStageCodegen (5) Filter 
[d_date_sk,d_week_seq] Subquery #2 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(d_week_seq, 42), 335, 2680, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(d_week_seq, 42), 335, 8990, 0, 0),bloomFilter,buf] Exchange #9 ObjectHashAggregate [d_week_seq] [buf,buf] WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.sf100/explain.txt index 7163f161188..d3434fd6be9 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.sf100/explain.txt @@ -320,7 +320,7 @@ Input [2]: [ca_address_sk#18, ca_county#19] (56) ObjectHashAggregate Input [1]: [ca_address_sk#18] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(ca_address_sk#18, 42), 2555, 20440, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(ca_address_sk#18, 42), 2555, 57765, 0, 0)] Aggregate Attributes [1]: [buf#38] Results [1]: [buf#39] @@ -331,9 +331,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=9] (58) ObjectHashAggregate Input [1]: [buf#39] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(ca_address_sk#18, 42), 2555, 20440, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(ca_address_sk#18, 42), 2555, 20440, 0, 0)#40] -Results [1]: [bloom_filter_agg(xxhash64(ca_address_sk#18, 42), 2555, 20440, 0, 0)#40 AS bloomFilter#41] +Functions [1]: [bloom_filter_agg(xxhash64(ca_address_sk#18, 42), 2555, 57765, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(ca_address_sk#18, 42), 2555, 57765, 0, 0)#40] +Results [1]: [bloom_filter_agg(xxhash64(ca_address_sk#18, 42), 2555, 57765, 0, 0)#40 AS bloomFilter#41] Subquery:2 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#9 IN dynamicpruning#10 BroadcastExchange (63) diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.sf100/simplified.txt index 7930a3a4516..9528756e264 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.sf100/simplified.txt @@ -32,7 +32,7 @@ TakeOrderedAndProject [cd_gender,cd_marital_status,cd_education_status,cd_purcha WholeStageCodegen (1) Filter [c_current_addr_sk,c_current_cdemo_sk] Subquery #1 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(ca_address_sk, 42), 2555, 20440, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(ca_address_sk, 42), 2555, 57765, 0, 0),bloomFilter,buf] Exchange #4 ObjectHashAggregate [ca_address_sk] [buf,buf] WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt index 79c4fa5c0a4..aadf72d0af2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt @@ -291,7 +291,7 @@ Input [2]: [ca_address_sk#20, ca_state#21] (50) ObjectHashAggregate Input [1]: [ca_address_sk#20] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(ca_address_sk#20, 42), 17961, 143688, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(ca_address_sk#20, 42), 17961, 333176, 0, 0)] Aggregate Attributes [1]: [buf#35] Results [1]: [buf#36] @@ -302,9 +302,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] (52) ObjectHashAggregate Input [1]: [buf#36] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(ca_address_sk#20, 42), 17961, 143688, 0, 0)] -Aggregate Attributes [1]: 
[bloom_filter_agg(xxhash64(ca_address_sk#20, 42), 17961, 143688, 0, 0)#37] -Results [1]: [bloom_filter_agg(xxhash64(ca_address_sk#20, 42), 17961, 143688, 0, 0)#37 AS bloomFilter#38] +Functions [1]: [bloom_filter_agg(xxhash64(ca_address_sk#20, 42), 17961, 333176, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(ca_address_sk#20, 42), 17961, 333176, 0, 0)#37] +Results [1]: [bloom_filter_agg(xxhash64(ca_address_sk#20, 42), 17961, 333176, 0, 0)#37 AS bloomFilter#38] Subquery:2 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#11, [id=#12] ObjectHashAggregate (59) @@ -337,7 +337,7 @@ Input [2]: [cc_call_center_sk#22, cc_county#23] (57) ObjectHashAggregate Input [1]: [cc_call_center_sk#22] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(cc_call_center_sk#22, 42), 4, 32, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(cc_call_center_sk#22, 42), 4, 144, 0, 0)] Aggregate Attributes [1]: [buf#39] Results [1]: [buf#40] @@ -348,9 +348,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=9] (59) ObjectHashAggregate Input [1]: [buf#40] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(cc_call_center_sk#22, 42), 4, 32, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(cc_call_center_sk#22, 42), 4, 32, 0, 0)#41] -Results [1]: [bloom_filter_agg(xxhash64(cc_call_center_sk#22, 42), 4, 32, 0, 0)#41 AS bloomFilter#42] +Functions [1]: [bloom_filter_agg(xxhash64(cc_call_center_sk#22, 42), 4, 144, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(cc_call_center_sk#22, 42), 4, 144, 0, 0)#41] +Results [1]: [bloom_filter_agg(xxhash64(cc_call_center_sk#22, 42), 4, 144, 0, 0)#41 AS bloomFilter#42] Subquery:3 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#13, [id=#14] ObjectHashAggregate (66) @@ -383,7 +383,7 @@ Input [2]: [d_date_sk#24, d_date#25] (64) ObjectHashAggregate Input [1]: [d_date_sk#24] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(d_date_sk#24, 42), 73049, 
584392, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(d_date_sk#24, 42), 73049, 1141755, 0, 0)] Aggregate Attributes [1]: [buf#43] Results [1]: [buf#44] @@ -394,8 +394,8 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=10] (66) ObjectHashAggregate Input [1]: [buf#44] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(d_date_sk#24, 42), 73049, 584392, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(d_date_sk#24, 42), 73049, 584392, 0, 0)#45] -Results [1]: [bloom_filter_agg(xxhash64(d_date_sk#24, 42), 73049, 584392, 0, 0)#45 AS bloomFilter#46] +Functions [1]: [bloom_filter_agg(xxhash64(d_date_sk#24, 42), 73049, 1141755, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(d_date_sk#24, 42), 73049, 1141755, 0, 0)#45] +Results [1]: [bloom_filter_agg(xxhash64(d_date_sk#24, 42), 73049, 1141755, 0, 0)#45 AS bloomFilter#46] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt index 4978dd33735..def1677f944 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt @@ -26,7 +26,7 @@ WholeStageCodegen (12) Project [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk,cs_warehouse_sk,cs_order_number,cs_ext_ship_cost,cs_net_profit] Filter [cs_ship_date_sk,cs_ship_addr_sk,cs_call_center_sk] Subquery #1 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(ca_address_sk, 42), 17961, 143688, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(ca_address_sk, 42), 17961, 333176, 0, 0),bloomFilter,buf] Exchange #3 ObjectHashAggregate [ca_address_sk] [buf,buf] WholeStageCodegen (1) @@ -36,7 +36,7 @@ WholeStageCodegen (12) InputAdapter Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] Subquery #2 
- ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(cc_call_center_sk, 42), 4, 32, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(cc_call_center_sk, 42), 4, 144, 0, 0),bloomFilter,buf] Exchange #4 ObjectHashAggregate [cc_call_center_sk] [buf,buf] WholeStageCodegen (1) @@ -46,7 +46,7 @@ WholeStageCodegen (12) InputAdapter Scan parquet spark_catalog.default.call_center [cc_call_center_sk,cc_county] Subquery #3 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(d_date_sk, 42), 73049, 584392, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(d_date_sk, 42), 73049, 1141755, 0, 0),bloomFilter,buf] Exchange #5 ObjectHashAggregate [d_date_sk] [buf,buf] WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt index da1ba5bd46d..3b189de7d34 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt @@ -323,7 +323,7 @@ Input [2]: [d_week_seq#42, d_year#43] (56) ObjectHashAggregate Input [1]: [d_week_seq#42] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(d_week_seq#42, 42), 362, 2896, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(d_week_seq#42, 42), 362, 9656, 0, 0)] Aggregate Attributes [1]: [buf#85] Results [1]: [buf#86] @@ -334,9 +334,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=9] (58) ObjectHashAggregate Input [1]: [buf#86] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(d_week_seq#42, 42), 362, 2896, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(d_week_seq#42, 42), 362, 2896, 0, 0)#87] -Results [1]: [bloom_filter_agg(xxhash64(d_week_seq#42, 42), 362, 2896, 0, 0)#87 AS bloomFilter#88] +Functions [1]: [bloom_filter_agg(xxhash64(d_week_seq#42, 42), 362, 
9656, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(d_week_seq#42, 42), 362, 9656, 0, 0)#87] +Results [1]: [bloom_filter_agg(xxhash64(d_week_seq#42, 42), 362, 9656, 0, 0)#87 AS bloomFilter#88] Subquery:2 Hosting operator id = 33 Hosting Expression = Subquery scalar-subquery#52, [id=#53] ObjectHashAggregate (65) @@ -369,7 +369,7 @@ Input [2]: [d_week_seq#68, d_year#69] (63) ObjectHashAggregate Input [1]: [d_week_seq#68] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(d_week_seq#68, 42), 362, 2896, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(d_week_seq#68, 42), 362, 9656, 0, 0)] Aggregate Attributes [1]: [buf#89] Results [1]: [buf#90] @@ -380,8 +380,8 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=10] (65) ObjectHashAggregate Input [1]: [buf#90] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(d_week_seq#68, 42), 362, 2896, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(d_week_seq#68, 42), 362, 2896, 0, 0)#91] -Results [1]: [bloom_filter_agg(xxhash64(d_week_seq#68, 42), 362, 2896, 0, 0)#91 AS bloomFilter#92] +Functions [1]: [bloom_filter_agg(xxhash64(d_week_seq#68, 42), 362, 9656, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(d_week_seq#68, 42), 362, 9656, 0, 0)#91] +Results [1]: [bloom_filter_agg(xxhash64(d_week_seq#68, 42), 362, 9656, 0, 0)#91 AS bloomFilter#92] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt index e322eae9788..4fb858b4252 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt @@ -31,7 +31,7 @@ WholeStageCodegen (13) WholeStageCodegen (3) Filter [d_date_sk,d_week_seq] Subquery #1 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(d_week_seq, 42), 362, 2896, 0, 
0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(d_week_seq, 42), 362, 9656, 0, 0),bloomFilter,buf] Exchange #4 ObjectHashAggregate [d_week_seq] [buf,buf] WholeStageCodegen (1) @@ -80,7 +80,7 @@ WholeStageCodegen (13) WholeStageCodegen (8) Filter [d_date_sk,d_week_seq] Subquery #2 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(d_week_seq, 42), 362, 2896, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(d_week_seq, 42), 362, 9656, 0, 0),bloomFilter,buf] Exchange #9 ObjectHashAggregate [d_week_seq] [buf,buf] WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/explain.txt index e91bca4b7c2..74b51485aea 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/explain.txt @@ -205,7 +205,7 @@ Input [2]: [i_item_sk#1, i_manufact_id#2] (34) ObjectHashAggregate Input [1]: [i_item_sk#1] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(i_item_sk#1, 42), 199, 1592, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(i_item_sk#1, 42), 199, 5556, 0, 0)] Aggregate Attributes [1]: [buf#24] Results [1]: [buf#25] @@ -216,9 +216,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] (36) ObjectHashAggregate Input [1]: [buf#25] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(i_item_sk#1, 42), 199, 1592, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(i_item_sk#1, 42), 199, 1592, 0, 0)#26] -Results [1]: [bloom_filter_agg(xxhash64(i_item_sk#1, 42), 199, 1592, 0, 0)#26 AS bloomFilter#27] +Functions [1]: [bloom_filter_agg(xxhash64(i_item_sk#1, 42), 199, 5556, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(i_item_sk#1, 42), 199, 5556, 0, 0)#26] +Results [1]: 
[bloom_filter_agg(xxhash64(i_item_sk#1, 42), 199, 5556, 0, 0)#26 AS bloomFilter#27] Subquery:2 Hosting operator id = 6 Hosting Expression = cs_sold_date_sk#5 IN dynamicpruning#6 BroadcastExchange (41) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/simplified.txt index 48c90602954..084b50e2c0e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/simplified.txt @@ -31,7 +31,7 @@ WholeStageCodegen (7) BroadcastHashJoin [cs_sold_date_sk,d_date_sk] Filter [cs_item_sk] Subquery #2 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(i_item_sk, 42), 199, 1592, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(i_item_sk, 42), 199, 5556, 0, 0),bloomFilter,buf] Exchange #6 ObjectHashAggregate [i_item_sk] [buf,buf] WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/explain.txt index 9387f21d9c8..55ba768476e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/explain.txt @@ -221,7 +221,7 @@ Input [2]: [i_item_sk#13, i_current_price#15] (38) ObjectHashAggregate Input [1]: [i_item_sk#13] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(i_item_sk#13, 42), 1019, 8152, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(i_item_sk#13, 42), 1019, 24988, 0, 0)] Aggregate Attributes [1]: [buf#32] Results [1]: [buf#33] @@ -232,9 +232,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] (40) ObjectHashAggregate Input [1]: [buf#33] Keys: [] -Functions [1]: 
[bloom_filter_agg(xxhash64(i_item_sk#13, 42), 1019, 8152, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(i_item_sk#13, 42), 1019, 8152, 0, 0)#34] -Results [1]: [bloom_filter_agg(xxhash64(i_item_sk#13, 42), 1019, 8152, 0, 0)#34 AS bloomFilter#35] +Functions [1]: [bloom_filter_agg(xxhash64(i_item_sk#13, 42), 1019, 24988, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(i_item_sk#13, 42), 1019, 24988, 0, 0)#34] +Results [1]: [bloom_filter_agg(xxhash64(i_item_sk#13, 42), 1019, 24988, 0, 0)#34 AS bloomFilter#35] Subquery:2 Hosting operator id = 1 Hosting Expression = cs_sold_date_sk#5 IN dynamicpruning#6 BroadcastExchange (44) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/simplified.txt index e95fab855f3..4368e7b605c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/simplified.txt @@ -21,7 +21,7 @@ TakeOrderedAndProject [w_state,i_item_id,sales_before,sales_after] WholeStageCodegen (1) Filter [cs_warehouse_sk,cs_item_sk] Subquery #2 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(i_item_sk, 42), 1019, 8152, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(i_item_sk, 42), 1019, 24988, 0, 0),bloomFilter,buf] Exchange #4 ObjectHashAggregate [i_item_sk] [buf,buf] WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/explain.txt index 05660cb90fa..1aa4410e295 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/explain.txt @@ -348,7 +348,7 @@ Input 
[2]: [d_month_seq#40, d_week_seq#41] (59) ObjectHashAggregate Input [1]: [d_week_seq#41] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(d_week_seq#41, 42), 335, 2680, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(d_week_seq#41, 42), 335, 8990, 0, 0)] Aggregate Attributes [1]: [buf#88] Results [1]: [buf#89] @@ -359,9 +359,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=10] (61) ObjectHashAggregate Input [1]: [buf#89] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(d_week_seq#41, 42), 335, 2680, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(d_week_seq#41, 42), 335, 2680, 0, 0)#90] -Results [1]: [bloom_filter_agg(xxhash64(d_week_seq#41, 42), 335, 2680, 0, 0)#90 AS bloomFilter#91] +Functions [1]: [bloom_filter_agg(xxhash64(d_week_seq#41, 42), 335, 8990, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(d_week_seq#41, 42), 335, 8990, 0, 0)#90] +Results [1]: [bloom_filter_agg(xxhash64(d_week_seq#41, 42), 335, 8990, 0, 0)#90 AS bloomFilter#91] Subquery:2 Hosting operator id = 31 Hosting Expression = Subquery scalar-subquery#52, [id=#53] ObjectHashAggregate (68) @@ -394,7 +394,7 @@ Input [2]: [d_month_seq#70, d_week_seq#71] (66) ObjectHashAggregate Input [1]: [d_week_seq#71] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(d_week_seq#71, 42), 335, 2680, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(d_week_seq#71, 42), 335, 8990, 0, 0)] Aggregate Attributes [1]: [buf#92] Results [1]: [buf#93] @@ -405,8 +405,8 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=11] (68) ObjectHashAggregate Input [1]: [buf#93] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(d_week_seq#71, 42), 335, 2680, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(d_week_seq#71, 42), 335, 2680, 0, 0)#94] -Results [1]: [bloom_filter_agg(xxhash64(d_week_seq#71, 42), 335, 2680, 0, 0)#94 AS bloomFilter#95] +Functions [1]: [bloom_filter_agg(xxhash64(d_week_seq#71, 42), 335, 8990, 0, 0)] 
+Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(d_week_seq#71, 42), 335, 8990, 0, 0)#94] +Results [1]: [bloom_filter_agg(xxhash64(d_week_seq#71, 42), 335, 8990, 0, 0)#94 AS bloomFilter#95] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/simplified.txt index f7d471cf48f..62f4fab4891 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/simplified.txt @@ -22,7 +22,7 @@ TakeOrderedAndProject [s_store_name1,s_store_id1,d_week_seq1,(sun_sales1 / sun_s WholeStageCodegen (1) Filter [d_date_sk,d_week_seq] Subquery #1 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(d_week_seq, 42), 335, 2680, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(d_week_seq, 42), 335, 8990, 0, 0),bloomFilter,buf] Exchange #3 ObjectHashAggregate [d_week_seq] [buf,buf] WholeStageCodegen (1) @@ -72,7 +72,7 @@ TakeOrderedAndProject [s_store_name1,s_store_id1,d_week_seq1,(sun_sales1 / sun_s WholeStageCodegen (5) Filter [d_date_sk,d_week_seq] Subquery #2 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(d_week_seq, 42), 335, 2680, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(d_week_seq, 42), 335, 8990, 0, 0),bloomFilter,buf] Exchange #9 ObjectHashAggregate [d_week_seq] [buf,buf] WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.sf100/explain.txt index d102d1c3642..8a57ad7ce8d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.sf100/explain.txt @@ -1176,7 +1176,7 @@ Input 
[3]: [i_item_sk#75, i_current_price#76, i_color#77] (214) ObjectHashAggregate Input [1]: [i_item_sk#75] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(i_item_sk#75, 42), 1250, 10000, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(i_item_sk#75, 42), 1250, 30121, 0, 0)] Aggregate Attributes [1]: [buf#176] Results [1]: [buf#177] @@ -1187,9 +1187,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=32] (216) ObjectHashAggregate Input [1]: [buf#177] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(i_item_sk#75, 42), 1250, 10000, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(i_item_sk#75, 42), 1250, 10000, 0, 0)#178] -Results [1]: [bloom_filter_agg(xxhash64(i_item_sk#75, 42), 1250, 10000, 0, 0)#178 AS bloomFilter#179] +Functions [1]: [bloom_filter_agg(xxhash64(i_item_sk#75, 42), 1250, 30121, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(i_item_sk#75, 42), 1250, 30121, 0, 0)#178] +Results [1]: [bloom_filter_agg(xxhash64(i_item_sk#75, 42), 1250, 30121, 0, 0)#178 AS bloomFilter#179] Subquery:2 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#12 IN dynamicpruning#13 BroadcastExchange (220) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.sf100/simplified.txt index 87226794ac8..ce628bd2235 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q64.sf100/simplified.txt @@ -88,7 +88,7 @@ WholeStageCodegen (88) WholeStageCodegen (1) Filter [ss_item_sk,ss_ticket_number,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_addr_sk] Subquery #2 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(i_item_sk, 42), 1250, 10000, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(i_item_sk, 42), 1250, 30121, 0, 
0),bloomFilter,buf] Exchange #11 ObjectHashAggregate [i_item_sk] [buf,buf] WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.sf100/explain.txt index 18229b02f23..96ca7b8cb0b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.sf100/explain.txt @@ -300,7 +300,7 @@ Input [2]: [ca_address_sk#16, ca_state#17] (52) ObjectHashAggregate Input [1]: [ca_address_sk#16] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(ca_address_sk#16, 42), 55556, 444448, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(ca_address_sk#16, 42), 55556, 899992, 0, 0)] Aggregate Attributes [1]: [buf#30] Results [1]: [buf#31] @@ -311,9 +311,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] (54) ObjectHashAggregate Input [1]: [buf#31] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(ca_address_sk#16, 42), 55556, 444448, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(ca_address_sk#16, 42), 55556, 444448, 0, 0)#32] -Results [1]: [bloom_filter_agg(xxhash64(ca_address_sk#16, 42), 55556, 444448, 0, 0)#32 AS bloomFilter#33] +Functions [1]: [bloom_filter_agg(xxhash64(ca_address_sk#16, 42), 55556, 899992, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(ca_address_sk#16, 42), 55556, 899992, 0, 0)#32] +Results [1]: [bloom_filter_agg(xxhash64(ca_address_sk#16, 42), 55556, 899992, 0, 0)#32 AS bloomFilter#33] Subquery:2 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#7 IN dynamicpruning#8 BroadcastExchange (59) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.sf100/simplified.txt index 6483ac8dc74..7635aa1c6c3 100644 --- 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.sf100/simplified.txt @@ -28,7 +28,7 @@ TakeOrderedAndProject [cd_gender,cd_marital_status,cd_education_status,cd_purcha WholeStageCodegen (1) Filter [c_current_addr_sk,c_current_cdemo_sk] Subquery #1 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(ca_address_sk, 42), 55556, 444448, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(ca_address_sk, 42), 55556, 899992, 0, 0),bloomFilter,buf] Exchange #4 ObjectHashAggregate [ca_address_sk] [buf,buf] WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt index 90bb3af97ec..c930a8f5223 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt @@ -639,7 +639,7 @@ Input [2]: [i_item_sk#18, i_current_price#19] (112) ObjectHashAggregate Input [1]: [i_item_sk#18] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(i_item_sk#18, 42), 101823, 814584, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(i_item_sk#18, 42), 101823, 1521109, 0, 0)] Aggregate Attributes [1]: [buf#134] Results [1]: [buf#135] @@ -650,9 +650,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=16] (114) ObjectHashAggregate Input [1]: [buf#135] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(i_item_sk#18, 42), 101823, 814584, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(i_item_sk#18, 42), 101823, 814584, 0, 0)#136] -Results [1]: [bloom_filter_agg(xxhash64(i_item_sk#18, 42), 101823, 814584, 0, 0)#136 AS bloomFilter#137] +Functions [1]: [bloom_filter_agg(xxhash64(i_item_sk#18, 42), 101823, 1521109, 0, 0)] 
+Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(i_item_sk#18, 42), 101823, 1521109, 0, 0)#136] +Results [1]: [bloom_filter_agg(xxhash64(i_item_sk#18, 42), 101823, 1521109, 0, 0)#136 AS bloomFilter#137] Subquery:2 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#11, [id=#12] ObjectHashAggregate (121) @@ -685,7 +685,7 @@ Input [2]: [p_promo_sk#20, p_channel_tv#21] (119) ObjectHashAggregate Input [1]: [p_promo_sk#20] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(p_promo_sk#20, 42), 986, 7888, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(p_promo_sk#20, 42), 986, 24246, 0, 0)] Aggregate Attributes [1]: [buf#138] Results [1]: [buf#139] @@ -696,9 +696,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=17] (121) ObjectHashAggregate Input [1]: [buf#139] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(p_promo_sk#20, 42), 986, 7888, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(p_promo_sk#20, 42), 986, 7888, 0, 0)#140] -Results [1]: [bloom_filter_agg(xxhash64(p_promo_sk#20, 42), 986, 7888, 0, 0)#140 AS bloomFilter#141] +Functions [1]: [bloom_filter_agg(xxhash64(p_promo_sk#20, 42), 986, 24246, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(p_promo_sk#20, 42), 986, 24246, 0, 0)#140] +Results [1]: [bloom_filter_agg(xxhash64(p_promo_sk#20, 42), 986, 24246, 0, 0)#140 AS bloomFilter#141] Subquery:3 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#7 IN dynamicpruning#8 BroadcastExchange (126) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/simplified.txt index 647cca694c7..315c338617f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/simplified.txt @@ -32,7 +32,7 @@ TakeOrderedAndProject 
[channel,id,sales,returns,profit] WholeStageCodegen (1) Filter [ss_store_sk,ss_item_sk,ss_promo_sk] Subquery #2 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(i_item_sk, 42), 101823, 814584, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(i_item_sk, 42), 101823, 1521109, 0, 0),bloomFilter,buf] Exchange #5 ObjectHashAggregate [i_item_sk] [buf,buf] WholeStageCodegen (1) @@ -42,7 +42,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Scan parquet spark_catalog.default.item [i_item_sk,i_current_price] Subquery #3 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(p_promo_sk, 42), 986, 7888, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(p_promo_sk, 42), 986, 24246, 0, 0),bloomFilter,buf] Exchange #6 ObjectHashAggregate [p_promo_sk] [buf,buf] WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/explain.txt index 51b43a6477b..410a6a19575 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/explain.txt @@ -368,7 +368,7 @@ Input [3]: [cd_demo_sk#23, cd_marital_status#24, cd_education_status#25] (64) ObjectHashAggregate Input [1]: [cd_demo_sk#23] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(cd_demo_sk#23, 42), 159981, 1279848, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(cd_demo_sk#23, 42), 159981, 2239471, 0, 0)] Aggregate Attributes [1]: [buf#55] Results [1]: [buf#56] @@ -379,9 +379,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=11] (66) ObjectHashAggregate Input [1]: [buf#56] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(cd_demo_sk#23, 42), 159981, 1279848, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(cd_demo_sk#23, 42), 
159981, 1279848, 0, 0)#57] -Results [1]: [bloom_filter_agg(xxhash64(cd_demo_sk#23, 42), 159981, 1279848, 0, 0)#57 AS bloomFilter#58] +Functions [1]: [bloom_filter_agg(xxhash64(cd_demo_sk#23, 42), 159981, 2239471, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(cd_demo_sk#23, 42), 159981, 2239471, 0, 0)#57] +Results [1]: [bloom_filter_agg(xxhash64(cd_demo_sk#23, 42), 159981, 2239471, 0, 0)#57 AS bloomFilter#58] Subquery:3 Hosting operator id = 14 Hosting Expression = Subquery scalar-subquery#21, [id=#22] ObjectHashAggregate (73) @@ -414,7 +414,7 @@ Input [3]: [ca_address_sk#29, ca_state#30, ca_country#31] (71) ObjectHashAggregate Input [1]: [ca_address_sk#29] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(ca_address_sk#29, 42), 152837, 1222696, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(ca_address_sk#29, 42), 152837, 2153999, 0, 0)] Aggregate Attributes [1]: [buf#59] Results [1]: [buf#60] @@ -425,8 +425,8 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=12] (73) ObjectHashAggregate Input [1]: [buf#60] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(ca_address_sk#29, 42), 152837, 1222696, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(ca_address_sk#29, 42), 152837, 1222696, 0, 0)#61] -Results [1]: [bloom_filter_agg(xxhash64(ca_address_sk#29, 42), 152837, 1222696, 0, 0)#61 AS bloomFilter#62] +Functions [1]: [bloom_filter_agg(xxhash64(ca_address_sk#29, 42), 152837, 2153999, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(ca_address_sk#29, 42), 152837, 2153999, 0, 0)#61] +Results [1]: [bloom_filter_agg(xxhash64(ca_address_sk#29, 42), 152837, 2153999, 0, 0)#61 AS bloomFilter#62] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/simplified.txt index aa9e8c4c20d..46c14e8bd67 100644 --- 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/simplified.txt @@ -59,7 +59,7 @@ TakeOrderedAndProject [substr(r_reason_desc, 1, 20),avg(ws_quantity),avg(wr_refu Project [wr_item_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_order_number,wr_fee,wr_refunded_cash] Filter [wr_item_sk,wr_order_number,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,wr_refunded_addr_sk,wr_reason_sk] Subquery #2 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(cd_demo_sk, 42), 159981, 1279848, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(cd_demo_sk, 42), 159981, 2239471, 0, 0),bloomFilter,buf] Exchange #7 ObjectHashAggregate [cd_demo_sk] [buf,buf] WholeStageCodegen (1) @@ -69,7 +69,7 @@ TakeOrderedAndProject [substr(r_reason_desc, 1, 20),avg(ws_quantity),avg(wr_refu InputAdapter Scan parquet spark_catalog.default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] Subquery #3 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(ca_address_sk, 42), 152837, 1222696, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(ca_address_sk, 42), 152837, 2153999, 0, 0),bloomFilter,buf] Exchange #8 ObjectHashAggregate [ca_address_sk] [buf,buf] WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt index e74a02ff719..dce5f37bb95 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt @@ -205,7 +205,7 @@ Input [2]: [i_item_sk#1, i_manufact_id#2] (34) ObjectHashAggregate Input [1]: [i_item_sk#1] Keys: [] -Functions [1]: 
[partial_bloom_filter_agg(xxhash64(i_item_sk#1, 42), 199, 1592, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(i_item_sk#1, 42), 199, 5556, 0, 0)] Aggregate Attributes [1]: [buf#24] Results [1]: [buf#25] @@ -216,9 +216,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] (36) ObjectHashAggregate Input [1]: [buf#25] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(i_item_sk#1, 42), 199, 1592, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(i_item_sk#1, 42), 199, 1592, 0, 0)#26] -Results [1]: [bloom_filter_agg(xxhash64(i_item_sk#1, 42), 199, 1592, 0, 0)#26 AS bloomFilter#27] +Functions [1]: [bloom_filter_agg(xxhash64(i_item_sk#1, 42), 199, 5556, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(i_item_sk#1, 42), 199, 5556, 0, 0)#26] +Results [1]: [bloom_filter_agg(xxhash64(i_item_sk#1, 42), 199, 5556, 0, 0)#26 AS bloomFilter#27] Subquery:2 Hosting operator id = 6 Hosting Expression = ws_sold_date_sk#5 IN dynamicpruning#6 BroadcastExchange (41) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt index 0402324ad1a..d664a0c7317 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt @@ -31,7 +31,7 @@ WholeStageCodegen (7) BroadcastHashJoin [ws_sold_date_sk,d_date_sk] Filter [ws_item_sk] Subquery #2 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(i_item_sk, 42), 199, 1592, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(i_item_sk, 42), 199, 5556, 0, 0),bloomFilter,buf] Exchange #6 ObjectHashAggregate [i_item_sk] [buf,buf] WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt index 5c73695cd20..ff096bf4509 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt @@ -291,7 +291,7 @@ Input [2]: [ca_address_sk#20, ca_state#21] (50) ObjectHashAggregate Input [1]: [ca_address_sk#20] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(ca_address_sk#20, 42), 17961, 143688, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(ca_address_sk#20, 42), 17961, 333176, 0, 0)] Aggregate Attributes [1]: [buf#35] Results [1]: [buf#36] @@ -302,9 +302,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] (52) ObjectHashAggregate Input [1]: [buf#36] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(ca_address_sk#20, 42), 17961, 143688, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(ca_address_sk#20, 42), 17961, 143688, 0, 0)#37] -Results [1]: [bloom_filter_agg(xxhash64(ca_address_sk#20, 42), 17961, 143688, 0, 0)#37 AS bloomFilter#38] +Functions [1]: [bloom_filter_agg(xxhash64(ca_address_sk#20, 42), 17961, 333176, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(ca_address_sk#20, 42), 17961, 333176, 0, 0)#37] +Results [1]: [bloom_filter_agg(xxhash64(ca_address_sk#20, 42), 17961, 333176, 0, 0)#37 AS bloomFilter#38] Subquery:2 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#11, [id=#12] ObjectHashAggregate (59) @@ -337,7 +337,7 @@ Input [2]: [web_site_sk#22, web_company_name#23] (57) ObjectHashAggregate Input [1]: [web_site_sk#22] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(web_site_sk#22, 42), 4, 32, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(web_site_sk#22, 42), 4, 144, 0, 0)] Aggregate Attributes [1]: [buf#39] Results [1]: [buf#40] @@ -348,9 +348,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=9] (59) 
ObjectHashAggregate Input [1]: [buf#40] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(web_site_sk#22, 42), 4, 32, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(web_site_sk#22, 42), 4, 32, 0, 0)#41] -Results [1]: [bloom_filter_agg(xxhash64(web_site_sk#22, 42), 4, 32, 0, 0)#41 AS bloomFilter#42] +Functions [1]: [bloom_filter_agg(xxhash64(web_site_sk#22, 42), 4, 144, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(web_site_sk#22, 42), 4, 144, 0, 0)#41] +Results [1]: [bloom_filter_agg(xxhash64(web_site_sk#22, 42), 4, 144, 0, 0)#41 AS bloomFilter#42] Subquery:3 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#13, [id=#14] ObjectHashAggregate (66) @@ -383,7 +383,7 @@ Input [2]: [d_date_sk#24, d_date#25] (64) ObjectHashAggregate Input [1]: [d_date_sk#24] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(d_date_sk#24, 42), 73049, 584392, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(d_date_sk#24, 42), 73049, 1141755, 0, 0)] Aggregate Attributes [1]: [buf#43] Results [1]: [buf#44] @@ -394,8 +394,8 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=10] (66) ObjectHashAggregate Input [1]: [buf#44] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(d_date_sk#24, 42), 73049, 584392, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(d_date_sk#24, 42), 73049, 584392, 0, 0)#45] -Results [1]: [bloom_filter_agg(xxhash64(d_date_sk#24, 42), 73049, 584392, 0, 0)#45 AS bloomFilter#46] +Functions [1]: [bloom_filter_agg(xxhash64(d_date_sk#24, 42), 73049, 1141755, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(d_date_sk#24, 42), 73049, 1141755, 0, 0)#45] +Results [1]: [bloom_filter_agg(xxhash64(d_date_sk#24, 42), 73049, 1141755, 0, 0)#45 AS bloomFilter#46] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt index 
fc764d31f52..230b08abe0a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt @@ -26,7 +26,7 @@ WholeStageCodegen (12) Project [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_warehouse_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] Filter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk] Subquery #1 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(ca_address_sk, 42), 17961, 143688, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(ca_address_sk, 42), 17961, 333176, 0, 0),bloomFilter,buf] Exchange #3 ObjectHashAggregate [ca_address_sk] [buf,buf] WholeStageCodegen (1) @@ -36,7 +36,7 @@ WholeStageCodegen (12) InputAdapter Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] Subquery #2 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(web_site_sk, 42), 4, 32, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(web_site_sk, 42), 4, 144, 0, 0),bloomFilter,buf] Exchange #4 ObjectHashAggregate [web_site_sk] [buf,buf] WholeStageCodegen (1) @@ -46,7 +46,7 @@ WholeStageCodegen (12) InputAdapter Scan parquet spark_catalog.default.web_site [web_site_sk,web_company_name] Subquery #3 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(d_date_sk, 42), 73049, 584392, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(d_date_sk, 42), 73049, 1141755, 0, 0),bloomFilter,buf] Exchange #5 ObjectHashAggregate [d_date_sk] [buf,buf] WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt index aee8e7ded8d..d6cf257b8b5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt @@ -356,7 +356,7 @@ Input [2]: [ca_address_sk#21, ca_state#22] (62) ObjectHashAggregate Input [1]: [ca_address_sk#21] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(ca_address_sk#21, 42), 17961, 143688, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(ca_address_sk#21, 42), 17961, 333176, 0, 0)] Aggregate Attributes [1]: [buf#36] Results [1]: [buf#37] @@ -367,9 +367,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] (64) ObjectHashAggregate Input [1]: [buf#37] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(ca_address_sk#21, 42), 17961, 143688, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(ca_address_sk#21, 42), 17961, 143688, 0, 0)#38] -Results [1]: [bloom_filter_agg(xxhash64(ca_address_sk#21, 42), 17961, 143688, 0, 0)#38 AS bloomFilter#39] +Functions [1]: [bloom_filter_agg(xxhash64(ca_address_sk#21, 42), 17961, 333176, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(ca_address_sk#21, 42), 17961, 333176, 0, 0)#38] +Results [1]: [bloom_filter_agg(xxhash64(ca_address_sk#21, 42), 17961, 333176, 0, 0)#38 AS bloomFilter#39] Subquery:2 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#10, [id=#11] ObjectHashAggregate (71) @@ -402,7 +402,7 @@ Input [2]: [web_site_sk#23, web_company_name#24] (69) ObjectHashAggregate Input [1]: [web_site_sk#23] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(web_site_sk#23, 42), 4, 32, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(web_site_sk#23, 42), 4, 144, 0, 0)] Aggregate Attributes [1]: [buf#40] Results [1]: [buf#41] @@ -413,9 +413,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=9] (71) ObjectHashAggregate Input [1]: [buf#41] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(web_site_sk#23, 42), 4, 32, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(web_site_sk#23, 42), 4, 32, 0, 0)#42] -Results [1]: 
[bloom_filter_agg(xxhash64(web_site_sk#23, 42), 4, 32, 0, 0)#42 AS bloomFilter#43] +Functions [1]: [bloom_filter_agg(xxhash64(web_site_sk#23, 42), 4, 144, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(web_site_sk#23, 42), 4, 144, 0, 0)#42] +Results [1]: [bloom_filter_agg(xxhash64(web_site_sk#23, 42), 4, 144, 0, 0)#42 AS bloomFilter#43] Subquery:3 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#12, [id=#13] ObjectHashAggregate (78) @@ -448,7 +448,7 @@ Input [2]: [d_date_sk#25, d_date#26] (76) ObjectHashAggregate Input [1]: [d_date_sk#25] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(d_date_sk#25, 42), 73049, 584392, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(d_date_sk#25, 42), 73049, 1141755, 0, 0)] Aggregate Attributes [1]: [buf#44] Results [1]: [buf#45] @@ -459,8 +459,8 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=10] (78) ObjectHashAggregate Input [1]: [buf#45] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(d_date_sk#25, 42), 73049, 584392, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(d_date_sk#25, 42), 73049, 584392, 0, 0)#46] -Results [1]: [bloom_filter_agg(xxhash64(d_date_sk#25, 42), 73049, 584392, 0, 0)#46 AS bloomFilter#47] +Functions [1]: [bloom_filter_agg(xxhash64(d_date_sk#25, 42), 73049, 1141755, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(d_date_sk#25, 42), 73049, 1141755, 0, 0)#46] +Results [1]: [bloom_filter_agg(xxhash64(d_date_sk#25, 42), 73049, 1141755, 0, 0)#46 AS bloomFilter#47] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt index e521277505b..8922d43c2aa 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt @@ -25,7 +25,7 @@ 
WholeStageCodegen (21) Project [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk,ws_order_number,ws_ext_ship_cost,ws_net_profit] Filter [ws_ship_date_sk,ws_ship_addr_sk,ws_web_site_sk] Subquery #1 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(ca_address_sk, 42), 17961, 143688, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(ca_address_sk, 42), 17961, 333176, 0, 0),bloomFilter,buf] Exchange #3 ObjectHashAggregate [ca_address_sk] [buf,buf] WholeStageCodegen (1) @@ -35,7 +35,7 @@ WholeStageCodegen (21) InputAdapter Scan parquet spark_catalog.default.customer_address [ca_address_sk,ca_state] Subquery #2 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(web_site_sk, 42), 4, 32, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(web_site_sk, 42), 4, 144, 0, 0),bloomFilter,buf] Exchange #4 ObjectHashAggregate [web_site_sk] [buf,buf] WholeStageCodegen (1) @@ -45,7 +45,7 @@ WholeStageCodegen (21) InputAdapter Scan parquet spark_catalog.default.web_site [web_site_sk,web_company_name] Subquery #3 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(d_date_sk, 42), 73049, 584392, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(d_date_sk, 42), 73049, 1141755, 0, 0),bloomFilter,buf] Exchange #5 ObjectHashAggregate [d_date_sk] [buf,buf] WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.sf100/explain.txt index e6ba05cca53..72298764a9e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.sf100/explain.txt @@ -286,7 +286,7 @@ Input [2]: [ca_address_sk#18, ca_county#19] (50) ObjectHashAggregate Input [1]: [ca_address_sk#18] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(ca_address_sk#18, 42), 
2555, 20440, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(ca_address_sk#18, 42), 2555, 57765, 0, 0)] Aggregate Attributes [1]: [buf#38] Results [1]: [buf#39] @@ -297,9 +297,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] (52) ObjectHashAggregate Input [1]: [buf#39] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(ca_address_sk#18, 42), 2555, 20440, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(ca_address_sk#18, 42), 2555, 20440, 0, 0)#40] -Results [1]: [bloom_filter_agg(xxhash64(ca_address_sk#18, 42), 2555, 20440, 0, 0)#40 AS bloomFilter#41] +Functions [1]: [bloom_filter_agg(xxhash64(ca_address_sk#18, 42), 2555, 57765, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(ca_address_sk#18, 42), 2555, 57765, 0, 0)#40] +Results [1]: [bloom_filter_agg(xxhash64(ca_address_sk#18, 42), 2555, 57765, 0, 0)#40 AS bloomFilter#41] Subquery:2 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#7 IN dynamicpruning#8 BroadcastExchange (57) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.sf100/simplified.txt index 8102047b218..49da06e14bc 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.sf100/simplified.txt @@ -25,7 +25,7 @@ TakeOrderedAndProject [cd_gender,cd_marital_status,cd_education_status,cd_purcha WholeStageCodegen (1) Filter [c_current_addr_sk,c_current_cdemo_sk] Subquery #1 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(ca_address_sk, 42), 2555, 20440, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(ca_address_sk, 42), 2555, 57765, 0, 0),bloomFilter,buf] Exchange #4 ObjectHashAggregate [ca_address_sk] [buf,buf] WholeStageCodegen (1) diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/explain.txt index 56dadada888..50beb987864 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/explain.txt @@ -1176,7 +1176,7 @@ Input [3]: [i_item_sk#75, i_current_price#76, i_color#77] (214) ObjectHashAggregate Input [1]: [i_item_sk#75] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(i_item_sk#75, 42), 1250, 10000, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(i_item_sk#75, 42), 1250, 30121, 0, 0)] Aggregate Attributes [1]: [buf#176] Results [1]: [buf#177] @@ -1187,9 +1187,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=32] (216) ObjectHashAggregate Input [1]: [buf#177] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(i_item_sk#75, 42), 1250, 10000, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(i_item_sk#75, 42), 1250, 10000, 0, 0)#178] -Results [1]: [bloom_filter_agg(xxhash64(i_item_sk#75, 42), 1250, 10000, 0, 0)#178 AS bloomFilter#179] +Functions [1]: [bloom_filter_agg(xxhash64(i_item_sk#75, 42), 1250, 30121, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(i_item_sk#75, 42), 1250, 30121, 0, 0)#178] +Results [1]: [bloom_filter_agg(xxhash64(i_item_sk#75, 42), 1250, 30121, 0, 0)#178 AS bloomFilter#179] Subquery:2 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#12 IN dynamicpruning#13 BroadcastExchange (220) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/simplified.txt index cb088f6a6ad..9793e7cf7ef 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/simplified.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/simplified.txt @@ -88,7 +88,7 @@ WholeStageCodegen (88) WholeStageCodegen (1) Filter [ss_item_sk,ss_ticket_number,ss_store_sk,ss_customer_sk,ss_cdemo_sk,ss_promo_sk,ss_hdemo_sk,ss_addr_sk] Subquery #2 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(i_item_sk, 42), 1250, 10000, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(i_item_sk, 42), 1250, 30121, 0, 0),bloomFilter,buf] Exchange #11 ObjectHashAggregate [i_item_sk] [buf,buf] WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt index a0246af44b7..2c3bbda04d1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt @@ -1186,7 +1186,7 @@ Input [2]: [i_item_sk#18, i_current_price#19] (207) ObjectHashAggregate Input [1]: [i_item_sk#18] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(i_item_sk#18, 42), 101823, 814584, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(i_item_sk#18, 42), 101823, 1521109, 0, 0)] Aggregate Attributes [1]: [buf#218] Results [1]: [buf#219] @@ -1197,9 +1197,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=27] (209) ObjectHashAggregate Input [1]: [buf#219] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(i_item_sk#18, 42), 101823, 814584, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(i_item_sk#18, 42), 101823, 814584, 0, 0)#220] -Results [1]: [bloom_filter_agg(xxhash64(i_item_sk#18, 42), 101823, 814584, 0, 0)#220 AS bloomFilter#221] +Functions [1]: [bloom_filter_agg(xxhash64(i_item_sk#18, 42), 101823, 1521109, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(i_item_sk#18, 42), 101823, 1521109, 0, 0)#220] 
+Results [1]: [bloom_filter_agg(xxhash64(i_item_sk#18, 42), 101823, 1521109, 0, 0)#220 AS bloomFilter#221] Subquery:2 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#11, [id=#12] ObjectHashAggregate (216) @@ -1232,7 +1232,7 @@ Input [2]: [p_promo_sk#20, p_channel_tv#21] (214) ObjectHashAggregate Input [1]: [p_promo_sk#20] Keys: [] -Functions [1]: [partial_bloom_filter_agg(xxhash64(p_promo_sk#20, 42), 986, 7888, 0, 0)] +Functions [1]: [partial_bloom_filter_agg(xxhash64(p_promo_sk#20, 42), 986, 24246, 0, 0)] Aggregate Attributes [1]: [buf#222] Results [1]: [buf#223] @@ -1243,9 +1243,9 @@ Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=28] (216) ObjectHashAggregate Input [1]: [buf#223] Keys: [] -Functions [1]: [bloom_filter_agg(xxhash64(p_promo_sk#20, 42), 986, 7888, 0, 0)] -Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(p_promo_sk#20, 42), 986, 7888, 0, 0)#224] -Results [1]: [bloom_filter_agg(xxhash64(p_promo_sk#20, 42), 986, 7888, 0, 0)#224 AS bloomFilter#225] +Functions [1]: [bloom_filter_agg(xxhash64(p_promo_sk#20, 42), 986, 24246, 0, 0)] +Aggregate Attributes [1]: [bloom_filter_agg(xxhash64(p_promo_sk#20, 42), 986, 24246, 0, 0)#224] +Results [1]: [bloom_filter_agg(xxhash64(p_promo_sk#20, 42), 986, 24246, 0, 0)#224 AS bloomFilter#225] Subquery:3 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#7 IN dynamicpruning#8 BroadcastExchange (221) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt index 542e92a27d7..7082f78d270 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt @@ -39,7 +39,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] WholeStageCodegen (1) Filter [ss_store_sk,ss_item_sk,ss_promo_sk] 
Subquery #2 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(i_item_sk, 42), 101823, 814584, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(i_item_sk, 42), 101823, 1521109, 0, 0),bloomFilter,buf] Exchange #6 ObjectHashAggregate [i_item_sk] [buf,buf] WholeStageCodegen (1) @@ -49,7 +49,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Scan parquet spark_catalog.default.item [i_item_sk,i_current_price] Subquery #3 - ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(p_promo_sk, 42), 986, 7888, 0, 0),bloomFilter,buf] + ObjectHashAggregate [buf] [bloom_filter_agg(xxhash64(p_promo_sk, 42), 986, 24246, 0, 0),bloomFilter,buf] Exchange #7 ObjectHashAggregate [p_promo_sk] [buf,buf] WholeStageCodegen (1) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala index cf5d4c8c1e9..4edb51d2719 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala @@ -376,4 +376,21 @@ class BloomFilterAggregateQuerySuite extends QueryTest with SharedSparkSession { .queryExecution.executedPlan.asInstanceOf[AdaptiveSparkPlanExec].inputPlan .collect({case agg: BaseAggregateExec => agg}).size == 2) } + + test("Test numBitsExpression") { + def checkNumBits(estimatedNumItems: Long, numBits: Long): Unit = { + val agg = new BloomFilterAggregate(Literal(1L), estimatedNumItems) + assert(agg.numBitsExpression === Literal(numBits)) + } + + checkNumBits(conf.getConf(SQLConf.RUNTIME_BLOOM_FILTER_MAX_NUM_ITEMS) * 100, + conf.getConf(SQLConf.RUNTIME_BLOOM_FILTER_MAX_NUM_BITS)) + checkNumBits(conf.getConf(SQLConf.RUNTIME_BLOOM_FILTER_MAX_NUM_ITEMS) + 10, 29193836) + checkNumBits(conf.getConf(SQLConf.RUNTIME_BLOOM_FILTER_MAX_NUM_ITEMS), 29193763) + checkNumBits(2000000, 17482271) + checkNumBits(1000000, 
10183830) + checkNumBits(10000, 197688) + checkNumBits(100, 2935) + checkNumBits(1, 38) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org