This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 2061103 [SPARK-36086][SQL][3.1] CollapseProject project replace alias
should use origin column name
2061103 is described below
commit 2061103d946879cfe7f5c88fd0f4b60c65c50ee9
Author: Angerszhuuuu <[email protected]>
AuthorDate: Mon Aug 9 16:42:12 2021 +0800
[SPARK-36086][SQL][3.1] CollapseProject project replace alias should use
origin column name
### What changes were proposed in this pull request?
Without this patch, the added UT fails as shown below:
```
[info] - SHOW TABLES V2: SPARK-36086: CollapseProject project replace alias
should use origin column name *** FAILED *** (4 seconds, 935 milliseconds)
[info] java.lang.RuntimeException: After applying rule
org.apache.spark.sql.catalyst.optimizer.CollapseProject in batch Operator
Optimization before Inferring Filters, the structural integrity of the plan is
broken.
[info] at
org.apache.spark.sql.errors.QueryExecutionErrors$.structuralIntegrityIsBrokenAfterApplyingRuleError(QueryExecutionErrors.scala:1217)
[info] at
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:229)
[info] at
scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
[info] at
scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
[info] at scala.collection.immutable.List.foldLeft(List.scala:91)
[info] at
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:208)
[info] at
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:200)
[info] at scala.collection.immutable.List.foreach(List.scala:431)
[info] at
org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:200)
[info] at
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:179)
[info] at
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:88)
```
When CollapseProject replaces an attribute with its alias, the alias should keep the name of the attribute it replaces (the original column name).
### Why are the changes needed?
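Bug fix: without it, CollapseProject can change the output column name when collapsing projects, which breaks the structural integrity check shown above.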
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Added UT
Closes #33685 from AngersZhuuuu/SPARK-36086-3.1.
Authored-by: Angerszhuuuu <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../spark/sql/catalyst/expressions/AliasHelper.scala | 2 +-
.../sql/catalyst/expressions/namedExpressions.scala | 8 ++++++++
.../catalyst/optimizer/CollapseProjectSuite.scala | 9 +++++++++
.../approved-plans-v1_4/q5.sf100/explain.txt | 20 ++++++++++----------
.../approved-plans-v1_4/q5.sf100/simplified.txt | 6 +++---
.../approved-plans-v1_4/q5/explain.txt | 16 ++++++++--------
.../approved-plans-v1_4/q5/simplified.txt | 6 +++---
7 files changed, 42 insertions(+), 25 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala
index 7eb95e6..4c94fa4 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala
@@ -72,7 +72,7 @@ trait AliasHelper {
// Use transformUp to prevent infinite recursion when the replacement
expression
// redefines the same ExprId,
trimNonTopLevelAliases(expr.transformUp {
- case a: Attribute => aliasMap.getOrElse(a, a)
+ case a: Attribute => aliasMap.get(a).map(_.withName(a.name)).getOrElse(a)
}).asInstanceOf[NamedExpression]
}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
index badc2ec..cabc090 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
@@ -180,6 +180,14 @@ case class Alias(child: Expression, name: String)(
}
}
+ def withName(newName: String): NamedExpression = {
+ Alias(child, newName)(
+ exprId = exprId,
+ qualifier = qualifier,
+ explicitMetadata = explicitMetadata,
+ nonInheritableMetadataKeys = nonInheritableMetadataKeys)
+ }
+
def newInstance(): NamedExpression =
Alias(child, name)(
qualifier = qualifier,
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseProjectSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseProjectSuite.scala
index 42bcd13..1e7f9b0 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseProjectSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseProjectSuite.scala
@@ -170,4 +170,13 @@ class CollapseProjectSuite extends PlanTest {
val expected = Sample(0.0, 0.6, false, 11L, relation.select('a as
'c)).analyze
comparePlans(optimized, expected)
}
+
+ test("SPARK-36086: CollapseProject should keep output schema name") {
+ val relation = LocalRelation('a.int, 'b.int)
+ val select = relation.select(('a + 'b).as('c)).analyze
+ val query = Project(Seq(select.output.head.withName("C")), select)
+ val optimized = Optimize.execute(query)
+ val expected = relation.select(('a + 'b).as('C)).analyze
+ comparePlans(optimized, expected)
+ }
}
diff --git
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt
index 55bd25c..6a0794d 100644
---
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt
+++
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt
@@ -186,14 +186,14 @@ Results [5]: [s_store_id#25, sum#31, sum#32, sum#33,
sum#34]
(24) Exchange
Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34]
-Arguments: hashpartitioning(s_store_id#25, 5), true, [id=#35]
+Arguments: hashpartitioning(s_store_id#25, 5), ENSURE_REQUIREMENTS, [id=#35]
(25) HashAggregate [codegen id : 6]
Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34]
Keys [1]: [s_store_id#25]
Functions [4]: [sum(UnscaledValue(sales_price#7)),
sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)),
sum(UnscaledValue(net_loss#10))]
Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#36,
sum(UnscaledValue(return_amt#9))#37, sum(UnscaledValue(profit#8))#38,
sum(UnscaledValue(net_loss#10))#39]
-Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS
sales#40, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS RETURNS#41,
CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2)
as decimal(18,2))) -
promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as
decimal(18,2)))), DecimalType(18,2), true) AS profit#42, store channel AS
channel#43, concat(store, s_store_id#25) AS id#44]
+Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS
sales#40, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS returns#41,
CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2)
as decimal(18,2))) -
promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as
decimal(18,2)))), DecimalType(18,2), true) AS profit#42, store channel AS
channel#43, concat(store, s_store_id#25) AS id#44]
(26) Scan parquet default.catalog_sales
Output [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47,
cs_net_profit#48]
@@ -281,14 +281,14 @@ Results [5]: [cp_catalog_page_id#66, sum#72, sum#73,
sum#74, sum#75]
(45) Exchange
Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75]
-Arguments: hashpartitioning(cp_catalog_page_id#66, 5), true, [id=#76]
+Arguments: hashpartitioning(cp_catalog_page_id#66, 5), ENSURE_REQUIREMENTS,
[id=#76]
(46) HashAggregate [codegen id : 12]
Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75]
Keys [1]: [cp_catalog_page_id#66]
Functions [4]: [sum(UnscaledValue(sales_price#51)),
sum(UnscaledValue(return_amt#53)), sum(UnscaledValue(profit#52)),
sum(UnscaledValue(net_loss#54))]
Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#51))#77,
sum(UnscaledValue(return_amt#53))#78, sum(UnscaledValue(profit#52))#79,
sum(UnscaledValue(net_loss#54))#80]
-Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#51))#77,17,2) AS
sales#81, MakeDecimal(sum(UnscaledValue(return_amt#53))#78,17,2) AS RETURNS#82,
CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#52))#79,17,2)
as decimal(18,2))) -
promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#54))#80,17,2) as
decimal(18,2)))), DecimalType(18,2), true) AS profit#83, catalog channel AS
channel#84, concat(catalog_page, cp_catalog_page_id#66) AS id#85]
+Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#51))#77,17,2) AS
sales#81, MakeDecimal(sum(UnscaledValue(return_amt#53))#78,17,2) AS returns#82,
CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#52))#79,17,2)
as decimal(18,2))) -
promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#54))#80,17,2) as
decimal(18,2)))), DecimalType(18,2), true) AS profit#83, catalog channel AS
channel#84, concat(catalog_page, cp_catalog_page_id#66) AS id#85]
(47) Scan parquet default.web_sales
Output [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88,
ws_net_profit#89]
@@ -324,7 +324,7 @@ Condition : isnotnull(wr_returned_date_sk#96)
(54) Exchange
Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98,
wr_return_amt#99, wr_net_loss#100]
-Arguments: hashpartitioning(wr_item_sk#97, wr_order_number#98, 5), true,
[id=#101]
+Arguments: hashpartitioning(wr_item_sk#97, wr_order_number#98, 5),
ENSURE_REQUIREMENTS, [id=#101]
(55) Sort [codegen id : 15]
Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98,
wr_return_amt#99, wr_net_loss#100]
@@ -346,7 +346,7 @@ Condition : ((isnotnull(ws_item_sk#102) AND
isnotnull(ws_order_number#103)) AND
(59) Exchange
Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103]
-Arguments: hashpartitioning(cast(ws_item_sk#102 as bigint),
cast(ws_order_number#103 as bigint), 5), true, [id=#104]
+Arguments: hashpartitioning(cast(ws_item_sk#102 as bigint),
cast(ws_order_number#103 as bigint), 5), ENSURE_REQUIREMENTS, [id=#104]
(60) Sort [codegen id : 17]
Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103]
@@ -411,19 +411,19 @@ Results [5]: [web_site_id#112, sum#118, sum#119, sum#120,
sum#121]
(74) Exchange
Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121]
-Arguments: hashpartitioning(web_site_id#112, 5), true, [id=#122]
+Arguments: hashpartitioning(web_site_id#112, 5), ENSURE_REQUIREMENTS, [id=#122]
(75) HashAggregate [codegen id : 22]
Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121]
Keys [1]: [web_site_id#112]
Functions [4]: [sum(UnscaledValue(sales_price#92)),
sum(UnscaledValue(return_amt#94)), sum(UnscaledValue(profit#93)),
sum(UnscaledValue(net_loss#95))]
Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#92))#123,
sum(UnscaledValue(return_amt#94))#124, sum(UnscaledValue(profit#93))#125,
sum(UnscaledValue(net_loss#95))#126]
-Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#92))#123,17,2) AS
sales#127, MakeDecimal(sum(UnscaledValue(return_amt#94))#124,17,2) AS
RETURNS#128,
CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#125,17,2)
as decimal(18,2))) -
promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#126,17,2) as
decimal(18,2)))), DecimalType(18,2), true) AS profit#129, web channel AS
channel#130, concat(web_site, web_site_id#112) AS id#131]
+Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#92))#123,17,2) AS
sales#127, MakeDecimal(sum(UnscaledValue(return_amt#94))#124,17,2) AS
returns#128,
CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#125,17,2)
as decimal(18,2))) -
promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#126,17,2) as
decimal(18,2)))), DecimalType(18,2), true) AS profit#129, web channel AS
channel#130, concat(web_site, web_site_id#112) AS id#131]
(76) Union
(77) Expand [codegen id : 23]
-Input [5]: [sales#40, RETURNS#41, profit#42, channel#43, id#44]
+Input [5]: [sales#40, returns#41, profit#42, channel#43, id#44]
Arguments: [List(sales#40, returns#41, profit#42, channel#43, id#44, 0),
List(sales#40, returns#41, profit#42, channel#43, null, 1), List(sales#40,
returns#41, profit#42, null, null, 3)], [sales#40, returns#41, profit#42,
channel#132, id#133, spark_grouping_id#134]
(78) HashAggregate [codegen id : 23]
@@ -435,7 +435,7 @@ Results [9]: [channel#132, id#133, spark_grouping_id#134,
sum#141, isEmpty#142,
(79) Exchange
Input [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#142,
sum#143, isEmpty#144, sum#145, isEmpty#146]
-Arguments: hashpartitioning(channel#132, id#133, spark_grouping_id#134, 5),
true, [id=#147]
+Arguments: hashpartitioning(channel#132, id#133, spark_grouping_id#134, 5),
ENSURE_REQUIREMENTS, [id=#147]
(80) HashAggregate [codegen id : 24]
Input [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#142,
sum#143, isEmpty#144, sum#145, isEmpty#146]
diff --git
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/simplified.txt
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/simplified.txt
index 80b07a37..25102fb 100644
---
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/simplified.txt
+++
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/simplified.txt
@@ -9,7 +9,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
InputAdapter
Union
WholeStageCodegen (6)
- HashAggregate [s_store_id,sum,sum,sum,sum]
[sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,RETURNS,profit,channel,id,sum,sum,sum,sum]
+ HashAggregate [s_store_id,sum,sum,sum,sum]
[sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum]
InputAdapter
Exchange [s_store_id] #2
WholeStageCodegen (5)
@@ -48,7 +48,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
InputAdapter
Scan parquet default.store
[s_store_sk,s_store_id]
WholeStageCodegen (12)
- HashAggregate [cp_catalog_page_id,sum,sum,sum,sum]
[sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,RETURNS,profit,channel,id,sum,sum,sum,sum]
+ HashAggregate [cp_catalog_page_id,sum,sum,sum,sum]
[sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum]
InputAdapter
Exchange [cp_catalog_page_id] #5
WholeStageCodegen (11)
@@ -81,7 +81,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
InputAdapter
Scan parquet
default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id]
WholeStageCodegen (22)
- HashAggregate [web_site_id,sum,sum,sum,sum]
[sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,RETURNS,profit,channel,id,sum,sum,sum,sum]
+ HashAggregate [web_site_id,sum,sum,sum,sum]
[sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum]
InputAdapter
Exchange [web_site_id] #7
WholeStageCodegen (21)
diff --git
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt
index 15f0cda..6020133 100644
---
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt
+++
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt
@@ -183,14 +183,14 @@ Results [5]: [s_store_id#25, sum#31, sum#32, sum#33,
sum#34]
(24) Exchange
Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34]
-Arguments: hashpartitioning(s_store_id#25, 5), true, [id=#35]
+Arguments: hashpartitioning(s_store_id#25, 5), ENSURE_REQUIREMENTS, [id=#35]
(25) HashAggregate [codegen id : 6]
Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34]
Keys [1]: [s_store_id#25]
Functions [4]: [sum(UnscaledValue(sales_price#7)),
sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)),
sum(UnscaledValue(net_loss#10))]
Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#36,
sum(UnscaledValue(return_amt#9))#37, sum(UnscaledValue(profit#8))#38,
sum(UnscaledValue(net_loss#10))#39]
-Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS
sales#40, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS RETURNS#41,
CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2)
as decimal(18,2))) -
promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as
decimal(18,2)))), DecimalType(18,2), true) AS profit#42, store channel AS
channel#43, concat(store, s_store_id#25) AS id#44]
+Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS
sales#40, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS returns#41,
CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2)
as decimal(18,2))) -
promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as
decimal(18,2)))), DecimalType(18,2), true) AS profit#42, store channel AS
channel#43, concat(store, s_store_id#25) AS id#44]
(26) Scan parquet default.catalog_sales
Output [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47,
cs_net_profit#48]
@@ -278,14 +278,14 @@ Results [5]: [cp_catalog_page_id#66, sum#72, sum#73,
sum#74, sum#75]
(45) Exchange
Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75]
-Arguments: hashpartitioning(cp_catalog_page_id#66, 5), true, [id=#76]
+Arguments: hashpartitioning(cp_catalog_page_id#66, 5), ENSURE_REQUIREMENTS,
[id=#76]
(46) HashAggregate [codegen id : 12]
Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75]
Keys [1]: [cp_catalog_page_id#66]
Functions [4]: [sum(UnscaledValue(sales_price#51)),
sum(UnscaledValue(return_amt#53)), sum(UnscaledValue(profit#52)),
sum(UnscaledValue(net_loss#54))]
Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#51))#77,
sum(UnscaledValue(return_amt#53))#78, sum(UnscaledValue(profit#52))#79,
sum(UnscaledValue(net_loss#54))#80]
-Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#51))#77,17,2) AS
sales#81, MakeDecimal(sum(UnscaledValue(return_amt#53))#78,17,2) AS RETURNS#82,
CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#52))#79,17,2)
as decimal(18,2))) -
promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#54))#80,17,2) as
decimal(18,2)))), DecimalType(18,2), true) AS profit#83, catalog channel AS
channel#84, concat(catalog_page, cp_catalog_page_id#66) AS id#85]
+Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#51))#77,17,2) AS
sales#81, MakeDecimal(sum(UnscaledValue(return_amt#53))#78,17,2) AS returns#82,
CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#52))#79,17,2)
as decimal(18,2))) -
promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#54))#80,17,2) as
decimal(18,2)))), DecimalType(18,2), true) AS profit#83, catalog channel AS
channel#84, concat(catalog_page, cp_catalog_page_id#66) AS id#85]
(47) Scan parquet default.web_sales
Output [4]: [ws_sold_date_sk#86, ws_web_site_sk#87, ws_ext_sales_price#88,
ws_net_profit#89]
@@ -396,19 +396,19 @@ Results [5]: [web_site_id#111, sum#117, sum#118, sum#119,
sum#120]
(71) Exchange
Input [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120]
-Arguments: hashpartitioning(web_site_id#111, 5), true, [id=#121]
+Arguments: hashpartitioning(web_site_id#111, 5), ENSURE_REQUIREMENTS, [id=#121]
(72) HashAggregate [codegen id : 19]
Input [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120]
Keys [1]: [web_site_id#111]
Functions [4]: [sum(UnscaledValue(sales_price#92)),
sum(UnscaledValue(return_amt#94)), sum(UnscaledValue(profit#93)),
sum(UnscaledValue(net_loss#95))]
Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#92))#122,
sum(UnscaledValue(return_amt#94))#123, sum(UnscaledValue(profit#93))#124,
sum(UnscaledValue(net_loss#95))#125]
-Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#92))#122,17,2) AS
sales#126, MakeDecimal(sum(UnscaledValue(return_amt#94))#123,17,2) AS
RETURNS#127,
CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#124,17,2)
as decimal(18,2))) -
promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#125,17,2) as
decimal(18,2)))), DecimalType(18,2), true) AS profit#128, web channel AS
channel#129, concat(web_site, web_site_id#111) AS id#130]
+Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#92))#122,17,2) AS
sales#126, MakeDecimal(sum(UnscaledValue(return_amt#94))#123,17,2) AS
returns#127,
CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#124,17,2)
as decimal(18,2))) -
promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#125,17,2) as
decimal(18,2)))), DecimalType(18,2), true) AS profit#128, web channel AS
channel#129, concat(web_site, web_site_id#111) AS id#130]
(73) Union
(74) Expand [codegen id : 20]
-Input [5]: [sales#40, RETURNS#41, profit#42, channel#43, id#44]
+Input [5]: [sales#40, returns#41, profit#42, channel#43, id#44]
Arguments: [List(sales#40, returns#41, profit#42, channel#43, id#44, 0),
List(sales#40, returns#41, profit#42, channel#43, null, 1), List(sales#40,
returns#41, profit#42, null, null, 3)], [sales#40, returns#41, profit#42,
channel#131, id#132, spark_grouping_id#133]
(75) HashAggregate [codegen id : 20]
@@ -420,7 +420,7 @@ Results [9]: [channel#131, id#132, spark_grouping_id#133,
sum#140, isEmpty#141,
(76) Exchange
Input [9]: [channel#131, id#132, spark_grouping_id#133, sum#140, isEmpty#141,
sum#142, isEmpty#143, sum#144, isEmpty#145]
-Arguments: hashpartitioning(channel#131, id#132, spark_grouping_id#133, 5),
true, [id=#146]
+Arguments: hashpartitioning(channel#131, id#132, spark_grouping_id#133, 5),
ENSURE_REQUIREMENTS, [id=#146]
(77) HashAggregate [codegen id : 21]
Input [9]: [channel#131, id#132, spark_grouping_id#133, sum#140, isEmpty#141,
sum#142, isEmpty#143, sum#144, isEmpty#145]
diff --git
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt
index 9b7cc33..6f38d31 100644
---
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt
+++
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/simplified.txt
@@ -9,7 +9,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
InputAdapter
Union
WholeStageCodegen (6)
- HashAggregate [s_store_id,sum,sum,sum,sum]
[sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,RETURNS,profit,channel,id,sum,sum,sum,sum]
+ HashAggregate [s_store_id,sum,sum,sum,sum]
[sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum]
InputAdapter
Exchange [s_store_id] #2
WholeStageCodegen (5)
@@ -48,7 +48,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
InputAdapter
Scan parquet default.store
[s_store_sk,s_store_id]
WholeStageCodegen (12)
- HashAggregate [cp_catalog_page_id,sum,sum,sum,sum]
[sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,RETURNS,profit,channel,id,sum,sum,sum,sum]
+ HashAggregate [cp_catalog_page_id,sum,sum,sum,sum]
[sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum]
InputAdapter
Exchange [cp_catalog_page_id] #5
WholeStageCodegen (11)
@@ -81,7 +81,7 @@ TakeOrderedAndProject [channel,id,sales,returns,profit]
InputAdapter
Scan parquet
default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id]
WholeStageCodegen (19)
- HashAggregate [web_site_id,sum,sum,sum,sum]
[sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,RETURNS,profit,channel,id,sum,sum,sum,sum]
+ HashAggregate [web_site_id,sum,sum,sum,sum]
[sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,returns,profit,channel,id,sum,sum,sum,sum]
InputAdapter
Exchange [web_site_id] #7
WholeStageCodegen (18)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]