This is an automated email from the ASF dual-hosted git repository. agrove pushed a commit to branch comet-parquet-exec in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/comet-parquet-exec by this push: new 22eb5914a fix regression (#1309) 22eb5914a is described below commit 22eb5914ac109e4b7acd1f11725d9d1d46c0a960 Author: Andy Grove <agr...@apache.org> AuthorDate: Tue Jan 21 07:28:09 2025 -0700 fix regression (#1309) --- .../org/apache/comet/serde/QueryPlanSerde.scala | 2 +- .../approved-plans-v1_4-spark3_5/q9/explain.txt | 42 +++--- .../approved-plans-v1_4-spark3_5/q9/simplified.txt | 136 ++++++++--------- .../approved-plans-v1_4-spark4_0/q9/explain.txt | 42 +++--- .../approved-plans-v1_4-spark4_0/q9/simplified.txt | 166 ++++++++++----------- .../approved-plans-v1_4/q9/explain.txt | 42 +++--- .../approved-plans-v1_4/q9/simplified.txt | 136 ++++++++--------- .../apache/spark/sql/CometTPCDSQuerySuite.scala | 1 + .../org/apache/spark/sql/CometTPCHQuerySuite.scala | 1 + 9 files changed, 285 insertions(+), 283 deletions(-) diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index 244d24385..8c37abf3c 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -61,7 +61,7 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim logWarning(s"Comet native execution is disabled due to: $reason") } - def supportedDataType(dt: DataType, allowStruct: Boolean = true): Boolean = dt match { + def supportedDataType(dt: DataType, allowStruct: Boolean = false): Boolean = dt match { case _: ByteType | _: ShortType | _: IntegerType | _: LongType | _: FloatType | _: DoubleType | _: StringType | _: BinaryType | _: TimestampType | _: DecimalType | _: DateType | _: BooleanType | _: NullType => diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9/explain.txt index 831b71a98..d2061bd1d 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9/explain.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9/explain.txt @@ -1,6 +1,6 @@ == Physical Plan == -* CometColumnarToRow (4) -+- CometProject (3) +* Project (4) ++- * CometColumnarToRow (3) +- CometFilter (2) +- CometScan parquet spark_catalog.default.reason (1) @@ -16,16 +16,16 @@ ReadSchema: struct<r_reason_sk:int> Input [1]: [r_reason_sk#1] Condition : (isnotnull(r_reason_sk#1) AND (r_reason_sk#1 = 1)) -(3) CometProject +(3) CometColumnarToRow [codegen id : 1] Input [1]: [r_reason_sk#1] -Arguments: [bucket1#2, bucket2#3, bucket3#4, bucket4#5, bucket5#6], [CASE WHEN (Subquery scalar-subquery#7, [id=#8].count(1) > 62316685) THEN ReusedSubquery Subquery scalar-subquery#7, [id=#8].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#7, [id=#8].avg(ss_net_paid) END AS bucket1#2, CASE WHEN (Subquery scalar-subquery#9, [id=#10].count(1) > 19045798) THEN ReusedSubquery Subquery scalar-subquery#9, [id=#10].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery sca [...] -(4) CometColumnarToRow [codegen id : 1] -Input [5]: [bucket1#2, bucket2#3, bucket3#4, bucket4#5, bucket5#6] +(4) Project [codegen id : 1] +Output [5]: [CASE WHEN (Subquery scalar-subquery#2, [id=#3].count(1) > 62316685) THEN ReusedSubquery Subquery scalar-subquery#2, [id=#3].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#2, [id=#3].avg(ss_net_paid) END AS bucket1#4, CASE WHEN (Subquery scalar-subquery#5, [id=#6].count(1) > 19045798) THEN ReusedSubquery Subquery scalar-subquery#5, [id=#6].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#5, [id=#6].avg(ss_net_paid) END AS bucket2#7, [...] +Input [1]: [r_reason_sk#1] ===== Subqueries ===== -Subquery:1 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#7, [id=#8] +Subquery:1 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#2, [id=#3] * CometColumnarToRow (12) +- CometProject (11) +- CometHashAggregate (10) @@ -72,11 +72,11 @@ Arguments: [mergedValue#29], [named_struct(count(1), count(1)#26, avg(ss_ext_dis (12) CometColumnarToRow [codegen id : 1] Input [1]: [mergedValue#29] -Subquery:2 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#7, [id=#8] +Subquery:2 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#2, [id=#3] -Subquery:3 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#7, [id=#8] +Subquery:3 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#2, [id=#3] -Subquery:4 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#9, [id=#10] +Subquery:4 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#5, [id=#6] * CometColumnarToRow (20) +- CometProject (19) +- CometHashAggregate (18) @@ -123,11 +123,11 @@ Arguments: [mergedValue#42], [named_struct(count(1), count(1)#39, avg(ss_ext_dis (20) CometColumnarToRow [codegen id : 1] Input [1]: [mergedValue#42] -Subquery:5 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#9, [id=#10] +Subquery:5 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#5, [id=#6] -Subquery:6 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#9, [id=#10] +Subquery:6 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#5, [id=#6] -Subquery:7 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#11, [id=#12] +Subquery:7 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#8, [id=#9] * CometColumnarToRow (28) +- CometProject (27) +- CometHashAggregate (26) @@ -174,11 +174,11 @@ Arguments: [mergedValue#55], [named_struct(count(1), count(1)#52, avg(ss_ext_dis (28) CometColumnarToRow [codegen id : 1] Input [1]: [mergedValue#55] -Subquery:8 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] +Subquery:8 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#8, [id=#9] -Subquery:9 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] +Subquery:9 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#8, [id=#9] -Subquery:10 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#13, [id=#14] +Subquery:10 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#11, [id=#12] * CometColumnarToRow (36) +- CometProject (35) +- CometHashAggregate (34) @@ -225,11 +225,11 @@ Arguments: [mergedValue#68], [named_struct(count(1), count(1)#65, avg(ss_ext_dis (36) CometColumnarToRow [codegen id : 1] Input [1]: [mergedValue#68] -Subquery:11 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#13, [id=#14] +Subquery:11 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] -Subquery:12 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#13, [id=#14] +Subquery:12 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] -Subquery:13 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#15, [id=#16] +Subquery:13 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#14, [id=#15] * CometColumnarToRow (44) +- CometProject (43) +- CometHashAggregate (42) @@ -276,8 +276,8 @@ Arguments: [mergedValue#81], [named_struct(count(1), count(1)#78, avg(ss_ext_dis (44) CometColumnarToRow [codegen id : 1] Input [1]: [mergedValue#81] -Subquery:14 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#15, [id=#16] +Subquery:14 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#14, [id=#15] -Subquery:15 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#15, [id=#16] +Subquery:15 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#14, [id=#15] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9/simplified.txt index 3e1b996a5..68961382c 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9/simplified.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q9/simplified.txt @@ -1,71 +1,71 @@ WholeStageCodegen (1) - CometColumnarToRow - InputAdapter - CometProject [bucket1,bucket2,bucket3,bucket4,bucket5] - Subquery #1 - WholeStageCodegen (1) - CometColumnarToRow - InputAdapter - CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] - CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] - CometExchange #1 - CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] - CometProject [ss_ext_discount_amt,ss_net_paid] - CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - ReusedSubquery [mergedValue] #1 - ReusedSubquery [mergedValue] #1 - Subquery #2 - WholeStageCodegen (1) - CometColumnarToRow - InputAdapter - CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] - CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] - CometExchange #2 - CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] - CometProject [ss_ext_discount_amt,ss_net_paid] - CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - ReusedSubquery [mergedValue] #2 - ReusedSubquery [mergedValue] #2 - Subquery #3 - WholeStageCodegen (1) - CometColumnarToRow - InputAdapter - CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] - CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] - CometExchange #3 - CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] - CometProject [ss_ext_discount_amt,ss_net_paid] - CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - ReusedSubquery [mergedValue] #3 - ReusedSubquery [mergedValue] #3 - Subquery #4 - WholeStageCodegen (1) - CometColumnarToRow - InputAdapter - CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] - CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] - CometExchange #4 - CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] - CometProject [ss_ext_discount_amt,ss_net_paid] - CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - ReusedSubquery [mergedValue] #4 - ReusedSubquery [mergedValue] #4 - Subquery #5 - WholeStageCodegen (1) - CometColumnarToRow - InputAdapter - CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] - CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] - CometExchange #5 - CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] - CometProject [ss_ext_discount_amt,ss_net_paid] - CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - ReusedSubquery [mergedValue] #5 - ReusedSubquery [mergedValue] #5 + Project + Subquery #1 + WholeStageCodegen (1) + CometColumnarToRow + InputAdapter + CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] + CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] + CometExchange #1 + CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #1 + ReusedSubquery [mergedValue] #1 + Subquery #2 + WholeStageCodegen (1) + CometColumnarToRow + InputAdapter + CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] + CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] + CometExchange #2 + CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #2 + ReusedSubquery [mergedValue] #2 + Subquery #3 + WholeStageCodegen (1) + CometColumnarToRow + InputAdapter + CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] + CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] + CometExchange #3 + CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #3 + ReusedSubquery [mergedValue] #3 + Subquery #4 + WholeStageCodegen (1) + CometColumnarToRow + InputAdapter + CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] + CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] + CometExchange #4 + CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #4 + ReusedSubquery [mergedValue] #4 + Subquery #5 + WholeStageCodegen (1) + CometColumnarToRow + InputAdapter + CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] + CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] + CometExchange #5 + CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #5 + ReusedSubquery [mergedValue] #5 + CometColumnarToRow + InputAdapter CometFilter [r_reason_sk] CometScan parquet spark_catalog.default.reason [r_reason_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q9/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q9/explain.txt index 1718c0cdc..bcf7bb71c 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q9/explain.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q9/explain.txt @@ -1,6 +1,6 @@ == Physical Plan == -* CometColumnarToRow (4) -+- CometProject (3) +* Project (4) ++- * CometColumnarToRow (3) +- CometFilter (2) +- CometScan parquet spark_catalog.default.reason (1) @@ -16,16 +16,16 @@ ReadSchema: struct<r_reason_sk:int> Input [1]: [r_reason_sk#1] Condition : (isnotnull(r_reason_sk#1) AND (r_reason_sk#1 = 1)) -(3) CometProject +(3) CometColumnarToRow [codegen id : 1] Input [1]: [r_reason_sk#1] -Arguments: [bucket1#2, bucket2#3, bucket3#4, bucket4#5, bucket5#6], [CASE WHEN (Subquery scalar-subquery#7, [id=#8].count(1) > 62316685) THEN ReusedSubquery Subquery scalar-subquery#7, [id=#8].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#7, [id=#8].avg(ss_net_paid) END AS bucket1#2, CASE WHEN (Subquery scalar-subquery#9, [id=#10].count(1) > 19045798) THEN ReusedSubquery Subquery scalar-subquery#9, [id=#10].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery sca [...] -(4) CometColumnarToRow [codegen id : 1] -Input [5]: [bucket1#2, bucket2#3, bucket3#4, bucket4#5, bucket5#6] +(4) Project [codegen id : 1] +Output [5]: [CASE WHEN (Subquery scalar-subquery#2, [id=#3].count(1) > 62316685) THEN ReusedSubquery Subquery scalar-subquery#2, [id=#3].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#2, [id=#3].avg(ss_net_paid) END AS bucket1#4, CASE WHEN (Subquery scalar-subquery#5, [id=#6].count(1) > 19045798) THEN ReusedSubquery Subquery scalar-subquery#5, [id=#6].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#5, [id=#6].avg(ss_net_paid) END AS bucket2#7, [...] +Input [1]: [r_reason_sk#1] ===== Subqueries ===== -Subquery:1 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#7, [id=#8] +Subquery:1 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#2, [id=#3] * Project (13) +- * HashAggregate (12) +- * CometColumnarToRow (11) @@ -80,11 +80,11 @@ Results [3]: [count(1)#31 AS count(1)#34, cast((avg(UnscaledValue(ss_ext_discoun Output [1]: [named_struct(count(1), count(1)#34, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#35, avg(ss_net_paid), avg(ss_net_paid)#36) AS mergedValue#37] Input [3]: [count(1)#34, avg(ss_ext_discount_amt)#35, avg(ss_net_paid)#36] -Subquery:2 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#7, [id=#8] +Subquery:2 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#2, [id=#3] -Subquery:3 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#7, [id=#8] +Subquery:3 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#2, [id=#3] -Subquery:4 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#9, [id=#10] +Subquery:4 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#5, [id=#6] * Project (22) +- * HashAggregate (21) +- * CometColumnarToRow (20) @@ -139,11 +139,11 @@ Results [3]: [count(1)#52 AS count(1)#55, cast((avg(UnscaledValue(ss_ext_discoun Output [1]: [named_struct(count(1), count(1)#55, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#56, avg(ss_net_paid), avg(ss_net_paid)#57) AS mergedValue#58] Input [3]: [count(1)#55, avg(ss_ext_discount_amt)#56, avg(ss_net_paid)#57] -Subquery:5 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#9, [id=#10] +Subquery:5 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#5, [id=#6] -Subquery:6 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#9, [id=#10] +Subquery:6 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#5, [id=#6] -Subquery:7 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#11, [id=#12] +Subquery:7 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#8, [id=#9] * Project (31) +- * HashAggregate (30) +- * CometColumnarToRow (29) @@ -198,11 +198,11 @@ Results [3]: [count(1)#73 AS count(1)#76, cast((avg(UnscaledValue(ss_ext_discoun Output [1]: [named_struct(count(1), count(1)#76, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#77, avg(ss_net_paid), avg(ss_net_paid)#78) AS mergedValue#79] Input [3]: [count(1)#76, avg(ss_ext_discount_amt)#77, avg(ss_net_paid)#78] -Subquery:8 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] +Subquery:8 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#8, [id=#9] -Subquery:9 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] +Subquery:9 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#8, [id=#9] -Subquery:10 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#13, [id=#14] +Subquery:10 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#11, [id=#12] * Project (40) +- * HashAggregate (39) +- * CometColumnarToRow (38) @@ -257,11 +257,11 @@ Results [3]: [count(1)#94 AS count(1)#97, cast((avg(UnscaledValue(ss_ext_discoun Output [1]: [named_struct(count(1), count(1)#97, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#98, avg(ss_net_paid), avg(ss_net_paid)#99) AS mergedValue#100] Input [3]: [count(1)#97, avg(ss_ext_discount_amt)#98, avg(ss_net_paid)#99] -Subquery:11 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#13, [id=#14] +Subquery:11 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] -Subquery:12 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#13, [id=#14] +Subquery:12 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] -Subquery:13 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#15, [id=#16] +Subquery:13 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#14, [id=#15] * Project (49) +- * HashAggregate (48) +- * CometColumnarToRow (47) @@ -316,8 +316,8 @@ Results [3]: [count(1)#115 AS count(1)#118, cast((avg(UnscaledValue(ss_ext_disco Output [1]: [named_struct(count(1), count(1)#118, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#119, avg(ss_net_paid), avg(ss_net_paid)#120) AS mergedValue#121] Input [3]: [count(1)#118, avg(ss_ext_discount_amt)#119, avg(ss_net_paid)#120] -Subquery:14 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#15, [id=#16] +Subquery:14 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#14, [id=#15] -Subquery:15 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#15, [id=#16] +Subquery:15 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#14, [id=#15] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q9/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q9/simplified.txt index 1b4462fe9..68276072e 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q9/simplified.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark4_0/q9/simplified.txt @@ -1,86 +1,86 @@ WholeStageCodegen (1) - CometColumnarToRow - InputAdapter - CometProject [bucket1,bucket2,bucket3,bucket4,bucket5] - Subquery #1 - WholeStageCodegen (2) - Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] - HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] - CometColumnarToRow - InputAdapter - CometColumnarExchange #1 - WholeStageCodegen (1) - HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] - CometColumnarToRow - InputAdapter - CometProject [ss_ext_discount_amt,ss_net_paid] - CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - ReusedSubquery [mergedValue] #1 - ReusedSubquery [mergedValue] #1 - Subquery #2 - WholeStageCodegen (2) - Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] - HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] - CometColumnarToRow - InputAdapter - CometColumnarExchange #2 - WholeStageCodegen (1) - HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] - CometColumnarToRow - InputAdapter - CometProject [ss_ext_discount_amt,ss_net_paid] - CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - ReusedSubquery [mergedValue] #2 - ReusedSubquery [mergedValue] #2 - Subquery #3 - WholeStageCodegen (2) - Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] - HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] - CometColumnarToRow - InputAdapter - CometColumnarExchange #3 - WholeStageCodegen (1) - HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] - CometColumnarToRow - InputAdapter - CometProject [ss_ext_discount_amt,ss_net_paid] - CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - ReusedSubquery [mergedValue] #3 - ReusedSubquery [mergedValue] #3 - Subquery #4 - WholeStageCodegen (2) - Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] - HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] - CometColumnarToRow - InputAdapter - CometColumnarExchange #4 - WholeStageCodegen (1) - HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] - CometColumnarToRow - InputAdapter - CometProject [ss_ext_discount_amt,ss_net_paid] - CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - ReusedSubquery [mergedValue] #4 - ReusedSubquery [mergedValue] #4 - Subquery #5 - WholeStageCodegen (2) - Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] - HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] - CometColumnarToRow - InputAdapter - CometColumnarExchange #5 - WholeStageCodegen (1) - HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] - CometColumnarToRow - InputAdapter - CometProject [ss_ext_discount_amt,ss_net_paid] - CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - ReusedSubquery [mergedValue] #5 - ReusedSubquery [mergedValue] #5 + Project + Subquery #1 + WholeStageCodegen (2) + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + CometColumnarToRow + InputAdapter + CometColumnarExchange #1 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + CometColumnarToRow + InputAdapter + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #1 + ReusedSubquery [mergedValue] #1 + Subquery #2 + WholeStageCodegen (2) + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + CometColumnarToRow + InputAdapter + CometColumnarExchange #2 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + CometColumnarToRow + InputAdapter + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #2 + ReusedSubquery [mergedValue] #2 + Subquery #3 + WholeStageCodegen (2) + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + CometColumnarToRow + InputAdapter + CometColumnarExchange #3 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + CometColumnarToRow + InputAdapter + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #3 + ReusedSubquery [mergedValue] #3 + Subquery #4 + WholeStageCodegen (2) + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + CometColumnarToRow + InputAdapter + CometColumnarExchange #4 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + CometColumnarToRow + InputAdapter + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #4 + ReusedSubquery [mergedValue] #4 + Subquery #5 + WholeStageCodegen (2) + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + CometColumnarToRow + InputAdapter + CometColumnarExchange #5 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + CometColumnarToRow + InputAdapter + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #5 + ReusedSubquery [mergedValue] #5 + CometColumnarToRow + InputAdapter CometFilter [r_reason_sk] CometScan parquet spark_catalog.default.reason [r_reason_sk] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt index 831b71a98..d2061bd1d 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt @@ -1,6 +1,6 @@ == Physical Plan == -* CometColumnarToRow (4) -+- CometProject (3) +* Project (4) ++- * CometColumnarToRow (3) +- CometFilter (2) +- CometScan parquet spark_catalog.default.reason (1) @@ -16,16 +16,16 @@ ReadSchema: struct<r_reason_sk:int> Input [1]: [r_reason_sk#1] Condition : (isnotnull(r_reason_sk#1) AND (r_reason_sk#1 = 1)) -(3) CometProject +(3) CometColumnarToRow [codegen id : 1] Input [1]: [r_reason_sk#1] -Arguments: [bucket1#2, bucket2#3, bucket3#4, bucket4#5, bucket5#6], [CASE WHEN (Subquery scalar-subquery#7, [id=#8].count(1) > 62316685) THEN ReusedSubquery Subquery scalar-subquery#7, [id=#8].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#7, [id=#8].avg(ss_net_paid) END AS bucket1#2, CASE WHEN (Subquery scalar-subquery#9, [id=#10].count(1) > 19045798) THEN ReusedSubquery Subquery scalar-subquery#9, [id=#10].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery sca [...] -(4) CometColumnarToRow [codegen id : 1] -Input [5]: [bucket1#2, bucket2#3, bucket3#4, bucket4#5, bucket5#6] +(4) Project [codegen id : 1] +Output [5]: [CASE WHEN (Subquery scalar-subquery#2, [id=#3].count(1) > 62316685) THEN ReusedSubquery Subquery scalar-subquery#2, [id=#3].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#2, [id=#3].avg(ss_net_paid) END AS bucket1#4, CASE WHEN (Subquery scalar-subquery#5, [id=#6].count(1) > 19045798) THEN ReusedSubquery Subquery scalar-subquery#5, [id=#6].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#5, [id=#6].avg(ss_net_paid) END AS bucket2#7, [...] +Input [1]: [r_reason_sk#1] ===== Subqueries ===== -Subquery:1 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#7, [id=#8] +Subquery:1 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#2, [id=#3] * CometColumnarToRow (12) +- CometProject (11) +- CometHashAggregate (10) @@ -72,11 +72,11 @@ Arguments: [mergedValue#29], [named_struct(count(1), count(1)#26, avg(ss_ext_dis (12) CometColumnarToRow [codegen id : 1] Input [1]: [mergedValue#29] -Subquery:2 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#7, [id=#8] +Subquery:2 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#2, [id=#3] -Subquery:3 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#7, [id=#8] +Subquery:3 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#2, [id=#3] -Subquery:4 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#9, [id=#10] +Subquery:4 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#5, [id=#6] * CometColumnarToRow (20) +- CometProject (19) +- CometHashAggregate (18) @@ -123,11 +123,11 @@ Arguments: [mergedValue#42], [named_struct(count(1), count(1)#39, avg(ss_ext_dis (20) CometColumnarToRow [codegen id : 1] Input [1]: [mergedValue#42] -Subquery:5 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#9, [id=#10] +Subquery:5 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#5, [id=#6] -Subquery:6 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#9, [id=#10] +Subquery:6 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#5, [id=#6] -Subquery:7 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#11, [id=#12] +Subquery:7 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#8, [id=#9] * CometColumnarToRow (28) +- CometProject (27) +- CometHashAggregate (26) @@ -174,11 +174,11 @@ Arguments: [mergedValue#55], [named_struct(count(1), count(1)#52, avg(ss_ext_dis (28) CometColumnarToRow [codegen id : 1] Input [1]: [mergedValue#55] -Subquery:8 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] +Subquery:8 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#8, [id=#9] -Subquery:9 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] +Subquery:9 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#8, [id=#9] -Subquery:10 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#13, [id=#14] +Subquery:10 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#11, [id=#12] * CometColumnarToRow (36) +- CometProject (35) +- CometHashAggregate (34) @@ -225,11 +225,11 @@ Arguments: [mergedValue#68], [named_struct(count(1), count(1)#65, avg(ss_ext_dis (36) CometColumnarToRow [codegen id : 1] Input [1]: [mergedValue#68] -Subquery:11 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#13, [id=#14] +Subquery:11 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] -Subquery:12 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#13, [id=#14] +Subquery:12 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] -Subquery:13 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#15, [id=#16] +Subquery:13 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#14, [id=#15] * CometColumnarToRow (44) +- CometProject (43) +- CometHashAggregate (42) @@ -276,8 +276,8 @@ Arguments: [mergedValue#81], [named_struct(count(1), count(1)#78, avg(ss_ext_dis (44) CometColumnarToRow [codegen id : 1] Input [1]: [mergedValue#81] -Subquery:14 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#15, [id=#16] +Subquery:14 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#14, [id=#15] -Subquery:15 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#15, [id=#16] +Subquery:15 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#14, [id=#15] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt index 3e1b996a5..68961382c 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt @@ -1,71 +1,71 @@ WholeStageCodegen (1) - CometColumnarToRow - InputAdapter - CometProject [bucket1,bucket2,bucket3,bucket4,bucket5] - Subquery #1 - WholeStageCodegen (1) - CometColumnarToRow - InputAdapter - CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] - CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] - CometExchange #1 - CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] - CometProject [ss_ext_discount_amt,ss_net_paid] - CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - ReusedSubquery [mergedValue] #1 - ReusedSubquery [mergedValue] #1 - Subquery #2 - WholeStageCodegen (1) - CometColumnarToRow - InputAdapter - CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] - CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] - CometExchange #2 - CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] - CometProject [ss_ext_discount_amt,ss_net_paid] - CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - ReusedSubquery [mergedValue] #2 - ReusedSubquery [mergedValue] #2 - Subquery #3 - WholeStageCodegen (1) - CometColumnarToRow - InputAdapter - CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] - CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] - CometExchange #3 - CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] - CometProject [ss_ext_discount_amt,ss_net_paid] - CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - ReusedSubquery [mergedValue] #3 - ReusedSubquery [mergedValue] #3 - Subquery #4 - WholeStageCodegen (1) - CometColumnarToRow - InputAdapter - CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] - CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] - CometExchange #4 - CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] - CometProject [ss_ext_discount_amt,ss_net_paid] - CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - ReusedSubquery [mergedValue] #4 - ReusedSubquery [mergedValue] #4 - Subquery #5 - WholeStageCodegen (1) - CometColumnarToRow - InputAdapter - CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] - CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] - CometExchange #5 - CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] - CometProject [ss_ext_discount_amt,ss_net_paid] - CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] - ReusedSubquery [mergedValue] #5 - ReusedSubquery [mergedValue] #5 + Project + Subquery #1 + WholeStageCodegen (1) + CometColumnarToRow + InputAdapter + CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] + CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] + CometExchange #1 + CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #1 + ReusedSubquery [mergedValue] #1 + Subquery #2 + WholeStageCodegen (1) + CometColumnarToRow + InputAdapter + CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] + CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] + CometExchange #2 + CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #2 + ReusedSubquery [mergedValue] #2 + Subquery #3 + WholeStageCodegen (1) + CometColumnarToRow + InputAdapter + CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] + CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] + CometExchange #3 + CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #3 + ReusedSubquery [mergedValue] #3 + Subquery #4 + WholeStageCodegen (1) + CometColumnarToRow + InputAdapter + CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] + CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] + CometExchange #4 + CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #4 + ReusedSubquery [mergedValue] #4 + Subquery #5 + WholeStageCodegen (1) + CometColumnarToRow + InputAdapter + CometProject [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] [mergedValue] + CometHashAggregate [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count,count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid))] + CometExchange #5 + CometHashAggregate [count,sum,count,sum,count,ss_ext_discount_amt,ss_net_paid] + CometProject [ss_ext_discount_amt,ss_net_paid] + CometFilter [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + CometScan parquet spark_catalog.default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #5 + ReusedSubquery [mergedValue] #5 + CometColumnarToRow + InputAdapter CometFilter [r_reason_sk] CometScan parquet spark_catalog.default.reason [r_reason_sk] diff --git a/spark/src/test/scala/org/apache/spark/sql/CometTPCDSQuerySuite.scala b/spark/src/test/scala/org/apache/spark/sql/CometTPCDSQuerySuite.scala index ee59e7897..8d084fd75 100644 --- a/spark/src/test/scala/org/apache/spark/sql/CometTPCDSQuerySuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/CometTPCDSQuerySuite.scala @@ -185,6 +185,7 @@ class CometTPCDSQuerySuite "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager") conf.set(CometConf.COMET_ENABLED.key, "true") conf.set(CometConf.COMET_EXEC_ENABLED.key, "true") + conf.set(CometConf.COMET_NATIVE_SCAN_ENABLED.key, "true") conf.set(CometConf.COMET_EXEC_SHUFFLE_ENABLED.key, "true") conf.set(CometConf.COMET_MEMORY_OVERHEAD.key, "15g") conf.set("spark.sql.adaptive.coalescePartitions.enabled", "true") diff --git a/spark/src/test/scala/org/apache/spark/sql/CometTPCHQuerySuite.scala b/spark/src/test/scala/org/apache/spark/sql/CometTPCHQuerySuite.scala index c81fba3ef..0dadb2217 100644 --- a/spark/src/test/scala/org/apache/spark/sql/CometTPCHQuerySuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/CometTPCHQuerySuite.scala @@ -89,6 +89,7 @@ class CometTPCHQuerySuite extends QueryTest with TPCBase with ShimCometTPCHQuery "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager") conf.set(CometConf.COMET_ENABLED.key, "true") conf.set(CometConf.COMET_EXEC_ENABLED.key, "true") + conf.set(CometConf.COMET_NATIVE_SCAN_ENABLED.key, "true") conf.set(CometConf.COMET_EXEC_SHUFFLE_ENABLED.key, "true") conf.set(CometConf.COMET_SHUFFLE_MODE.key, "jvm") conf.set(MEMORY_OFFHEAP_ENABLED.key, "true") --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org For additional commands, e-mail: commits-h...@datafusion.apache.org