dilipbiswal commented on a change in pull request #24759: [SPARK-27395][SQL] Improve EXPLAIN command URL: https://github.com/apache/spark/pull/24759#discussion_r316817014
########## File path: sql/core/src/test/resources/sql-tests/results/explain.sql.out ########## @@ -0,0 +1,714 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 17 + + +-- !query 0 +CREATE table explain_temp1 (key int, val int) USING PARQUET +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +CREATE table explain_temp2 (key int, val int) USING PARQUET +-- !query 1 schema +struct<> +-- !query 1 output + + + +-- !query 2 +CREATE table explain_temp3 (key int, val int) USING PARQUET +-- !query 2 schema +struct<> +-- !query 2 output + + + +-- !query 3 +SET spark.sql.codegen.wholeStage = true +-- !query 3 schema +struct<key:string,value:string> +-- !query 3 output +spark.sql.codegen.wholeStage true + + +-- !query 4 +EXPLAIN FORMATTED + SELECT key, max(val) + FROM explain_temp1 + WHERE key > 0 + GROUP BY key + ORDER BY key +-- !query 4 schema +struct<plan:string> +-- !query 4 output +== Physical Plan == +* Sort (9) ++- Exchange (8) + +- * HashAggregate (7) + +- Exchange (6) + +- * HashAggregate (5) + +- * Project (4) + +- * Filter (3) + +- * ColumnarToRow (2) + +- Scan parquet default.explain_temp1 (1) + + +(1) Scan parquet default.explain_temp1 +Output: [key#x, val#x] + +(2) ColumnarToRow [codegen id : 1] +Input: [key#x, val#x] + +(3) Filter [codegen id : 1] +Input : [key#x, val#x] +Condition : (isnotnull(key#x) AND (key#x > 0)) + +(4) Project [codegen id : 1] +Output : [key#x, val#x] +Input : [key#x, val#x] + +(5) HashAggregate [codegen id : 1] +Input: [key#x, val#x] + +(6) Exchange +Input: [key#x, max#x] + +(7) HashAggregate [codegen id : 2] +Input: [key#x, max#x] + +(8) Exchange +Input: [key#x, max(val)#x] + +(9) Sort [codegen id : 3] +Input: [key#x, max(val)#x] + + +-- !query 5 +EXPLAIN FORMATTED + SELECT key, max(val) + FROM explain_temp1 + WHERE key > 0 + GROUP BY key + HAVING max(val) > 0 +-- !query 5 schema +struct<plan:string> +-- !query 5 output +== Physical Plan == +* Project (9) ++- * Filter (8) + +- * HashAggregate (7) + +- Exchange (6) + +- * HashAggregate (5) + +- * Project (4) + +- * Filter (3) + +- * ColumnarToRow (2) + +- Scan parquet default.explain_temp1 (1) + + +(1) Scan parquet default.explain_temp1 +Output: [key#x, val#x] + +(2) ColumnarToRow [codegen id : 1] +Input: [key#x, val#x] + +(3) Filter [codegen id : 1] +Input : [key#x, val#x] +Condition : (isnotnull(key#x) AND (key#x > 0)) + +(4) Project [codegen id : 1] +Output : [key#x, val#x] +Input : [key#x, val#x] + +(5) HashAggregate [codegen id : 1] +Input: [key#x, val#x] + +(6) Exchange +Input: [key#x, max#x] + +(7) HashAggregate [codegen id : 2] +Input: [key#x, max#x] + +(8) Filter [codegen id : 2] +Input : [key#x, max(val)#x, max(val#x)#x] +Condition : (isnotnull(max(val#x)#x) AND (max(val#x)#x > 0)) + +(9) Project [codegen id : 2] +Output : [key#x, max(val)#x] +Input : [key#x, max(val)#x, max(val#x)#x] + + +-- !query 6 +EXPLAIN FORMATTED + SELECT key, val FROM explain_temp1 WHERE key > 0 + UNION + SELECT key, val FROM explain_temp1 WHERE key > 0 +-- !query 6 schema +struct<plan:string> +-- !query 6 output +== Physical Plan == +* HashAggregate (12) ++- Exchange (11) + +- * HashAggregate (10) + +- Union (9) + :- * Project (4) + : +- * Filter (3) + : +- * ColumnarToRow (2) + : +- Scan parquet default.explain_temp1 (1) + +- * Project (8) + +- * Filter (7) + +- * ColumnarToRow (6) + +- Scan parquet default.explain_temp1 (5) + + +(1) Scan parquet default.explain_temp1 +Output: [key#x, val#x] + +(2) ColumnarToRow [codegen id : 1] +Input: [key#x, val#x] + +(3) Filter [codegen id : 1] +Input : [key#x, val#x] +Condition : (isnotnull(key#x) AND (key#x > 0)) + +(4) Project [codegen id : 1] +Output : [key#x, val#x] +Input : [key#x, val#x] + +(5) Scan parquet default.explain_temp1 +Output: [key#x, val#x] + +(6) ColumnarToRow [codegen id : 2] +Input: [key#x, val#x] + +(7) Filter [codegen id : 2] +Input : [key#x, val#x] +Condition : (isnotnull(key#x) AND (key#x > 0)) + +(8) Project [codegen id : 2] +Output : [key#x, val#x] +Input : [key#x, val#x] + +(9) Union + +(10) HashAggregate [codegen id : 3] +Input: [key#x, val#x] + +(11) Exchange +Input: [key#x, val#x] + +(12) HashAggregate [codegen id : 4] +Input: [key#x, val#x] + + +-- !query 7 +EXPLAIN FORMATTED + SELECT * + FROM explain_temp1 a, + explain_temp2 b + WHERE a.key = b.key +-- !query 7 schema +struct<plan:string> +-- !query 7 output +== Physical Plan == +* BroadcastHashJoin Inner BuildRight (10) +:- * Project (4) +: +- * Filter (3) +: +- * ColumnarToRow (2) +: +- Scan parquet default.explain_temp1 (1) ++- BroadcastExchange (9) + +- * Project (8) + +- * Filter (7) + +- * ColumnarToRow (6) + +- Scan parquet default.explain_temp2 (5) + + +(1) Scan parquet default.explain_temp1 +Output: [key#x, val#x] + +(2) ColumnarToRow [codegen id : 2] +Input: [key#x, val#x] + +(3) Filter [codegen id : 2] +Input : [key#x, val#x] +Condition : isnotnull(key#x) + +(4) Project [codegen id : 2] +Output : [key#x, val#x] +Input : [key#x, val#x] + +(5) Scan parquet default.explain_temp2 +Output: [key#x, val#x] + +(6) ColumnarToRow [codegen id : 1] +Input: [key#x, val#x] + +(7) Filter [codegen id : 1] +Input : [key#x, val#x] +Condition : isnotnull(key#x) + +(8) Project [codegen id : 1] +Output : [key#x, val#x] +Input : [key#x, val#x] + +(9) BroadcastExchange +Input: [key#x, val#x] + +(10) BroadcastHashJoin [codegen id : 2] +Left keys: List(key#x) +Right keys: List(key#x) +Join condition: None + + +-- !query 8 +EXPLAIN FORMATTED + SELECT * + FROM explain_temp1 a + LEFT OUTER JOIN explain_temp2 b + ON a.key = b.key +-- !query 8 schema +struct<plan:string> +-- !query 8 output +== Physical Plan == +* BroadcastHashJoin LeftOuter BuildRight (8) +:- * ColumnarToRow (2) +: +- Scan parquet default.explain_temp1 (1) ++- BroadcastExchange (7) + +- * Project (6) + +- * Filter (5) + +- * ColumnarToRow (4) + +- Scan parquet default.explain_temp2 (3) + + +(1) Scan parquet default.explain_temp1 +Output: [key#x, val#x] + +(2) ColumnarToRow [codegen id : 2] +Input: [key#x, val#x] + +(3) Scan parquet default.explain_temp2 +Output: [key#x, val#x] + +(4) ColumnarToRow [codegen id : 1] +Input: [key#x, val#x] + +(5) Filter [codegen id : 1] +Input : [key#x, val#x] +Condition : isnotnull(key#x) + +(6) Project [codegen id : 1] +Output : [key#x, val#x] +Input : [key#x, val#x] + +(7) BroadcastExchange +Input: [key#x, val#x] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys: List(key#x) +Right keys: List(key#x) +Join condition: None + + +-- !query 9 +EXPLAIN FORMATTED + SELECT * + FROM explain_temp1 + WHERE key = (SELECT max(key) + FROM explain_temp2 + WHERE key = (SELECT max(key) + FROM explain_temp3 + WHERE val > 0) + AND val = 2) + AND val > 3 +-- !query 9 schema +struct<plan:string> +-- !query 9 output +== Physical Plan == +* Project (4) ++- * Filter (3) + +- * ColumnarToRow (2) + +- Scan parquet default.explain_temp1 (1) + + +(1) Scan parquet default.explain_temp1 +Output: [key#x, val#x] + +(2) ColumnarToRow [codegen id : 1] +Input: [key#x, val#x] + +(3) Filter [codegen id : 1] +Input : [key#x, val#x] +Condition : (((isnotnull(key#x) AND isnotnull(val#x)) AND (key#x = Subquery scalar-subquery#x)) AND (val#x > 3)) + +(4) Project [codegen id : 1] +Output : [key#x, val#x] +Input : [key#x, val#x] + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#x +* HashAggregate (11) ++- Exchange (10) + +- * HashAggregate (9) + +- * Project (8) + +- * Filter (7) + +- * ColumnarToRow (6) + +- Scan parquet default.explain_temp2 (5) + + +(5) Scan parquet default.explain_temp2 +Output: [key#x, val#x] + +(6) ColumnarToRow [codegen id : 1] +Input: [key#x, val#x] + +(7) Filter [codegen id : 1] +Input : [key#x, val#x] +Condition : (((isnotnull(key#x) AND isnotnull(val#x)) AND (key#x = Subquery scalar-subquery#x)) AND (val#x = 2)) + +(8) Project [codegen id : 1] +Output : [key#x] +Input : [key#x, val#x] + +(9) HashAggregate [codegen id : 1] +Input: [key#x] + +(10) Exchange +Input: [max#x] + +(11) HashAggregate [codegen id : 2] +Input: [max#x] + +Subquery:2 Hosting operator id = 7 Hosting Expression = Subquery scalar-subquery#x +* HashAggregate (18) ++- Exchange (17) + +- * HashAggregate (16) + +- * Project (15) + +- * Filter (14) + +- * ColumnarToRow (13) + +- Scan parquet default.explain_temp3 (12) + + +(12) Scan parquet default.explain_temp3 +Output: [key#x, val#x] + +(13) ColumnarToRow [codegen id : 1] +Input: [key#x, val#x] + +(14) Filter [codegen id : 1] +Input : [key#x, val#x] +Condition : (isnotnull(val#x) AND (val#x > 0)) + +(15) Project [codegen id : 1] +Output : [key#x] +Input : [key#x, val#x] + +(16) HashAggregate [codegen id : 1] +Input: [key#x] + +(17) Exchange +Input: [max#x] + +(18) HashAggregate [codegen id : 2] +Input: [max#x] + + +-- !query 10 +EXPLAIN FORMATTED + SELECT * + FROM explain_temp1 + WHERE key = (SELECT max(key) + FROM explain_temp2 + WHERE val > 0) + OR + key = (SELECT max(key) + FROM explain_temp3 + WHERE val > 0) +-- !query 10 schema +struct<plan:string> +-- !query 10 output +== Physical Plan == +* Filter (3) ++- * ColumnarToRow (2) + +- Scan parquet default.explain_temp1 (1) + + +(1) Scan parquet default.explain_temp1 +Output: [key#x, val#x] + +(2) ColumnarToRow [codegen id : 1] +Input: [key#x, val#x] + +(3) Filter [codegen id : 1] +Input : [key#x, val#x] +Condition : ((key#x = Subquery scalar-subquery#x) OR (key#x = Subquery scalar-subquery#x)) + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#x +* HashAggregate (10) ++- Exchange (9) + +- * HashAggregate (8) + +- * Project (7) + +- * Filter (6) + +- * ColumnarToRow (5) + +- Scan parquet default.explain_temp2 (4) + + +(4) Scan parquet default.explain_temp2 +Output: [key#x, val#x] + +(5) ColumnarToRow [codegen id : 1] +Input: [key#x, val#x] + +(6) Filter [codegen id : 1] +Input : [key#x, val#x] +Condition : (isnotnull(val#x) AND (val#x > 0)) + +(7) Project [codegen id : 1] +Output : [key#x] +Input : [key#x, val#x] + +(8) HashAggregate [codegen id : 1] +Input: [key#x] + +(9) Exchange +Input: [max#x] + +(10) HashAggregate [codegen id : 2] +Input: [max#x] + +Subquery:2 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#x +* HashAggregate (17) ++- Exchange (16) + +- * HashAggregate (15) + +- * Project (14) + +- * Filter (13) + +- * ColumnarToRow (12) + +- Scan parquet default.explain_temp3 (11) + + +(11) Scan parquet default.explain_temp3 +Output: [key#x, val#x] + +(12) ColumnarToRow [codegen id : 1] +Input: [key#x, val#x] + +(13) Filter [codegen id : 1] +Input : [key#x, val#x] +Condition : (isnotnull(val#x) AND (val#x > 0)) + +(14) Project [codegen id : 1] +Output : [key#x] +Input : [key#x, val#x] + +(15) HashAggregate [codegen id : 1] +Input: [key#x] + +(16) Exchange +Input: [max#x] + +(17) HashAggregate [codegen id : 2] +Input: [max#x] + + +-- !query 11 +EXPLAIN FORMATTED + SELECT (SELECT Avg(key) FROM explain_temp1) + (SELECT Avg(key) FROM explain_temp1) + FROM explain_temp1 +-- !query 11 schema +struct<plan:string> +-- !query 11 output +== Physical Plan == +* Project (3) ++- * ColumnarToRow (2) + +- Scan parquet default.explain_temp1 (1) + + +(1) Scan parquet default.explain_temp1 +Output: [] Review comment: @cloud-fan Because of column pruning .. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
