dilipbiswal commented on a change in pull request #24759: [SPARK-27395][SQL] 
Improve EXPLAIN command
URL: https://github.com/apache/spark/pull/24759#discussion_r316817014
 
 

 ##########
 File path: sql/core/src/test/resources/sql-tests/results/explain.sql.out
 ##########
 @@ -0,0 +1,714 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 17
+
+
+-- !query 0
+CREATE table  explain_temp1 (key int, val int) USING PARQUET
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+CREATE table  explain_temp2 (key int, val int) USING PARQUET
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+CREATE table  explain_temp3 (key int, val int) USING PARQUET
+-- !query 2 schema
+struct<>
+-- !query 2 output
+
+
+
+-- !query 3
+SET spark.sql.codegen.wholeStage = true
+-- !query 3 schema
+struct<key:string,value:string>
+-- !query 3 output
+spark.sql.codegen.wholeStage   true
+
+
+-- !query 4
+EXPLAIN FORMATTED
+  SELECT key, max(val) 
+  FROM   explain_temp1 
+  WHERE  key > 0 
+  GROUP  BY key 
+  ORDER  BY key
+-- !query 4 schema
+struct<plan:string>
+-- !query 4 output
+== Physical Plan ==
+* Sort (9)
++- Exchange (8)
+   +- * HashAggregate (7)
+      +- Exchange (6)
+         +- * HashAggregate (5)
+            +- * Project (4)
+               +- * Filter (3)
+                  +- * ColumnarToRow (2)
+                     +- Scan parquet default.explain_temp1 (1)
+
+
+(1) Scan parquet default.explain_temp1 
+Output: [key#x, val#x]
+     
+(2) ColumnarToRow [codegen id : 1]
+Input: [key#x, val#x]
+     
+(3) Filter [codegen id : 1]
+Input     : [key#x, val#x]
+Condition : (isnotnull(key#x) AND (key#x > 0))
+     
+(4) Project [codegen id : 1]
+Output    : [key#x, val#x]
+Input     : [key#x, val#x]
+     
+(5) HashAggregate [codegen id : 1]
+Input: [key#x, val#x]
+     
+(6) Exchange 
+Input: [key#x, max#x]
+     
+(7) HashAggregate [codegen id : 2]
+Input: [key#x, max#x]
+     
+(8) Exchange 
+Input: [key#x, max(val)#x]
+     
+(9) Sort [codegen id : 3]
+Input: [key#x, max(val)#x]
+
+
+-- !query 5
+EXPLAIN FORMATTED
+  SELECT key, max(val)
+  FROM explain_temp1
+  WHERE key > 0
+  GROUP BY key
+  HAVING max(val) > 0
+-- !query 5 schema
+struct<plan:string>
+-- !query 5 output
+== Physical Plan ==
+* Project (9)
++- * Filter (8)
+   +- * HashAggregate (7)
+      +- Exchange (6)
+         +- * HashAggregate (5)
+            +- * Project (4)
+               +- * Filter (3)
+                  +- * ColumnarToRow (2)
+                     +- Scan parquet default.explain_temp1 (1)
+
+
+(1) Scan parquet default.explain_temp1 
+Output: [key#x, val#x]
+     
+(2) ColumnarToRow [codegen id : 1]
+Input: [key#x, val#x]
+     
+(3) Filter [codegen id : 1]
+Input     : [key#x, val#x]
+Condition : (isnotnull(key#x) AND (key#x > 0))
+     
+(4) Project [codegen id : 1]
+Output    : [key#x, val#x]
+Input     : [key#x, val#x]
+     
+(5) HashAggregate [codegen id : 1]
+Input: [key#x, val#x]
+     
+(6) Exchange 
+Input: [key#x, max#x]
+     
+(7) HashAggregate [codegen id : 2]
+Input: [key#x, max#x]
+     
+(8) Filter [codegen id : 2]
+Input     : [key#x, max(val)#x, max(val#x)#x]
+Condition : (isnotnull(max(val#x)#x) AND (max(val#x)#x > 0))
+     
+(9) Project [codegen id : 2]
+Output    : [key#x, max(val)#x]
+Input     : [key#x, max(val)#x, max(val#x)#x]
+
+
+-- !query 6
+EXPLAIN FORMATTED
+  SELECT key, val FROM explain_temp1 WHERE key > 0
+  UNION 
+  SELECT key, val FROM explain_temp1 WHERE key > 0
+-- !query 6 schema
+struct<plan:string>
+-- !query 6 output
+== Physical Plan ==
+* HashAggregate (12)
++- Exchange (11)
+   +- * HashAggregate (10)
+      +- Union (9)
+         :- * Project (4)
+         :  +- * Filter (3)
+         :     +- * ColumnarToRow (2)
+         :        +- Scan parquet default.explain_temp1 (1)
+         +- * Project (8)
+            +- * Filter (7)
+               +- * ColumnarToRow (6)
+                  +- Scan parquet default.explain_temp1 (5)
+
+
+(1) Scan parquet default.explain_temp1 
+Output: [key#x, val#x]
+     
+(2) ColumnarToRow [codegen id : 1]
+Input: [key#x, val#x]
+     
+(3) Filter [codegen id : 1]
+Input     : [key#x, val#x]
+Condition : (isnotnull(key#x) AND (key#x > 0))
+     
+(4) Project [codegen id : 1]
+Output    : [key#x, val#x]
+Input     : [key#x, val#x]
+     
+(5) Scan parquet default.explain_temp1 
+Output: [key#x, val#x]
+     
+(6) ColumnarToRow [codegen id : 2]
+Input: [key#x, val#x]
+     
+(7) Filter [codegen id : 2]
+Input     : [key#x, val#x]
+Condition : (isnotnull(key#x) AND (key#x > 0))
+     
+(8) Project [codegen id : 2]
+Output    : [key#x, val#x]
+Input     : [key#x, val#x]
+     
+(9) Union 
+     
+(10) HashAggregate [codegen id : 3]
+Input: [key#x, val#x]
+     
+(11) Exchange 
+Input: [key#x, val#x]
+     
+(12) HashAggregate [codegen id : 4]
+Input: [key#x, val#x]
+
+
+-- !query 7
+EXPLAIN FORMATTED
+  SELECT * 
+  FROM   explain_temp1 a, 
+         explain_temp2 b 
+  WHERE  a.key = b.key
+-- !query 7 schema
+struct<plan:string>
+-- !query 7 output
+== Physical Plan ==
+* BroadcastHashJoin Inner BuildRight (10)
+:- * Project (4)
+:  +- * Filter (3)
+:     +- * ColumnarToRow (2)
+:        +- Scan parquet default.explain_temp1 (1)
++- BroadcastExchange (9)
+   +- * Project (8)
+      +- * Filter (7)
+         +- * ColumnarToRow (6)
+            +- Scan parquet default.explain_temp2 (5)
+
+
+(1) Scan parquet default.explain_temp1 
+Output: [key#x, val#x]
+     
+(2) ColumnarToRow [codegen id : 2]
+Input: [key#x, val#x]
+     
+(3) Filter [codegen id : 2]
+Input     : [key#x, val#x]
+Condition : isnotnull(key#x)
+     
+(4) Project [codegen id : 2]
+Output    : [key#x, val#x]
+Input     : [key#x, val#x]
+     
+(5) Scan parquet default.explain_temp2 
+Output: [key#x, val#x]
+     
+(6) ColumnarToRow [codegen id : 1]
+Input: [key#x, val#x]
+     
+(7) Filter [codegen id : 1]
+Input     : [key#x, val#x]
+Condition : isnotnull(key#x)
+     
+(8) Project [codegen id : 1]
+Output    : [key#x, val#x]
+Input     : [key#x, val#x]
+     
+(9) BroadcastExchange 
+Input: [key#x, val#x]
+     
+(10) BroadcastHashJoin [codegen id : 2]
+Left keys: List(key#x)
+Right keys: List(key#x)
+Join condition: None
+
+
+-- !query 8
+EXPLAIN FORMATTED
+  SELECT * 
+  FROM   explain_temp1 a 
+         LEFT OUTER JOIN explain_temp2 b 
+                      ON a.key = b.key
+-- !query 8 schema
+struct<plan:string>
+-- !query 8 output
+== Physical Plan ==
+* BroadcastHashJoin LeftOuter BuildRight (8)
+:- * ColumnarToRow (2)
+:  +- Scan parquet default.explain_temp1 (1)
++- BroadcastExchange (7)
+   +- * Project (6)
+      +- * Filter (5)
+         +- * ColumnarToRow (4)
+            +- Scan parquet default.explain_temp2 (3)
+
+
+(1) Scan parquet default.explain_temp1 
+Output: [key#x, val#x]
+     
+(2) ColumnarToRow [codegen id : 2]
+Input: [key#x, val#x]
+     
+(3) Scan parquet default.explain_temp2 
+Output: [key#x, val#x]
+     
+(4) ColumnarToRow [codegen id : 1]
+Input: [key#x, val#x]
+     
+(5) Filter [codegen id : 1]
+Input     : [key#x, val#x]
+Condition : isnotnull(key#x)
+     
+(6) Project [codegen id : 1]
+Output    : [key#x, val#x]
+Input     : [key#x, val#x]
+     
+(7) BroadcastExchange 
+Input: [key#x, val#x]
+     
+(8) BroadcastHashJoin [codegen id : 2]
+Left keys: List(key#x)
+Right keys: List(key#x)
+Join condition: None
+
+
+-- !query 9
+EXPLAIN FORMATTED
+  SELECT * 
+  FROM   explain_temp1 
+  WHERE  key = (SELECT max(key) 
+                FROM   explain_temp2 
+                WHERE  key = (SELECT max(key) 
+                              FROM   explain_temp3 
+                              WHERE  val > 0) 
+                       AND val = 2) 
+         AND val > 3
+-- !query 9 schema
+struct<plan:string>
+-- !query 9 output
+== Physical Plan ==
+* Project (4)
++- * Filter (3)
+   +- * ColumnarToRow (2)
+      +- Scan parquet default.explain_temp1 (1)
+
+
+(1) Scan parquet default.explain_temp1 
+Output: [key#x, val#x]
+     
+(2) ColumnarToRow [codegen id : 1]
+Input: [key#x, val#x]
+     
+(3) Filter [codegen id : 1]
+Input     : [key#x, val#x]
+Condition : (((isnotnull(key#x) AND isnotnull(val#x)) AND (key#x = Subquery 
scalar-subquery#x)) AND (val#x > 3))
+     
+(4) Project [codegen id : 1]
+Output    : [key#x, val#x]
+Input     : [key#x, val#x]
+     
+===== Subqueries =====
+
+Subquery:1 Hosting operator id = 3 Hosting Expression = Subquery 
scalar-subquery#x
+* HashAggregate (11)
++- Exchange (10)
+   +- * HashAggregate (9)
+      +- * Project (8)
+         +- * Filter (7)
+            +- * ColumnarToRow (6)
+               +- Scan parquet default.explain_temp2 (5)
+
+
+(5) Scan parquet default.explain_temp2 
+Output: [key#x, val#x]
+     
+(6) ColumnarToRow [codegen id : 1]
+Input: [key#x, val#x]
+     
+(7) Filter [codegen id : 1]
+Input     : [key#x, val#x]
+Condition : (((isnotnull(key#x) AND isnotnull(val#x)) AND (key#x = Subquery 
scalar-subquery#x)) AND (val#x = 2))
+     
+(8) Project [codegen id : 1]
+Output    : [key#x]
+Input     : [key#x, val#x]
+     
+(9) HashAggregate [codegen id : 1]
+Input: [key#x]
+     
+(10) Exchange 
+Input: [max#x]
+     
+(11) HashAggregate [codegen id : 2]
+Input: [max#x]
+     
+Subquery:2 Hosting operator id = 7 Hosting Expression = Subquery 
scalar-subquery#x
+* HashAggregate (18)
++- Exchange (17)
+   +- * HashAggregate (16)
+      +- * Project (15)
+         +- * Filter (14)
+            +- * ColumnarToRow (13)
+               +- Scan parquet default.explain_temp3 (12)
+
+
+(12) Scan parquet default.explain_temp3 
+Output: [key#x, val#x]
+     
+(13) ColumnarToRow [codegen id : 1]
+Input: [key#x, val#x]
+     
+(14) Filter [codegen id : 1]
+Input     : [key#x, val#x]
+Condition : (isnotnull(val#x) AND (val#x > 0))
+     
+(15) Project [codegen id : 1]
+Output    : [key#x]
+Input     : [key#x, val#x]
+     
+(16) HashAggregate [codegen id : 1]
+Input: [key#x]
+     
+(17) Exchange 
+Input: [max#x]
+     
+(18) HashAggregate [codegen id : 2]
+Input: [max#x]
+
+
+-- !query 10
+EXPLAIN FORMATTED
+  SELECT * 
+  FROM   explain_temp1 
+  WHERE  key = (SELECT max(key) 
+                FROM   explain_temp2 
+                WHERE  val > 0) 
+         OR
+         key = (SELECT max(key) 
+                FROM   explain_temp3
+                WHERE  val > 0)
+-- !query 10 schema
+struct<plan:string>
+-- !query 10 output
+== Physical Plan ==
+* Filter (3)
++- * ColumnarToRow (2)
+   +- Scan parquet default.explain_temp1 (1)
+
+
+(1) Scan parquet default.explain_temp1 
+Output: [key#x, val#x]
+     
+(2) ColumnarToRow [codegen id : 1]
+Input: [key#x, val#x]
+     
+(3) Filter [codegen id : 1]
+Input     : [key#x, val#x]
+Condition : ((key#x = Subquery scalar-subquery#x) OR (key#x = Subquery 
scalar-subquery#x))
+     
+===== Subqueries =====
+
+Subquery:1 Hosting operator id = 3 Hosting Expression = Subquery 
scalar-subquery#x
+* HashAggregate (10)
++- Exchange (9)
+   +- * HashAggregate (8)
+      +- * Project (7)
+         +- * Filter (6)
+            +- * ColumnarToRow (5)
+               +- Scan parquet default.explain_temp2 (4)
+
+
+(4) Scan parquet default.explain_temp2 
+Output: [key#x, val#x]
+     
+(5) ColumnarToRow [codegen id : 1]
+Input: [key#x, val#x]
+     
+(6) Filter [codegen id : 1]
+Input     : [key#x, val#x]
+Condition : (isnotnull(val#x) AND (val#x > 0))
+     
+(7) Project [codegen id : 1]
+Output    : [key#x]
+Input     : [key#x, val#x]
+     
+(8) HashAggregate [codegen id : 1]
+Input: [key#x]
+     
+(9) Exchange 
+Input: [max#x]
+     
+(10) HashAggregate [codegen id : 2]
+Input: [max#x]
+     
+Subquery:2 Hosting operator id = 3 Hosting Expression = Subquery 
scalar-subquery#x
+* HashAggregate (17)
++- Exchange (16)
+   +- * HashAggregate (15)
+      +- * Project (14)
+         +- * Filter (13)
+            +- * ColumnarToRow (12)
+               +- Scan parquet default.explain_temp3 (11)
+
+
+(11) Scan parquet default.explain_temp3 
+Output: [key#x, val#x]
+     
+(12) ColumnarToRow [codegen id : 1]
+Input: [key#x, val#x]
+     
+(13) Filter [codegen id : 1]
+Input     : [key#x, val#x]
+Condition : (isnotnull(val#x) AND (val#x > 0))
+     
+(14) Project [codegen id : 1]
+Output    : [key#x]
+Input     : [key#x, val#x]
+     
+(15) HashAggregate [codegen id : 1]
+Input: [key#x]
+     
+(16) Exchange 
+Input: [max#x]
+     
+(17) HashAggregate [codegen id : 2]
+Input: [max#x]
+
+
+-- !query 11
+EXPLAIN FORMATTED
+  SELECT (SELECT Avg(key) FROM explain_temp1) + (SELECT Avg(key) FROM 
explain_temp1)
+  FROM explain_temp1
+-- !query 11 schema
+struct<plan:string>
+-- !query 11 output
+== Physical Plan ==
+* Project (3)
++- * ColumnarToRow (2)
+   +- Scan parquet default.explain_temp1 (1)
+
+
+(1) Scan parquet default.explain_temp1 
+Output: []
 
 Review comment:
   @cloud-fan Because of column pruning .. 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to