[spark] branch branch-3.1 updated: [SPARK-35411][SQL] Add essential information while serializing TreeNode to json

wenchen Tue, 18 May 2021 08:21:30 -0700

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 38808c2  [SPARK-35411][SQL] Add essential information while 
serializing TreeNode to json
38808c2 is described below

commit 38808c2ca5b05f2d3471187eada3d670f4fbcd68
Author: Tengfei Huang <tengfe...@gmail.com>
AuthorDate: Tue May 18 23:20:12 2021 +0800

    [SPARK-35411][SQL] Add essential information while serializing TreeNode to 
json
    
    ### What changes were proposed in this pull request?
    Write out a Seq of Product objects that contain TreeNodes, to avoid the cases 
described in https://issues.apache.org/jira/browse/SPARK-35411 where 
essential information is ignored and written out as null values. 
This information is necessary to understand the query plans.
    
    ### Why are the changes needed?
    Information such as cteRelations in the With node and branches in the CaseWhen 
expression is necessary to understand the query plans, so it should be written 
out to the resulting JSON string.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Added a unit test case to TreeNodeSuite.
    
    Closes #32557 from ivoson/plan-json-fix.
    
    Authored-by: Tengfei Huang <tengfe...@gmail.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
    (cherry picked from commit 9804f07c17af6d8e789f729d5872b85740cc3186)
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../apache/spark/sql/catalyst/trees/TreeNode.scala  | 10 +++++++---
 .../spark/sql/catalyst/trees/TreeNodeSuite.scala    | 21 +++++++++++++++++++++
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
index 5b7beb3..d6da04e 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
@@ -800,9 +800,10 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] 
extends Product {
         ("deserialized" -> s.deserialized) ~ ("replication" -> s.replication)
     case n: TreeNode[_] => n.jsonValue
     case o: Option[_] => o.map(parseToJson)
-    // Recursive scan Seq[TreeNode], Seq[Partitioning], Seq[DataType]
-    case t: Seq[_] if t.forall(_.isInstanceOf[TreeNode[_]]) ||
-      t.forall(_.isInstanceOf[Partitioning]) || 
t.forall(_.isInstanceOf[DataType]) =>
+    // Recursive scan Seq[Partitioning], Seq[DataType], Seq[Product]
+    case t: Seq[_] if t.forall(_.isInstanceOf[Partitioning]) ||
+      t.forall(_.isInstanceOf[DataType]) ||
+      t.forall(_.isInstanceOf[Product]) =>
       JArray(t.map(parseToJson).toList)
     case t: Seq[_] if t.length > 0 && t.head.isInstanceOf[String] =>
       JString(truncatedString(t, "[", ", ", "]", 
SQLConf.get.maxToStringFields))
@@ -840,6 +841,9 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] 
extends Product {
     case broadcast: BroadcastMode => true
     case table: CatalogTableType => true
     case storage: CatalogStorageFormat => true
+    // Write out product that contains TreeNode, since there are some Tuples 
such as cteRelations
+    // in With, branches in CaseWhen which are essential to understand the 
plan.
+    case p if p.productIterator.exists(_.isInstanceOf[TreeNode[_]]) => true
     case _ => false
   }
 }
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
index 4ad8475..d837af7 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
@@ -594,6 +594,27 @@ class TreeNodeSuite extends SparkFunSuite with SQLHelper {
           "class" -> classOf[JsonTestTreeNode].getName,
           "num-children" -> 0,
           "arg" -> "1")))
+
+    // Convert Seq of Product contains TreeNode to JSON.
+    assertJSON(
+      Seq(("a", JsonTestTreeNode("0")), ("b", JsonTestTreeNode("1"))),
+      List(
+        JObject(
+          "product-class" -> "scala.Tuple2",
+          "_1" -> "a",
+          "_2" -> List(JObject(
+            "class" -> classOf[JsonTestTreeNode].getName,
+            "num-children" -> 0,
+            "arg" -> "0"
+          ))),
+        JObject(
+          "product-class" -> "scala.Tuple2",
+          "_1" -> "b",
+          "_2" -> List(JObject(
+            "class" -> classOf[JsonTestTreeNode].getName,
+            "num-children" -> 0,
+            "arg" -> "1"
+          )))))
   }
 
   test("toJSON should not throws java.lang.StackOverflowError") {

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

[spark] branch branch-3.1 updated: [SPARK-35411][SQL] Add essential information while serializing TreeNode to json

Reply via email to