This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push: new 38808c2 [SPARK-35411][SQL] Add essential information while serializing TreeNode to json 38808c2 is described below commit 38808c2ca5b05f2d3471187eada3d670f4fbcd68 Author: Tengfei Huang <tengfe...@gmail.com> AuthorDate: Tue May 18 23:20:12 2021 +0800 [SPARK-35411][SQL] Add essential information while serializing TreeNode to json ### What changes were proposed in this pull request? Write out Seq of product objects which contain TreeNode, to avoid the cases as described in https://issues.apache.org/jira/browse/SPARK-35411 that essential information will be ignored and just written out as null values. This information is necessary to understand the query plans. ### Why are the changes needed? Information like cteRelations in With node, and branches in CaseWhen expression are necessary to understand the query plans; they should be written out to the result json string. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT case added. Closes #32557 from ivoson/plan-json-fix. 
Authored-by: Tengfei Huang <tengfe...@gmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit 9804f07c17af6d8e789f729d5872b85740cc3186) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../apache/spark/sql/catalyst/trees/TreeNode.scala | 10 +++++++--- .../spark/sql/catalyst/trees/TreeNodeSuite.scala | 21 +++++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 5b7beb3..d6da04e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -800,9 +800,10 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { ("deserialized" -> s.deserialized) ~ ("replication" -> s.replication) case n: TreeNode[_] => n.jsonValue case o: Option[_] => o.map(parseToJson) - // Recursive scan Seq[TreeNode], Seq[Partitioning], Seq[DataType] - case t: Seq[_] if t.forall(_.isInstanceOf[TreeNode[_]]) || - t.forall(_.isInstanceOf[Partitioning]) || t.forall(_.isInstanceOf[DataType]) => + // Recursive scan Seq[Partitioning], Seq[DataType], Seq[Product] + case t: Seq[_] if t.forall(_.isInstanceOf[Partitioning]) || + t.forall(_.isInstanceOf[DataType]) || + t.forall(_.isInstanceOf[Product]) => JArray(t.map(parseToJson).toList) case t: Seq[_] if t.length > 0 && t.head.isInstanceOf[String] => JString(truncatedString(t, "[", ", ", "]", SQLConf.get.maxToStringFields)) @@ -840,6 +841,9 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { case broadcast: BroadcastMode => true case table: CatalogTableType => true case storage: CatalogStorageFormat => true + // Write out product that contains TreeNode, since there are some Tuples such as cteRelations + // in With, branches in CaseWhen which are essential to 
understand the plan. + case p if p.productIterator.exists(_.isInstanceOf[TreeNode[_]]) => true case _ => false } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala index 4ad8475..d837af7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala @@ -594,6 +594,27 @@ class TreeNodeSuite extends SparkFunSuite with SQLHelper { "class" -> classOf[JsonTestTreeNode].getName, "num-children" -> 0, "arg" -> "1"))) + + // Convert Seq of Product contains TreeNode to JSON. + assertJSON( + Seq(("a", JsonTestTreeNode("0")), ("b", JsonTestTreeNode("1"))), + List( + JObject( + "product-class" -> "scala.Tuple2", + "_1" -> "a", + "_2" -> List(JObject( + "class" -> classOf[JsonTestTreeNode].getName, + "num-children" -> 0, + "arg" -> "0" + ))), + JObject( + "product-class" -> "scala.Tuple2", + "_1" -> "b", + "_2" -> List(JObject( + "class" -> classOf[JsonTestTreeNode].getName, + "num-children" -> 0, + "arg" -> "1" + ))))) } test("toJSON should not throws java.lang.StackOverflowError") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org