parthchandra commented on code in PR #45488:
URL: https://github.com/apache/spark/pull/45488#discussion_r1534357046


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala:
##########
@@ -1519,7 +1526,7 @@ object SQLConf {
       .booleanConf
       .createWithDefault(false)
 
-  val V2_BUCKETING_SHUFFLE_ENABLED =
+ val V2_BUCKETING_SHUFFLE_ENABLED =

Review Comment:
   fixed



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala:
##########
@@ -1529,7 +1536,7 @@ object SQLConf {
       .booleanConf
       .createWithDefault(false)
 
-   val V2_BUCKETING_ALLOW_JOIN_KEYS_SUBSET_OF_PARTITION_KEYS =
+  val V2_BUCKETING_ALLOW_JOIN_KEYS_SUBSET_OF_PARTITION_KEYS =

Review Comment:
   reverted this change



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala:
##########
@@ -1160,7 +1167,7 @@ object SQLConf {
     .intConf
     .createWithDefault(4096)
 
-  val PARQUET_FIELD_ID_WRITE_ENABLED =
+   val PARQUET_FIELD_ID_WRITE_ENABLED =

Review Comment:
   My IDE keeps changing code for me and I haven't figured out how to 
fix it. I'll try to correct these auto-changes manually in future efforts. 
(Thank you for being patient about this!)



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala:
##########
@@ -861,7 +868,7 @@ object SQLConf {
       .booleanConf
       .createWithDefault(true)
 
-  val ADAPTIVE_REBALANCE_PARTITIONS_SMALL_PARTITION_FACTOR =
+  val  ADAPTIVE_REBALANCE_PARTITIONS_SMALL_PARTITION_FACTOR =

Review Comment:
   Done. Really sorry about this reformatting. My IDE keeps changing code I 
have not modified. 



##########
sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala:
##########
@@ -543,6 +544,33 @@ class SparkSessionExtensionSuite extends SparkFunSuite 
with SQLHelper with Adapt
       }
     }
   }
+
+  test("explain info") {

Review Comment:
   changed



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala:
##########
@@ -4101,15 +4108,15 @@ object SQLConf {
     .booleanConf
     .createWithDefault(false)
 
-  val LEGACY_INTEGER_GROUPING_ID =
+   val LEGACY_INTEGER_GROUPING_ID =
     buildConf("spark.sql.legacy.integerGroupingId")
       .internal()
       .doc("When true, grouping_id() returns int values instead of long 
values.")
       .version("3.1.0")
       .booleanConf
       .createWithDefault(false)
 
-  val LEGACY_GROUPING_ID_WITH_APPENDED_USER_GROUPBY =
+   val LEGACY_GROUPING_ID_WITH_APPENDED_USER_GROUPBY =

Review Comment:
   fixed



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala:
##########
@@ -333,6 +333,13 @@ object SQLConf {
     .booleanConf
     .createWithDefault(true)
 
+  val PLAN_EXPLAIN_EXTENSIONS_ENABLED = 
buildConf("spark.sql.plan.explain.extensions.enabled")

Review Comment:
   Changed to `spark.sql.enableExtensionInfo`



##########
sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala:
##########
@@ -543,6 +544,33 @@ class SparkSessionExtensionSuite extends SparkFunSuite 
with SQLHelper with Adapt
       }
     }
   }
+
+  test("explain info") {
+    val extensions = create { extensions =>
+      extensions.injectQueryStagePrepRule { _ => AddTagRule() }
+    }
+    withSession(extensions) { session =>
+      withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true",
+        SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") {

Review Comment:
   Indented 2 more spaces



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala:
##########
@@ -648,6 +702,8 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]]
 object QueryPlan extends PredicateHelper {
   val OP_ID_TAG = TreeNodeTag[Int]("operatorId")
   val CODEGEN_ID_TAG = new TreeNodeTag[Int]("wholeStageCodegenId")
+  val EXPLAIN_PLAN_INFO_SIMPLE = new 
TreeNodeTag[String]("explainPlanInfoSimple")
+  val EXPLAIN_PLAN_INFO = new TreeNodeTag[String]("explainPlanInfo")

Review Comment:
   Added better comments explaining the reason for the two methods.



##########
sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala:
##########
@@ -583,6 +611,24 @@ case class MyParser(spark: SparkSession, delegate: 
ParserInterface) extends Pars
     delegate.parseQuery(sqlText)
 }
 
+case class AddTagRule() extends Rule[SparkPlan] {

Review Comment:
   Good suggestion. Changed.



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala:
##########
@@ -464,6 +464,60 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]]
     s"($opId) $nodeName$codegenId"
   }
 
+  def extensionsInfo(mode: String = "simple"): String = {
+    mode match {
+      case "simple" =>
+        summaryExtensionsInfoSimple().mkString("\n")
+      case _ =>
+        val sb = new StringBuilder
+        summaryExtensionsInfo(0, sb)
+        sb.toString
+    }
+  }
+
+  def summaryExtensionsInfo(indent: Int, sb: StringBuilder): Unit = {
+    var indentLevel = indent
+    sb.append(
+      getTagValue(QueryPlan.EXPLAIN_PLAN_INFO).map(t => {
+        indentLevel = indentLevel + 1
+        "  " * indentLevel + t + "\n"
+      }).getOrElse("")
+    )
+    if (innerChildren.nonEmpty) {
+      innerChildren.foreach(_.summaryExtensionsInfo(indentLevel, sb))
+    }
+    if (children.nonEmpty) {
+      children.foreach(_.summaryExtensionsInfo(indentLevel, sb))
+    }
+  }
+
+  // Sometimes all that is needed is the root level information
+  // (especially with errors during planning)
+  def summaryExtensionsInfoSimple(): mutable.Set[String] = {
+    val sb = mutable.Set[String]() // don't allow duplicates

Review Comment:
   Changed the variable name. Also added a comment explaining why duplicates 
are removed.



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala:
##########
@@ -4101,15 +4108,15 @@ object SQLConf {
     .booleanConf
     .createWithDefault(false)
 
-  val LEGACY_INTEGER_GROUPING_ID =
+   val LEGACY_INTEGER_GROUPING_ID =

Review Comment:
   fixed



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala:
##########
@@ -4864,6 +4871,8 @@ class SQLConf extends Serializable with Logging with 
SqlApiConf {
 
   def planChangeBatches: Option[String] = getConf(PLAN_CHANGE_LOG_BATCHES)
 
+  def planExplainExtensionsEnabled: Boolean = 
getConf(PLAN_EXPLAIN_EXTENSIONS_ENABLED)

Review Comment:
   Changed to singular



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala:
##########
@@ -464,6 +464,60 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]]
     s"($opId) $nodeName$codegenId"
   }
 
+  def extensionsInfo(mode: String = "simple"): String = {

Review Comment:
   Good idea. I've moved this method out of this class into `QueryExecution` 
because it already has the appropriate `ExplainMode` defined (it's a trait 
though, not an enum, but it is already available and for the same use case).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to