dongjoon-hyun commented on code in PR #39268:
URL: https://github.com/apache/spark/pull/39268#discussion_r1059565579
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala:
##########
@@ -26,40 +26,63 @@ import scala.xml.{Node, NodeSeq}
import org.apache.spark.JobExecutionStatus
import org.apache.spark.internal.Logging
+import org.apache.spark.internal.config.UI.UI_SQL_GROUP_SUB_EXECUTION_ENABLED
import org.apache.spark.sql.errors.QueryExecutionErrors
import org.apache.spark.ui.{PagedDataSource, PagedTable, UIUtils, WebUIPage}
import org.apache.spark.util.Utils
private[ui] class AllExecutionsPage(parent: SQLTab) extends WebUIPage("") with
Logging {
private val sqlStore = parent.sqlStore
+ private val groupSubExecutionEnabled =
parent.conf.get(UI_SQL_GROUP_SUB_EXECUTION_ENABLED)
override def render(request: HttpServletRequest): Seq[Node] = {
val currentTime = System.currentTimeMillis()
val running = new mutable.ArrayBuffer[SQLExecutionUIData]()
val completed = new mutable.ArrayBuffer[SQLExecutionUIData]()
val failed = new mutable.ArrayBuffer[SQLExecutionUIData]()
+ val executionIdToSubExecutions =
+ new mutable.HashMap[Long, mutable.ArrayBuffer[SQLExecutionUIData]]()
sqlStore.executionsList().foreach { e =>
- if (e.errorMessage.isDefined) {
- if (e.errorMessage.get.isEmpty) {
- completed += e
+ def processExecution(e: SQLExecutionUIData): Unit = {
+ if (e.errorMessage.isDefined) {
+ if (e.errorMessage.get.isEmpty) {
+ completed += e
+ } else {
+ failed += e
+ }
+ } else if (e.completionTime.isEmpty) {
+ running += e
} else {
- failed += e
+ // When `completionTime` is present, it means the query execution is
completed and
+ // `errorMessage` should be present as well. However, events
generated by old versions of
+ // Spark do not have the `errorMessage` field. We have to check the
status of this query
Review Comment:
Just a question. Do we have test coverage for old Spark event logs to
validate this code path?
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala:
##########
@@ -290,35 +330,112 @@ private[ui] class ExecutionPagedTable(
}
}
- <tr>
- <td>
- {executionUIData.executionId.toString}
- </td>
- <td>
- {descriptionCell(executionUIData)}
- </td>
- <td sorttable_customkey={submissionTime.toString}>
- {UIUtils.formatDate(submissionTime)}
- </td>
- <td sorttable_customkey={duration.toString}>
- {UIUtils.formatDuration(duration)}
- </td>
- {if (showRunningJobs) {
+ def executionLinks(executionData: Seq[Long]): Seq[Node] = {
+ val details = if (executionData.nonEmpty) {
+ val onClickScript =
"this.parentNode.parentNode.nextElementSibling.nextElementSibling" +
+ ".classList.toggle('collapsed')"
+ <span onclick={onClickScript} class="expand-details">
+ +details
+ </span>
+ } else {
+ Nil
+ }
+
+ <div>{
+ executionData.map { executionId =>
+ <a href={executionURL(executionId)}>[{executionId.toString}]</a>
+ }
+ }</div> ++ details
+ }
+
+ val baseRow: Seq[Node] = {
+ <tr>
<td>
- {jobLinks(executionTableRow.runningJobData)}
+ {executionUIData.executionId.toString}
</td>
- }}
- {if (showSucceededJobs) {
<td>
- {jobLinks(executionTableRow.completedJobData)}
+ {descriptionCell(executionUIData)}
</td>
- }}
- {if (showFailedJobs) {
- <td>
- {jobLinks(executionTableRow.failedJobData)}
+ <td sorttable_customkey={submissionTime.toString}>
+ {UIUtils.formatDate(submissionTime)}
</td>
- }}
- </tr>
+ <td sorttable_customkey={duration.toString}>
+ {UIUtils.formatDuration(duration)}
+ </td>
+ {if (showRunningJobs) {
+ <td>
+ {jobLinks(executionTableRow.runningJobData)}
+ </td>
+ }}
+ {if (showSucceededJobs) {
+ <td>
+ {jobLinks(executionTableRow.completedJobData)}
+ </td>
+ }}
+ {if (showFailedJobs) {
+ <td>
+ {jobLinks(executionTableRow.failedJobData)}
+ </td>
+ }}
+ {if (showSubExecutions) {
+ <td>
+
{executionLinks(executionTableRow.subExecutionData.map(_.executionUIData.executionId))}
+ </td>
+ }}
+ </tr>
+ }
+
+ val subRow: Seq[Node] = {if (executionTableRow.subExecutionData.nonEmpty) {
+ <tr></tr>
+ <tr class="sub-execution-list collapsed">
+ <td></td>
+ <td colspan={s"${headerInfo.length - 1}"}>
+ <table class="table table-bordered table-sm
table-cell-width-limited">
+ <thead>
+ <tr>
+ {headerInfo.dropRight(1).map(info => <th>{info._1}</th>)}
+ </tr>
+ </thead>
+ <tbody>
+ {
+ executionTableRow.subExecutionData.map { rowData =>
Review Comment:
If you don't mind, shall we follow one of the previous indentation styles,
please?
**Style 1:**
https://github.com/apache/spark/blob/6aac6428aae89915c5634b6a9659aff3d450f173/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala#L132-L140
**Style 2:**
https://github.com/apache/spark/blob/6aac6428aae89915c5634b6a9659aff3d450f173/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala#L316-L320
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala:
##########
@@ -250,32 +283,39 @@ private[ui] class ExecutionPagedTable(
override def goButtonFormPath: String =
s"$parameterPath&$executionTag.sort=$encodedSortColumn&$executionTag.desc=$desc#$tableHeaderId"
- override def headers: Seq[Node] = {
- // Information for each header: title, sortable, tooltip
- val executionHeadersAndCssClasses: Seq[(String, Boolean, Option[String])] =
- Seq(
- ("ID", true, None),
- ("Description", true, None),
- ("Submitted", true, None),
- ("Duration", true, Some("Time from query submission to completion (or
if still executing," +
- "time since submission)"))) ++ {
- if (showRunningJobs && showSucceededJobs && showFailedJobs) {
- Seq(
- ("Running Job IDs", true, None),
- ("Succeeded Job IDs", true, None),
- ("Failed Job IDs", true, None))
- } else if (showSucceededJobs && showFailedJobs) {
- Seq(
- ("Succeeded Job IDs", true, None),
- ("Failed Job IDs", true, None))
- } else {
- Seq(("Job IDs", true, None))
- }
+ // Information for each header: title, sortable, tooltip
+ private val headerInfo: Seq[(String, Boolean, Option[String])] = {
+ Seq(
+ ("ID", true, None),
+ ("Description", true, None),
+ ("Submitted", true, None),
+ ("Duration", true, Some("Time from query submission to completion (or if
still executing," +
+ "time since submission)"))) ++ {
Review Comment:
We need a space. `"time` -> `" time`.
##########
core/src/main/scala/org/apache/spark/internal/config/UI.scala:
##########
@@ -229,4 +229,11 @@ private[spark] object UI {
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.createWithDefault("LOCAL")
+
+ val UI_SQL_GROUP_SUB_EXECUTION_ENABLED =
ConfigBuilder("spark.ui.sql.group.sub.execution.enabled")
+ .doc("Whether to group sub executions together in SQL UI when they belong
to the same " +
+ "root execution")
+ .version("3.4.0")
+ .booleanConf
+ .createWithDefault(false)
Review Comment:
It's okay to enable this by default. Please update the PR.
##########
core/src/main/scala/org/apache/spark/internal/config/UI.scala:
##########
@@ -229,4 +229,11 @@ private[spark] object UI {
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.createWithDefault("LOCAL")
+
+ val UI_SQL_GROUP_SUB_EXECUTION_ENABLED =
ConfigBuilder("spark.ui.sql.group.sub.execution.enabled")
Review Comment:
This PR introduces 4 config namespace groups like the following. Shall we
simplify the config namespace?
```
spark.ui.sql.*
spark.ui.sql.group.*
spark.ui.sql.group.sub.*
spark.ui.sql.group.sub.execution.*
```
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala:
##########
@@ -55,6 +56,28 @@ object SQLExecution {
}
}
+ /**
+ * Track the "root" SQL Execution Id for nested/sub queries.
+ * For the root execution, rootExecutionId == executionId.
+ */
+ private def setRootExecutionId(sc: SparkContext, executionId: String): Unit
= {
+ // The current execution is the root execution if the root execution ID is
null
+ if (sc.getLocalProperty(EXECUTION_ROOT_ID_KEY) == null) {
+ sc.setLocalProperty(EXECUTION_ROOT_ID_KEY, executionId)
+ }
+ }
+
+ /**
+ * Unset the "root" SQL Execution Id once the "root" SQL execution completes.
+ */
+ private def unsetRootExecutionId(sc: SparkContext, executionId: String):
Unit = {
Review Comment:
Do we have any other usage for these methods, `setRootExecutionId` and
`unsetRootExecutionId`? Each of these methods seems to be used only once.
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala:
##########
@@ -290,35 +330,112 @@ private[ui] class ExecutionPagedTable(
}
}
- <tr>
- <td>
- {executionUIData.executionId.toString}
- </td>
- <td>
- {descriptionCell(executionUIData)}
- </td>
- <td sorttable_customkey={submissionTime.toString}>
- {UIUtils.formatDate(submissionTime)}
- </td>
- <td sorttable_customkey={duration.toString}>
- {UIUtils.formatDuration(duration)}
- </td>
- {if (showRunningJobs) {
+ def executionLinks(executionData: Seq[Long]): Seq[Node] = {
+ val details = if (executionData.nonEmpty) {
+ val onClickScript =
"this.parentNode.parentNode.nextElementSibling.nextElementSibling" +
+ ".classList.toggle('collapsed')"
+ <span onclick={onClickScript} class="expand-details">
+ +details
+ </span>
+ } else {
+ Nil
+ }
+
+ <div>{
+ executionData.map { executionId =>
+ <a href={executionURL(executionId)}>[{executionId.toString}]</a>
+ }
+ }</div> ++ details
+ }
+
+ val baseRow: Seq[Node] = {
+ <tr>
<td>
- {jobLinks(executionTableRow.runningJobData)}
+ {executionUIData.executionId.toString}
</td>
- }}
- {if (showSucceededJobs) {
<td>
- {jobLinks(executionTableRow.completedJobData)}
+ {descriptionCell(executionUIData)}
</td>
- }}
- {if (showFailedJobs) {
- <td>
- {jobLinks(executionTableRow.failedJobData)}
+ <td sorttable_customkey={submissionTime.toString}>
+ {UIUtils.formatDate(submissionTime)}
</td>
- }}
- </tr>
+ <td sorttable_customkey={duration.toString}>
+ {UIUtils.formatDuration(duration)}
+ </td>
+ {if (showRunningJobs) {
+ <td>
+ {jobLinks(executionTableRow.runningJobData)}
+ </td>
+ }}
+ {if (showSucceededJobs) {
+ <td>
+ {jobLinks(executionTableRow.completedJobData)}
+ </td>
+ }}
+ {if (showFailedJobs) {
+ <td>
+ {jobLinks(executionTableRow.failedJobData)}
+ </td>
+ }}
+ {if (showSubExecutions) {
+ <td>
+
{executionLinks(executionTableRow.subExecutionData.map(_.executionUIData.executionId))}
+ </td>
+ }}
+ </tr>
+ }
+
+ val subRow: Seq[Node] = {if (executionTableRow.subExecutionData.nonEmpty) {
+ <tr></tr>
+ <tr class="sub-execution-list collapsed">
Review Comment:
This `<tr>` doesn't need additional indentation here. Could you align the
indentation with the previous `<tr>` at line 389?
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala:
##########
@@ -290,35 +330,112 @@ private[ui] class ExecutionPagedTable(
}
}
- <tr>
- <td>
- {executionUIData.executionId.toString}
- </td>
- <td>
- {descriptionCell(executionUIData)}
- </td>
- <td sorttable_customkey={submissionTime.toString}>
- {UIUtils.formatDate(submissionTime)}
- </td>
- <td sorttable_customkey={duration.toString}>
- {UIUtils.formatDuration(duration)}
- </td>
- {if (showRunningJobs) {
+ def executionLinks(executionData: Seq[Long]): Seq[Node] = {
+ val details = if (executionData.nonEmpty) {
+ val onClickScript =
"this.parentNode.parentNode.nextElementSibling.nextElementSibling" +
+ ".classList.toggle('collapsed')"
+ <span onclick={onClickScript} class="expand-details">
+ +details
+ </span>
+ } else {
+ Nil
+ }
+
+ <div>{
+ executionData.map { executionId =>
+ <a href={executionURL(executionId)}>[{executionId.toString}]</a>
+ }
+ }</div> ++ details
Review Comment:
indentation?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]