This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push: new 2a19c29 [SPARK-38656][UI][PYTHON] Show options for Pandas API on Spark in UI 2a19c29 is described below commit 2a19c29aac76105fb06b09f26b84e58361715e22 Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Tue Mar 29 12:00:22 2022 +0900 [SPARK-38656][UI][PYTHON] Show options for Pandas API on Spark in UI ### What changes were proposed in this pull request? This PR proposes to show options for Pandas API on Spark in UI. The options in Pandas API on Spark (https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/options.html#available-options) are currently not shown properly in "SQL Properties". It is shown, under "SQL Properties", as something like `pandas_on_Spark.compute.ops_on_diff_frames` as a key and `false` as its value that is the format internally used. This PR extracts pandas-on-Spark specific options and shows them separately. Additionally, this PR also proposes to hide "SQL Properties" if none of the configurations are set. ### Why are the changes needed? For better readability and UX for pandas API on Spark. ### Does this PR introduce _any_ user-facing change? Yes. Now, we're able to show pandas-on-Spark specific options under "Pandas API on Spark: Properties" as below: <img width="893" alt="Screen Shot 2022-03-28 at 9 31 54 AM" src="https://user-images.githubusercontent.com/6477701/160308224-3e24ce1d-c422-4271-8e71-daaca3ac600d.png"> <img width="875" alt="Screen Shot 2022-03-28 at 9 31 20 AM" src="https://user-images.githubusercontent.com/6477701/160308226-8b07a0d6-ad31-4774-b5ec-37664d7fe5dd.png"> ### How was this patch tested? Manually tested as above. Closes #35972 from HyukjinKwon/SPARK-38656. 
Authored-by: Hyukjin Kwon <gurwls...@apache.org> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> (cherry picked from commit 94abcd7037b05ac5068ce421e07306d45e957246) Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .../spark/sql/execution/ui/ExecutionPage.scala | 53 +++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala index 6aacec3..5734760 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala @@ -21,12 +21,18 @@ import javax.servlet.http.HttpServletRequest import scala.xml.Node +import org.json4s.JNull +import org.json4s.JsonAST.{JBool, JString} +import org.json4s.jackson.JsonMethods.parse + import org.apache.spark.JobExecutionStatus import org.apache.spark.internal.Logging import org.apache.spark.ui.{UIUtils, WebUIPage} class ExecutionPage(parent: SQLTab) extends WebUIPage("execution") with Logging { + private val pandasOnSparkConfPrefix = "pandas_on_Spark." 
+ private val sqlStore = parent.sqlStore override def render(request: HttpServletRequest): Seq[Node] = { @@ -82,7 +88,11 @@ class ExecutionPage(parent: SQLTab) extends WebUIPage("execution") with Logging summary ++ planVisualization(request, metrics, graph) ++ physicalPlanDescription(executionUIData.physicalPlanDescription) ++ - modifiedConfigs(executionUIData.modifiedConfigs) + modifiedConfigs( + executionUIData.modifiedConfigs.filterKeys( + !_.startsWith(pandasOnSparkConfPrefix)).toMap) ++ + modifiedPandasOnSparkConfigs( + executionUIData.modifiedConfigs.filterKeys(_.startsWith(pandasOnSparkConfPrefix)).toMap) }.getOrElse { <div>No information to display for query {executionId}</div> } @@ -148,6 +158,8 @@ class ExecutionPage(parent: SQLTab) extends WebUIPage("execution") with Logging } private def modifiedConfigs(modifiedConfigs: Map[String, String]): Seq[Node] = { + if (Option(modifiedConfigs).exists(_.isEmpty)) return Nil + val configs = UIUtils.listingTable( propertyHeader, propertyRow, @@ -168,6 +180,45 @@ class ExecutionPage(parent: SQLTab) extends WebUIPage("execution") with Logging <br/> } + private def modifiedPandasOnSparkConfigs( + modifiedPandasOnSparkConfigs: Map[String, String]): Seq[Node] = { + if (Option(modifiedPandasOnSparkConfigs).exists(_.isEmpty)) return Nil + + val modifiedOptions = modifiedPandasOnSparkConfigs.toSeq.map { case (k, v) => + // Remove prefix. + val key = k.slice(pandasOnSparkConfPrefix.length, k.length) + // The codes below is a simple version of Python's repr(). + // Pandas API on Spark does not support other types in the options yet. 
+ val pyValue = parse(v) match { + case JNull => "None" + case JBool(v) => v.toString.capitalize + case JString(s) => s"'$s'" + case _ => v + } + (key, pyValue) + } + + val configs = UIUtils.listingTable( + propertyHeader, + propertyRow, + modifiedOptions.sorted, + fixedWidth = true + ) + + <div> + <span class="collapse-pandas-on-spark-properties collapse-table" + onClick="collapseTable('collapse-pandas-on-spark-properties', + 'pandas-on-spark-properties')"> + <span class="collapse-table-arrow arrow-closed"></span> + <a>Pandas API Properties</a> + </span> + <div class="pandas-on-spark-properties collapsible-table collapsed"> + {configs} + </div> + </div> + <br/> + } + private def propertyHeader = Seq("Name", "Value") private def propertyRow(kv: (String, String)) = <tr><td>{kv._1}</td><td>{kv._2}</td></tr> } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org