Github user brkyvz commented on a diff in the pull request:
https://github.com/apache/spark/pull/22674#discussion_r224000809
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala ---
@@ -71,14 +72,35 @@ object SQLExecution {
val callSite = sc.getCallSite()
withSQLConfPropagated(sparkSession) {
- sc.listenerBus.post(SparkListenerSQLExecutionStart(
- executionId, callSite.shortForm, callSite.longForm,
queryExecution.toString,
- SparkPlanInfo.fromSparkPlan(queryExecution.executedPlan),
System.currentTimeMillis()))
+ var ex: Option[Exception] = None
+ val startTime = System.currentTimeMillis()
try {
+ sc.listenerBus.post(SparkListenerSQLExecutionStart(
+ executionId = executionId,
+ description = callSite.shortForm,
+ details = callSite.longForm,
+ physicalPlanDescription = queryExecution.toString,
+ // `queryExecution.executedPlan` triggers query planning. If
it fails, the exception
+ // will be caught and reported in the
`SparkListenerSQLExecutionEnd`
+ sparkPlanInfo =
SparkPlanInfo.fromSparkPlan(queryExecution.executedPlan),
+ time = startTime))
body
+ } catch {
+ case e: Exception =>
+ ex = Some(e)
+ throw e
} finally {
- sc.listenerBus.post(SparkListenerSQLExecutionEnd(
- executionId, System.currentTimeMillis()))
+ val endTime = System.currentTimeMillis()
+ val event = SparkListenerSQLExecutionEnd(executionId, endTime)
+ // Currently only `Dataset.withAction` and
`DataFrameWriter.runCommand` specify the `name`
+ // parameter. The `ExecutionListenerManager` only watches SQL
executions with name. We
+ // can specify the execution name in more places in the future,
so that
+ // `QueryExecutionListener` can track more cases.
+ event.executionName = name
+ event.duration = endTime - startTime
--- End diff --
duration used to be reported in nanos. Now it's millis. I would still
report it as nanos if possible.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]