This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 6abc4a1a58ef [SPARK-45841][SQL] Expose stack trace by `DataFrameQueryContext` 6abc4a1a58ef is described below commit 6abc4a1a58ef4e5d896717b10b2314dae2af78af Author: Max Gekk <max.g...@gmail.com> AuthorDate: Wed Nov 8 15:51:50 2023 +0300 [SPARK-45841][SQL] Expose stack trace by `DataFrameQueryContext` ### What changes were proposed in this pull request? In the PR, I propose to change the case class `DataFrameQueryContext`, and add stack traces as a field and override `callSite`, `fragment` using the new field `stackTrace`. ### Why are the changes needed? By exposing the stack trace, we give users the opportunity to see all stack traces needed for debugging. ### Does this PR introduce _any_ user-facing change? No, `DataFrameQueryContext` hasn't been released yet. ### How was this patch tested? By running the modified test suite: ``` $ build/sbt "test:testOnly *DatasetSuite" ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43703 from MaxGekk/stack-traces-in-DataFrameQueryContext. 
Authored-by: Max Gekk <max.g...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../spark/sql/catalyst/trees/QueryContexts.scala | 33 +++++++++------------- .../scala/org/apache/spark/sql/DatasetSuite.scala | 13 +++++---- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/QueryContexts.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/QueryContexts.scala index 8d885d07ca8b..874c834b7558 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/QueryContexts.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/QueryContexts.scala @@ -134,9 +134,7 @@ case class SQLQueryContext( override def callSite: String = throw new UnsupportedOperationException } -case class DataFrameQueryContext( - override val fragment: String, - override val callSite: String) extends QueryContext { +case class DataFrameQueryContext(stackTrace: Seq[StackTraceElement]) extends QueryContext { override val contextType = QueryContextType.DataFrame override def objectType: String = throw new UnsupportedOperationException @@ -144,6 +142,19 @@ case class DataFrameQueryContext( override def startIndex: Int = throw new UnsupportedOperationException override def stopIndex: Int = throw new UnsupportedOperationException + override val fragment: String = { + stackTrace.headOption.map { firstElem => + val methodName = firstElem.getMethodName + if (methodName.length > 1 && methodName(0) == '$') { + methodName.substring(1) + } else { + methodName + } + }.getOrElse("") + } + + override val callSite: String = stackTrace.tail.headOption.map(_.toString).getOrElse("") + override lazy val summary: String = { val builder = new StringBuilder builder ++= "== DataFrame ==\n" @@ -157,19 +168,3 @@ case class DataFrameQueryContext( builder.result() } } - -object DataFrameQueryContext { - def apply(elements: Array[StackTraceElement]): DataFrameQueryContext = { - val fragment = 
elements.headOption.map { firstElem => - val methodName = firstElem.getMethodName - if (methodName.length > 1 && methodName(0) == '$') { - methodName.substring(1) - } else { - methodName - } - }.getOrElse("") - val callSite = elements.tail.headOption.map(_.toString).getOrElse("") - - DataFrameQueryContext(fragment, callSite) - } -} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 66105d2ac429..dcbd8948120c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -37,6 +37,7 @@ import org.apache.spark.sql.catalyst.encoders.{AgnosticEncoders, ExpressionEncod import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.BoxedIntEncoder import org.apache.spark.sql.catalyst.expressions.{CodegenObjectFactoryMode, GenericRowWithSchema} import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftSemi} +import org.apache.spark.sql.catalyst.trees.DataFrameQueryContext import org.apache.spark.sql.catalyst.util.sideBySide import org.apache.spark.sql.execution.{LogicalRDD, RDDScanExec, SQLExecution} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper @@ -2668,16 +2669,18 @@ class DatasetSuite extends QueryTest withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { val df = Seq(1).toDS() var callSitePattern: String = null + val exception = intercept[AnalysisException] { + callSitePattern = getNextLineCallSitePattern() + val c = col("a") + df.select(c) + } checkError( - exception = intercept[AnalysisException] { - callSitePattern = getNextLineCallSitePattern() - val c = col("a") - df.select(c) - }, + exception, errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", sqlState = "42703", parameters = Map("objectName" -> "`a`", "proposal" -> "`value`"), context = ExpectedContext(fragment = "col", callSitePattern = callSitePattern)) + 
assert(exception.context.head.asInstanceOf[DataFrameQueryContext].stackTrace.length == 2) } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org