This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 6abc4a1a58ef [SPARK-45841][SQL] Expose stack trace by `DataFrameQueryContext` 6abc4a1a58ef is described below commit 6abc4a1a58ef4e5d896717b10b2314dae2af78af Author: Max Gekk <max.g...@gmail.com> AuthorDate: Wed Nov 8 15:51:50 2023 +0300 [SPARK-45841][SQL] Expose stack trace by `DataFrameQueryContext` ### What changes were proposed in this pull request? In the PR, I propose to change the case class `DataFrameQueryContext`, and add stack traces as a field and override `callSite`, `fragment` using the new field `stackTrace`. ### Why are the changes needed? By exposing the stack trace, we give users the opportunity to see all stack traces needed for debugging. ### Does this PR introduce _any_ user-facing change? No, `DataFrameQueryContext` hasn't been released yet. ### How was this patch tested? By running the modified test suite: ``` $ build/sbt "test:testOnly *DatasetSuite" ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43703 from MaxGekk/stack-traces-in-DataFrameQueryContext. 
Authored-by: Max Gekk <max.g...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../spark/sql/catalyst/trees/QueryContexts.scala | 33 +++++++++------------- .../scala/org/apache/spark/sql/DatasetSuite.scala | 13 +++++---- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/QueryContexts.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/QueryContexts.scala index 8d885d07ca8b..874c834b7558 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/QueryContexts.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/QueryContexts.scala @@ -134,9 +134,7 @@ case class SQLQueryContext( override def callSite: String = throw new UnsupportedOperationException } -case class DataFrameQueryContext( - override val fragment: String, - override val callSite: String) extends QueryContext { +case class DataFrameQueryContext(stackTrace: Seq[StackTraceElement]) extends QueryContext { override val contextType = QueryContextType.DataFrame override def objectType: String = throw new UnsupportedOperationException @@ -144,6 +142,19 @@ case class DataFrameQueryContext( override def startIndex: Int = throw new UnsupportedOperationException override def stopIndex: Int = throw new UnsupportedOperationException + override val fragment: String = { + stackTrace.headOption.map { firstElem => + val methodName = firstElem.getMethodName + if (methodName.length > 1 && methodName(0) == '$') { + methodName.substring(1) + } else { + methodName + } + }.getOrElse("") + } + + override val callSite: String = stackTrace.tail.headOption.map(_.toString).getOrElse("") + override lazy val summary: String = { val builder = new StringBuilder builder ++= "== DataFrame ==\n" @@ -157,19 +168,3 @@ case class DataFrameQueryContext( builder.result() } } - -object DataFrameQueryContext { - def apply(elements: Array[StackTraceElement]): DataFrameQueryContext = { - val fragment = 
elements.headOption.map { firstElem => - val methodName = firstElem.getMethodName - if (methodName.length > 1 && methodName(0) == '$') { - methodName.substring(1) - } else { - methodName - } - }.getOrElse("") - val callSite = elements.tail.headOption.map(_.toString).getOrElse("") - - DataFrameQueryContext(fragment, callSite) - } -} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 66105d2ac429..dcbd8948120c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -37,6 +37,7 @@ import org.apache.spark.sql.catalyst.encoders.{AgnosticEncoders, ExpressionEncod import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.BoxedIntEncoder import org.apache.spark.sql.catalyst.expressions.{CodegenObjectFactoryMode, GenericRowWithSchema} import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftSemi} +import org.apache.spark.sql.catalyst.trees.DataFrameQueryContext import org.apache.spark.sql.catalyst.util.sideBySide import org.apache.spark.sql.execution.{LogicalRDD, RDDScanExec, SQLExecution} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper @@ -2668,16 +2669,18 @@ class DatasetSuite extends QueryTest withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { val df = Seq(1).toDS() var callSitePattern: String = null + val exception = intercept[AnalysisException] { + callSitePattern = getNextLineCallSitePattern() + val c = col("a") + df.select(c) + } checkError( - exception = intercept[AnalysisException] { - callSitePattern = getNextLineCallSitePattern() - val c = col("a") - df.select(c) - }, + exception, errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", sqlState = "42703", parameters = Map("objectName" -> "`a`", "proposal" -> "`value`"), context = ExpectedContext(fragment = "col", callSitePattern = callSitePattern)) + 
assert(exception.context.head.asInstanceOf[DataFrameQueryContext].stackTrace.length == 2) } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org