This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new d30c9a90c6cf [SPARK-45826][SQL] Add a SQL config for stack traces in DataFrame query context
d30c9a90c6cf is described below
commit d30c9a90c6cf9033c45f6f418864c8d7013911e5
Author: Max Gekk <[email protected]>
AuthorDate: Sun Nov 26 14:10:27 2023 +0100
[SPARK-45826][SQL] Add a SQL config for stack traces in DataFrame query context
### What changes were proposed in this pull request?
In the PR, I propose to add a new SQL config, `spark.sql.stackTracesInDataFrameContext`, which defines how many non-Spark stack traces should be captured into the DataFrame query context. By default, the config is set to 1.
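A minimal standalone sketch of the frame slicing this config controls (see the change to `sql/core/src/main/scala/org/apache/spark/sql/package.scala` below); the names `i` and `n` here are hypothetical stand-ins for the index of the first non-Spark frame and the config value:
```scala
object StackSliceSketch extends App {
  // In Spark this is the current thread's stack trace.
  val st: Array[StackTraceElement] = Thread.currentThread().getStackTrace
  val i = 2 // hypothetical: index of the first non-Spark frame
  val n = 3 // hypothetical: value of spark.sql.stackTracesInDataFrameContext
  // Keep the last Spark frame plus the first `n` non-Spark frames,
  // mirroring st.slice(from = i - 1, until = i + n) in the patch.
  val captured = st.slice(i - 1, i + n)
  captured.foreach(println)
}
```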
### Why are the changes needed?
To improve the user experience with Spark SQL. When users troubleshoot an issue, they might need more stack traces in the DataFrame context. For example:
```scala
scala> spark.conf.set("spark.sql.ansi.enabled", true)
scala> spark.conf.set("spark.sql.stackTracesInDataFrameContext", 3)
scala> spark.range(1).select(lit(1) / lit(0)).collect()
org.apache.spark.SparkArithmeticException: [DIVIDE_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. SQLSTATE: 22012
== DataFrame ==
"div" was called from
<init>(<console>:1)
<init>(<console>:16)
.<clinit>(<console>:1)
```
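With the default value of 1, the new code computes the same slice as the previous hard-coded `st.slice(i - 1, i + 1)`, so only the first non-Spark frame appears in the context.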
### Does this PR introduce _any_ user-facing change?
No, it doesn't change the default behaviour.
### How was this patch tested?
By running the modified test suite:
```
$ build/sbt "test:testOnly *QueryContextSuite"
```
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #43695 from MaxGekk/df-context-slice-conf-2.
Authored-by: Max Gekk <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
.../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 9 +++++++++
sql/core/src/main/scala/org/apache/spark/sql/package.scala | 5 ++++-
.../scala/org/apache/spark/sql/errors/QueryContextSuite.scala | 7 +++++--
3 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 6a8e1f92fc51..5133c40bc6fa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -4577,6 +4577,13 @@ object SQLConf {
.booleanConf
.createWithDefault(false)
+ val STACK_TRACES_IN_DATAFRAME_CONTEXT = buildConf("spark.sql.stackTracesInDataFrameContext")
+ .doc("The number of non-Spark stack traces in the captured DataFrame query context.")
+ .version("4.0.0")
+ .intConf
+ .checkValue(_ > 0, "The number of stack traces in the DataFrame context must be positive.")
+ .createWithDefault(1)
+
/**
* Holds information about keys that have been deprecated.
*
@@ -5465,6 +5472,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
def legacyRaiseErrorWithoutErrorClass: Boolean =
getConf(SQLConf.LEGACY_RAISE_ERROR_WITHOUT_ERROR_CLASS)
+ def stackTracesInDataFrameContext: Int = getConf(SQLConf.STACK_TRACES_IN_DATAFRAME_CONTEXT)
+
/** ********************** SQLConf functionality methods ************ */
/** Set Spark SQL configuration properties. */
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/package.scala
index 96bef83af0a8..877d9906a1cf 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/package.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/package.scala
@@ -22,6 +22,7 @@ import java.util.regex.Pattern
import org.apache.spark.annotation.{DeveloperApi, Unstable}
import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin}
import org.apache.spark.sql.execution.SparkStrategy
+import org.apache.spark.sql.internal.SQLConf
/**
* Allows the execution of relational queries, including those expressed in SQL using Spark.
@@ -103,7 +104,9 @@ package object sql {
while (i < st.length && !sparkCode(st(i))) i += 1
// Stop at the end of the first Spark code traces
while (i < st.length && sparkCode(st(i))) i += 1
- val origin = Origin(stackTrace = Some(st.slice(i - 1, i + 1)))
+ val origin = Origin(stackTrace = Some(st.slice(
+ from = i - 1,
+ until = i + SQLConf.get.stackTracesInDataFrameContext)))
CurrentOrigin.withOrigin(origin)(f)
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryContextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryContextSuite.scala
index 7d57eeb01bfa..426822da3c91 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryContextSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryContextSuite.scala
@@ -25,14 +25,17 @@ import org.apache.spark.sql.test.SharedSparkSession
class QueryContextSuite extends QueryTest with SharedSparkSession {
test("summary of DataFrame context") {
- withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") {
+ withSQLConf(
+ SQLConf.ANSI_ENABLED.key -> "true",
+ SQLConf.STACK_TRACES_IN_DATAFRAME_CONTEXT.key -> "2") {
val e = intercept[SparkArithmeticException] {
spark.range(1).select(lit(1) / lit(0)).collect()
}
assert(e.getQueryContext.head.summary() ==
"""== DataFrame ==
|"div" was called from
- |org.apache.spark.sql.errors.QueryContextSuite.$anonfun$new$3(QueryContextSuite.scala:30)
+ |org.apache.spark.sql.errors.QueryContextSuite.$anonfun$new$3(QueryContextSuite.scala:32)
+ |org.scalatest.Assertions.intercept(Assertions.scala:749)
|""".stripMargin)
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]