This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new d30c9a90c6cf [SPARK-45826][SQL] Add a SQL config for stack traces in DataFrame query context
d30c9a90c6cf is described below
commit d30c9a90c6cf9033c45f6f418864c8d7013911e5
Author: Max Gekk <[email protected]>
AuthorDate: Sun Nov 26 14:10:27 2023 +0100
[SPARK-45826][SQL] Add a SQL config for stack traces in DataFrame query context
### What changes were proposed in this pull request?
In the PR, I propose to add a new SQL config, `spark.sql.stackTracesInDataFrameContext`, which defines how many non-Spark stack traces should be captured into the DataFrame query context. By default, the config is set to 1.
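A minimal standalone sketch of the frame slicing this config controls (see the change to `sql/core/src/main/scala/org/apache/spark/sql/package.scala` below); the names `i` and `n` here are hypothetical stand-ins for the index of the first non-Spark frame and the config value:
```scala
object StackSliceSketch extends App {
  // In Spark this is the current thread's stack trace.
  val st: Array[StackTraceElement] = Thread.currentThread().getStackTrace
  val i = 2 // hypothetical: index of the first non-Spark frame
  val n = 3 // hypothetical: value of spark.sql.stackTracesInDataFrameContext
  // Keep the last Spark frame plus the first `n` non-Spark frames,
  // mirroring st.slice(from = i - 1, until = i + n) in the patch.
  val captured = st.slice(i - 1, i + n)
  captured.foreach(println)
}
```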
### Why are the changes needed?
To improve the user experience with Spark SQL. When users troubleshoot an issue, they might need more stack traces in the DataFrame context. For example:
```scala
scala> spark.conf.set("spark.sql.ansi.enabled", true)
scala> spark.conf.set("spark.sql.stackTracesInDataFrameContext", 3)
scala> spark.range(1).select(lit(1) / lit(0)).collect()
org.apache.spark.SparkArithmeticException: [DIVIDE_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. SQLSTATE: 22012
== DataFrame ==
"div" was called from
<init>(<console>:1)
<init>(<console>:16)
.<clinit>(<console>:1)
```
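With the default value of 1, the new code computes the same slice as the previous hard-coded `st.slice(i - 1, i + 1)`, so only the first non-Spark frame appears in the context.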
### Does this PR introduce _any_ user-facing change?
No, it doesn't change the default behaviour.
### How was this patch tested?
By running the modified test suite:
```
$ build/sbt "test:testOnly *QueryContextSuite"
```
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #43695 from MaxGekk/df-context-slice-conf-2.
Authored-by: Max Gekk <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
.../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 9 +++++++++
sql/core/src/main/scala/org/apache/spark/sql/package.scala | 5 ++++-
.../scala/org/apache/spark/sql/errors/QueryContextSuite.scala | 7 +++++--
3 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 6a8e1f92fc51..5133c40bc6fa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -4577,6 +4577,13 @@ object SQLConf {
.booleanConf
.createWithDefault(false)
+ val STACK_TRACES_IN_DATAFRAME_CONTEXT = buildConf("spark.sql.stackTracesInDataFrameContext")
+ .doc("The number of non-Spark stack traces in the captured DataFrame query context.")
+ .version("4.0.0")
+ .intConf
+ .checkValue(_ > 0, "The number of stack traces in the DataFrame context must be positive.")
+ .createWithDefault(1)
+
/**
* Holds information about keys that have been deprecated.
*
@@ -5465,6 +5472,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
def legacyRaiseErrorWithoutErrorClass: Boolean =
getConf(SQLConf.LEGACY_RAISE_ERROR_WITHOUT_ERROR_CLASS)
+ def stackTracesInDataFrameContext: Int = getConf(SQLConf.STACK_TRACES_IN_DATAFRAME_CONTEXT)
+
/** ********************** SQLConf functionality methods ************ */
/** Set Spark SQL configuration properties. */
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/package.scala
index 96bef83af0a8..877d9906a1cf 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/package.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/package.scala
@@ -22,6 +22,7 @@ import java.util.regex.Pattern
import org.apache.spark.annotation.{DeveloperApi, Unstable}
import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin}
import org.apache.spark.sql.execution.SparkStrategy
+import org.apache.spark.sql.internal.SQLConf
/**
* Allows the execution of relational queries, including those expressed in SQL using Spark.
@@ -103,7 +104,9 @@ package object sql {
while (i < st.length && !sparkCode(st(i))) i += 1
// Stop at the end of the first Spark code traces
while (i < st.length && sparkCode(st(i))) i += 1
- val origin = Origin(stackTrace = Some(st.slice(i - 1, i + 1)))
+ val origin = Origin(stackTrace = Some(st.slice(
+ from = i - 1,
+ until = i + SQLConf.get.stackTracesInDataFrameContext)))
CurrentOrigin.withOrigin(origin)(f)
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryContextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryContextSuite.scala
index 7d57eeb01bfa..426822da3c91 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryContextSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryContextSuite.scala
@@ -25,14 +25,17 @@ import org.apache.spark.sql.test.SharedSparkSession
class QueryContextSuite extends QueryTest with SharedSparkSession {
test("summary of DataFrame context") {
- withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") {
+ withSQLConf(
+ SQLConf.ANSI_ENABLED.key -> "true",
+ SQLConf.STACK_TRACES_IN_DATAFRAME_CONTEXT.key -> "2") {
val e = intercept[SparkArithmeticException] {
spark.range(1).select(lit(1) / lit(0)).collect()
}
assert(e.getQueryContext.head.summary() ==
"""== DataFrame ==
|"div" was called from
- |org.apache.spark.sql.errors.QueryContextSuite.$anonfun$new$3(QueryContextSuite.scala:30)
+ |org.apache.spark.sql.errors.QueryContextSuite.$anonfun$new$3(QueryContextSuite.scala:32)
+ |org.scalatest.Assertions.intercept(Assertions.scala:749)
|""".stripMargin)
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]