This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new d30c9a90c6cf [SPARK-45826][SQL] Add a SQL config for stack traces in DataFrame query context d30c9a90c6cf is described below commit d30c9a90c6cf9033c45f6f418864c8d7013911e5 Author: Max Gekk <max.g...@gmail.com> AuthorDate: Sun Nov 26 14:10:27 2023 +0100 [SPARK-45826][SQL] Add a SQL config for stack traces in DataFrame query context ### What changes were proposed in this pull request? In this PR, I propose to add a new SQL config, `spark.sql.stackTracesInDataFrameContext`, which defines how many non-Spark stack traces (i.e., stack frames) should be captured in the DataFrame query context. By default, the config is set to 1. ### Why are the changes needed? To improve the user experience with Spark SQL. When users troubleshoot an issue, they might need more stack traces in the DataFrame context. For example: ```scala scala> spark.conf.set("spark.sql.ansi.enabled", true) scala> spark.conf.set("spark.sql.stackTracesInDataFrameContext", 3) scala> spark.range(1).select(lit(1) / lit(0)).collect() org.apache.spark.SparkArithmeticException: [DIVIDE_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. SQLSTATE: 22012 == DataFrame == "div" was called from <init>(<console>:1) <init>(<console>:16) .<clinit>(<console>:1) ``` ### Does this PR introduce _any_ user-facing change? No, it doesn't change the default behaviour. ### How was this patch tested? By running the modified test suite: ``` $ build/sbt "test:testOnly *QueryContextSuite" ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43695 from MaxGekk/df-context-slice-conf-2. 
Authored-by: Max Gekk <max.g...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 9 +++++++++ sql/core/src/main/scala/org/apache/spark/sql/package.scala | 5 ++++- .../scala/org/apache/spark/sql/errors/QueryContextSuite.scala | 7 +++++-- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 6a8e1f92fc51..5133c40bc6fa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -4577,6 +4577,13 @@ object SQLConf { .booleanConf .createWithDefault(false) + val STACK_TRACES_IN_DATAFRAME_CONTEXT = buildConf("spark.sql.stackTracesInDataFrameContext") + .doc("The number of non-Spark stack traces in the captured DataFrame query context.") + .version("4.0.0") + .intConf + .checkValue(_ > 0, "The number of stack traces in the DataFrame context must be positive.") + .createWithDefault(1) + /** * Holds information about keys that have been deprecated. * @@ -5465,6 +5472,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf { def legacyRaiseErrorWithoutErrorClass: Boolean = getConf(SQLConf.LEGACY_RAISE_ERROR_WITHOUT_ERROR_CLASS) + def stackTracesInDataFrameContext: Int = getConf(SQLConf.STACK_TRACES_IN_DATAFRAME_CONTEXT) + /** ********************** SQLConf functionality methods ************ */ /** Set Spark SQL configuration properties. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/package.scala index 96bef83af0a8..877d9906a1cf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/package.scala @@ -22,6 +22,7 @@ import java.util.regex.Pattern import org.apache.spark.annotation.{DeveloperApi, Unstable} import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin} import org.apache.spark.sql.execution.SparkStrategy +import org.apache.spark.sql.internal.SQLConf /** * Allows the execution of relational queries, including those expressed in SQL using Spark. @@ -103,7 +104,9 @@ package object sql { while (i < st.length && !sparkCode(st(i))) i += 1 // Stop at the end of the first Spark code traces while (i < st.length && sparkCode(st(i))) i += 1 - val origin = Origin(stackTrace = Some(st.slice(i - 1, i + 1))) + val origin = Origin(stackTrace = Some(st.slice( + from = i - 1, + until = i + SQLConf.get.stackTracesInDataFrameContext))) CurrentOrigin.withOrigin(origin)(f) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryContextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryContextSuite.scala index 7d57eeb01bfa..426822da3c91 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryContextSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryContextSuite.scala @@ -25,14 +25,17 @@ import org.apache.spark.sql.test.SharedSparkSession class QueryContextSuite extends QueryTest with SharedSparkSession { test("summary of DataFrame context") { - withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + withSQLConf( + SQLConf.ANSI_ENABLED.key -> "true", + SQLConf.STACK_TRACES_IN_DATAFRAME_CONTEXT.key -> "2") { val e = intercept[SparkArithmeticException] { spark.range(1).select(lit(1) / lit(0)).collect() } assert(e.getQueryContext.head.summary() == """== DataFrame == |"div" was called from - 
|org.apache.spark.sql.errors.QueryContextSuite.$anonfun$new$3(QueryContextSuite.scala:30) + |org.apache.spark.sql.errors.QueryContextSuite.$anonfun$new$3(QueryContextSuite.scala:32) + |org.scalatest.Assertions.intercept(Assertions.scala:749) |""".stripMargin) } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org