This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 54205621560 [SPARK-39889][SQL] Use different error classes for numeric/interval divided by 0 54205621560 is described below commit 5420562156025a657d255151aff4101c9f38e905 Author: Gengliang Wang <gengli...@apache.org> AuthorDate: Thu Jul 28 09:23:20 2022 +0500 [SPARK-39889][SQL] Use different error classes for numeric/interval divided by 0 ### What changes were proposed in this pull request? Currently, when numbers are divided by 0 under ANSI mode, the error message is like ``` [DIVIDE_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. ``` The "(except for ANSI interval type)" part is confusing. We should remove it for the numeric arithmetic operations and have a new error class for the interval division error: "INTERVAL_DIVIDED_BY_ZERO" ### Why are the changes needed? For better error messages ### Does this PR introduce _any_ user-facing change? Yes, it uses different error classes for numeric/interval division by 0. After the changes, the error messages are simpler and clearer. ### How was this patch tested? UT Closes #37313 from gengliangwang/fixDivideByZero. 
Authored-by: Gengliang Wang <gengli...@apache.org> Signed-off-by: Max Gekk <max.g...@gmail.com> --- core/src/main/resources/error/error-classes.json | 8 +++++++- .../test/scala/org/apache/spark/SparkThrowableSuite.scala | 2 +- .../spark/sql/catalyst/expressions/intervalExpressions.scala | 12 ++++++++---- .../org/apache/spark/sql/catalyst/util/IntervalUtils.scala | 2 +- .../org/apache/spark/sql/errors/QueryExecutionErrors.scala | 8 ++++++++ .../test/resources/sql-tests/results/ansi/interval.sql.out | 4 ++-- .../src/test/resources/sql-tests/results/interval.sql.out | 4 ++-- .../test/resources/sql-tests/results/postgreSQL/int8.sql.out | 6 +++--- .../sql-tests/results/postgreSQL/select_having.sql.out | 2 +- .../results/udf/postgreSQL/udf-select_having.sql.out | 2 +- .../spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala | 12 +++++++++++- 11 files changed, 45 insertions(+), 17 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 9d35b1a1a69..c4b59799f88 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -78,7 +78,7 @@ }, "DIVIDE_BY_ZERO" : { "message" : [ - "Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set <config> to \"false\" (except for ANSI interval type) to bypass this error." + "Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set <config> to \"false\" to bypass this error." ], "sqlState" : "22012" }, @@ -210,6 +210,12 @@ "<message>" ] }, + "INTERVAL_DIVIDED_BY_ZERO" : { + "message" : [ + "Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead." + ], + "sqlState" : "22012" + }, "INVALID_ARRAY_INDEX" : { "message" : [ "The index <indexValue> is out of bounds. The array has <arraySize> elements. 
Use `try_element_at` and increase the array index by 1(the starting array index is 1 for `try_element_at`) to tolerate accessing element at invalid index and return NULL instead. If necessary set <ansiConfig> to \"false\" to bypass this error." diff --git a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala index 4b14e2402a7..76d7e3048d7 100644 --- a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala @@ -168,7 +168,7 @@ class SparkThrowableSuite extends SparkFunSuite { "[DIVIDE_BY_ZERO] Division by zero. " + "Use `try_divide` to tolerate divisor being 0 and return NULL instead. " + "If necessary set foo to \"false\" " + - "(except for ANSI interval type) to bypass this error.") + "to bypass this error.") } test("Error message is formatted") { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala index 0a275d0760f..17a2714c611 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala @@ -617,18 +617,22 @@ trait IntervalDivide { num: Any, context: Option[SQLQueryContext]): Unit = dataType match { case _: DecimalType => - if (num.asInstanceOf[Decimal].isZero) throw QueryExecutionErrors.divideByZeroError(context) - case _ => if (num == 0) throw QueryExecutionErrors.divideByZeroError(context) + if (num.asInstanceOf[Decimal].isZero) { + throw QueryExecutionErrors.intervalDividedByZeroError(context) + } + case _ => if (num == 0) throw QueryExecutionErrors.intervalDividedByZeroError(context) } def divideByZeroCheckCodegen( dataType: DataType, value: String, errorContextReference: String): String = dataType match { + 
// scalastyle:off line.size.limit case _: DecimalType => - s"if ($value.isZero()) throw QueryExecutionErrors.divideByZeroError($errorContextReference);" + s"if ($value.isZero()) throw QueryExecutionErrors.intervalDividedByZeroError($errorContextReference);" case _ => - s"if ($value == 0) throw QueryExecutionErrors.divideByZeroError($errorContextReference);" + s"if ($value == 0) throw QueryExecutionErrors.intervalDividedByZeroError($errorContextReference);" + // scalastyle:on line.size.limit } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index de486157cbb..b4695062c08 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -733,7 +733,7 @@ object IntervalUtils { * @throws ArithmeticException if the result overflows any field value or divided by zero */ def divideExact(interval: CalendarInterval, num: Double): CalendarInterval = { - if (num == 0) throw QueryExecutionErrors.divideByZeroError(None) + if (num == 0) throw QueryExecutionErrors.intervalDividedByZeroError(None) fromDoubles(interval.months / num, interval.days / num, interval.microseconds / num) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 80918a9d8ba..35a40ce684f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -204,6 +204,14 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { summary = getSummary(context)) } + def intervalDividedByZeroError(context: Option[SQLQueryContext]): ArithmeticException = { + new SparkArithmeticException( + errorClass = 
"INTERVAL_DIVIDED_BY_ZERO", + messageParameters = Array.empty, + context = context, + summary = getSummary(context)) + } + def invalidArrayIndexError( index: Int, numElements: Int, diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index d79cc37ab78..e96ab297d2a 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -225,7 +225,7 @@ select interval '2 seconds' / 0 struct<> -- !query output org.apache.spark.SparkArithmeticException -[DIVIDE_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. +[INTERVAL_DIVIDED_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. == SQL(line 1, position 8) == select interval '2 seconds' / 0 ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -261,7 +261,7 @@ select interval '2' year / 0 struct<> -- !query output org.apache.spark.SparkArithmeticException -[DIVIDE_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. +[INTERVAL_DIVIDED_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. 
== SQL(line 1, position 8) == select interval '2' year / 0 ^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index 652e1e724b9..53172283d12 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -201,7 +201,7 @@ select interval '2 seconds' / 0 struct<> -- !query output org.apache.spark.SparkArithmeticException -[DIVIDE_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. +[INTERVAL_DIVIDED_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. == SQL(line 1, position 8) == select interval '2 seconds' / 0 ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -237,7 +237,7 @@ select interval '2' year / 0 struct<> -- !query output org.apache.spark.SparkArithmeticException -[DIVIDE_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. +[INTERVAL_DIVIDED_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. == SQL(line 1, position 8) == select interval '2' year / 0 ^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out index 8da1a1ca139..664263ee8e7 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out @@ -647,7 +647,7 @@ select bigint('9223372036854775800') / bigint('0') struct<> -- !query output org.apache.spark.SparkArithmeticException -[DIVIDE_BY_ZERO] Division by zero. 
Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. +[DIVIDE_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select bigint('9223372036854775800') / bigint('0') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -659,7 +659,7 @@ select bigint('-9223372036854775808') / smallint('0') struct<> -- !query output org.apache.spark.SparkArithmeticException -[DIVIDE_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. +[DIVIDE_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select bigint('-9223372036854775808') / smallint('0') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -671,7 +671,7 @@ select smallint('100') / bigint('0') struct<> -- !query output org.apache.spark.SparkArithmeticException -[DIVIDE_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. +[DIVIDE_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. 
== SQL(line 1, position 8) == select smallint('100') / bigint('0') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out index 4b8945033df..87e0abb285d 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out @@ -174,7 +174,7 @@ SELECT 1 AS one FROM test_having WHERE 1/a = 1 HAVING 1 < 2 struct<> -- !query output org.apache.spark.SparkArithmeticException -[DIVIDE_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. +[DIVIDE_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 40) == ...1 AS one FROM test_having WHERE 1/a = 1 HAVING 1 < 2 ^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out index 5e7a9b96a82..a16887457c9 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out @@ -174,7 +174,7 @@ SELECT 1 AS one FROM test_having WHERE 1/udf(a) = 1 HAVING 1 < 2 struct<> -- !query output org.apache.spark.SparkArithmeticException -[DIVIDE_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. +[DIVIDE_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. 
If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 40) == ...1 AS one FROM test_having WHERE 1/udf(a) = 1 HAVING 1 < 2 ^^^^^^^^ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala index 8d7359e449d..36349c5e1f2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala @@ -48,7 +48,7 @@ class QueryExecutionAnsiErrorsSuite extends QueryTest with QueryErrorsSuiteBase msg = "Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. " + "If necessary set " + - s"""$ansiConf to "false" (except for ANSI interval type) to bypass this error.""" + + s"""$ansiConf to "false" to bypass this error.""" + """ |== SQL(line 1, position 8) == |select 6/0 @@ -57,6 +57,16 @@ class QueryExecutionAnsiErrorsSuite extends QueryTest with QueryErrorsSuiteBase sqlState = Some("22012")) } + test("INTERVAL_DIVIDED_BY_ZERO: interval divided by zero") { + checkError( + exception = intercept[SparkArithmeticException] { + sql("select interval 1 day / 0").collect() + }, + errorClass = "INTERVAL_DIVIDED_BY_ZERO", + parameters = Map.empty + ) + } + test("INVALID_FRACTION_OF_SECOND: in the function make_timestamp") { checkError( exception = intercept[SparkDateTimeException] { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org