maropu commented on a change in pull request #25461: [SPARK-28741][SQL]Throw
exceptions when casting to integers causes overflow
URL: https://github.com/apache/spark/pull/25461#discussion_r315008759
##########
File path:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
##########
@@ -1075,4 +1075,113 @@ class CastSuite extends SparkFunSuite with
ExpressionEvalHelper {
checkEvaluation(cast("badvalue", dataType), null)
}
}
+
+ private def testIntMaxAndMin(dt: DataType): Unit = {
+ Seq(Int.MaxValue + 1L, Int.MinValue - 1L).foreach { value =>
+ checkExceptionInExpression[ArithmeticException](cast(value, dt),
"overflow")
+
checkExceptionInExpression[ArithmeticException](cast(Decimal(value.toString),
dt), "overflow")
+ checkExceptionInExpression[ArithmeticException](
+ cast(Literal(value * MICROS_PER_SECOND, TimestampType), dt),
"overflow")
+ checkExceptionInExpression[ArithmeticException](
+ cast(Literal(value * 1.5f, FloatType), dt), "overflow")
+ checkExceptionInExpression[ArithmeticException](
+ cast(Literal(value * 1.0, DoubleType), dt), "overflow")
+ }
+ }
+
+ private def testLongMaxAndMin(dt: DataType): Unit = {
+ Seq(Decimal(Long.MaxValue) + Decimal(1), Decimal(Long.MinValue) -
Decimal(1)).foreach { value =>
+ checkExceptionInExpression[ArithmeticException](
+ cast(value, dt), "overflow")
+ checkExceptionInExpression[ArithmeticException](
+ cast((value * Decimal(1.1)).toFloat, dt), "overflow")
+ checkExceptionInExpression[ArithmeticException](
+ cast((value * Decimal(1.1)).toDouble, dt), "overflow")
+ }
+ }
+
+ test("Cast to byte with option FAIL_ON_INTEGER_OVERFLOW enabled") {
+ withSQLConf(SQLConf.FAIL_ON_INTEGER_OVERFLOW.key -> "true") {
+ testIntMaxAndMin(ByteType)
+ Seq(Byte.MaxValue + 1, Byte.MinValue - 1).foreach { value =>
+ checkExceptionInExpression[ArithmeticException](cast(value, ByteType),
"overflow")
+ checkExceptionInExpression[ArithmeticException](
+ cast(Literal(value * MICROS_PER_SECOND, TimestampType), ByteType),
"overflow")
+ checkExceptionInExpression[ArithmeticException](
+ cast(Literal(value.toFloat, FloatType), ByteType), "overflow")
+ checkExceptionInExpression[ArithmeticException](
+ cast(Literal(value.toDouble, DoubleType), ByteType), "overflow")
+ }
+
+ Seq(Byte.MaxValue, 0.toByte, Byte.MinValue).foreach { value =>
+ checkEvaluation(cast(value, ByteType), value)
+ checkEvaluation(cast(value.toString, ByteType), value)
+ checkEvaluation(cast(Decimal(value.toString), ByteType), value)
+ checkEvaluation(cast(Literal(value * MICROS_PER_SECOND,
TimestampType), ByteType), value)
+ checkEvaluation(cast(Literal(value.toInt, DateType), ByteType), null)
+ checkEvaluation(cast(Literal(value.toFloat, FloatType), ByteType),
value)
+ checkEvaluation(cast(Literal(value.toDouble, DoubleType), ByteType),
value)
+ }
+ }
+ }
+
+ test("Cast to short with option FAIL_ON_INTEGER_OVERFLOW enabled") {
+ withSQLConf(SQLConf.FAIL_ON_INTEGER_OVERFLOW.key -> "true") {
+ testIntMaxAndMin(ShortType)
+ Seq(Short.MaxValue + 1, Short.MinValue - 1).foreach { value =>
+ checkExceptionInExpression[ArithmeticException](cast(value,
ShortType), "overflow")
+ checkExceptionInExpression[ArithmeticException](
+ cast(Literal(value * MICROS_PER_SECOND, TimestampType), ShortType),
"overflow")
+ checkExceptionInExpression[ArithmeticException](
+ cast(Literal(value.toFloat, FloatType), ShortType), "overflow")
+ checkExceptionInExpression[ArithmeticException](
+ cast(Literal(value.toDouble, DoubleType), ShortType), "overflow")
+ }
+
+ Seq(Short.MaxValue, 0.toShort, Short.MinValue).foreach { value =>
+ checkEvaluation(cast(value, ShortType), value)
+ checkEvaluation(cast(value.toString, ShortType), value)
+ checkEvaluation(cast(Decimal(value.toString), ShortType), value)
+ checkEvaluation(cast(Literal(value * MICROS_PER_SECOND,
TimestampType), ShortType), value)
+ checkEvaluation(cast(Literal(value.toInt, DateType), ShortType), null)
+ checkEvaluation(cast(Literal(value.toFloat, FloatType), ShortType),
value)
+ checkEvaluation(cast(Literal(value.toDouble, DoubleType), ShortType),
value)
+ }
+ }
+ }
+
+ test("Cast to int with option FAIL_ON_INTEGER_OVERFLOW enabled") {
+ withSQLConf(SQLConf.FAIL_ON_INTEGER_OVERFLOW.key -> "true") {
+ testIntMaxAndMin(IntegerType)
+ testLongMaxAndMin(IntegerType)
+
+ Seq(Int.MaxValue, 0, Int.MinValue).foreach { value =>
+ checkEvaluation(cast(value, IntegerType), value)
+ checkEvaluation(cast(value.toString, IntegerType), value)
+ checkEvaluation(cast(Decimal(value.toString), IntegerType), value)
+ checkEvaluation(cast(Literal(value * MICROS_PER_SECOND,
TimestampType), IntegerType), value)
+ checkEvaluation(cast(Literal(value * 1.0, DoubleType), IntegerType),
value)
+ }
+ checkEvaluation(cast(2147483647.9D, IntegerType), 2147483647)
+ checkEvaluation(cast(-2147483648.9D, IntegerType), -2147483648)
+ }
+ }
+
+ test("Cast to long with option FAIL_ON_INTEGER_OVERFLOW enabled") {
+ withSQLConf(SQLConf.FAIL_ON_INTEGER_OVERFLOW.key -> "true") {
+ testLongMaxAndMin(LongType)
+
+ Seq(Long.MaxValue, 0, Long.MinValue).foreach { value =>
+ checkEvaluation(cast(value, LongType), value)
+ checkEvaluation(cast(value.toString, LongType), value)
+ checkEvaluation(cast(Decimal(value.toString), LongType), value)
+ checkEvaluation(cast(Literal(value, TimestampType), LongType),
+ Math.floorDiv(value, MICROS_PER_SECOND))
+ }
+ checkEvaluation(cast(9223372036854775807.9f, LongType),
9223372036854775807L)
Review comment:
Ah, it's OK to do it like this instead:
```
checkEvaluation(cast(9223372036854775807.9f, LongType), 9223372036854775807L)
--> non-overflow case
checkEvaluation(cast(java.lang.Math.nextUp(9223372036854775807.9f),
LongType), 9223372036854775807L)
--> overflow case
```
What I'm a little worried about is that `9223372036854775807.9f` is
implicitly rounded (to `9223372036854776000.0f`?) by the compiler because it
cannot be represented exactly in the IEEE 754 float format, as you said before.
So, IIUC, [the
test](https://github.com/apache/spark/pull/25461/files#diff-0ced8bd3a8e8459e7f66333b0d936771R1181)
is actually the same as `cast(9223372036854776000.0f, LongType)`?
My understanding is as follows (sorted by value, descending); is this correct?
```
IEEE754 continuous float values
------------------------------------------
overflow case: 9223373136366404000.0f <---
Math.nextUp(9223372036854775807.9f)
non-overflow case: 9223372036854776000.0f <--- 9223372036854775807.9f
non-overflow case: 9223371487098961900.0f <---
Math.nextDown(9223372036854775807.9f)
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]