[GitHub] [spark] maropu commented on a change in pull request #25461: [SPARK-28741][SQL]Throw exceptions when casting to integers causes overflow

GitBox Sun, 18 Aug 2019 16:59:24 -0700

maropu commented on a change in pull request #25461: [SPARK-28741][SQL]Throw 
exceptions when casting to integers causes overflow
URL: https://github.com/apache/spark/pull/25461#discussion_r315008759


 ##########
 File path: 
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
 ##########
 @@ -1075,4 +1075,113 @@ class CastSuite extends SparkFunSuite with 
ExpressionEvalHelper {
       checkEvaluation(cast("badvalue", dataType), null)
     }
   }
+
+  private def testIntMaxAndMin(dt: DataType): Unit = {
+    Seq(Int.MaxValue + 1L, Int.MinValue - 1L).foreach { value =>
+      checkExceptionInExpression[ArithmeticException](cast(value, dt), 
"overflow")
+      
checkExceptionInExpression[ArithmeticException](cast(Decimal(value.toString), 
dt), "overflow")
+      checkExceptionInExpression[ArithmeticException](
+        cast(Literal(value * MICROS_PER_SECOND, TimestampType), dt), 
"overflow")
+      checkExceptionInExpression[ArithmeticException](
+        cast(Literal(value * 1.5f, FloatType), dt), "overflow")
+      checkExceptionInExpression[ArithmeticException](
+        cast(Literal(value * 1.0, DoubleType), dt), "overflow")
+    }
+  }
+
+  private def testLongMaxAndMin(dt: DataType): Unit = {
+    Seq(Decimal(Long.MaxValue) + Decimal(1), Decimal(Long.MinValue) - 
Decimal(1)).foreach { value =>
+      checkExceptionInExpression[ArithmeticException](
+        cast(value, dt), "overflow")
+      checkExceptionInExpression[ArithmeticException](
+        cast((value * Decimal(1.1)).toFloat, dt), "overflow")
+      checkExceptionInExpression[ArithmeticException](
+        cast((value * Decimal(1.1)).toDouble, dt), "overflow")
+    }
+  }
+
+  test("Cast to byte with option FAIL_ON_INTEGER_OVERFLOW enabled") {
+    withSQLConf(SQLConf.FAIL_ON_INTEGER_OVERFLOW.key -> "true") {
+      testIntMaxAndMin(ByteType)
+      Seq(Byte.MaxValue + 1, Byte.MinValue - 1).foreach { value =>
+        checkExceptionInExpression[ArithmeticException](cast(value, ByteType), 
"overflow")
+        checkExceptionInExpression[ArithmeticException](
+          cast(Literal(value * MICROS_PER_SECOND, TimestampType), ByteType), 
"overflow")
+        checkExceptionInExpression[ArithmeticException](
+          cast(Literal(value.toFloat, FloatType), ByteType), "overflow")
+        checkExceptionInExpression[ArithmeticException](
+          cast(Literal(value.toDouble, DoubleType), ByteType), "overflow")
+      }
+
+      Seq(Byte.MaxValue, 0.toByte, Byte.MinValue).foreach { value =>
+        checkEvaluation(cast(value, ByteType), value)
+        checkEvaluation(cast(value.toString, ByteType), value)
+        checkEvaluation(cast(Decimal(value.toString), ByteType), value)
+        checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, 
TimestampType), ByteType), value)
+        checkEvaluation(cast(Literal(value.toInt, DateType), ByteType), null)
+        checkEvaluation(cast(Literal(value.toFloat, FloatType), ByteType), 
value)
+        checkEvaluation(cast(Literal(value.toDouble, DoubleType), ByteType), 
value)
+      }
+    }
+  }
+
+  test("Cast to short with option FAIL_ON_INTEGER_OVERFLOW enabled") {
+    withSQLConf(SQLConf.FAIL_ON_INTEGER_OVERFLOW.key -> "true") {
+      testIntMaxAndMin(ShortType)
+      Seq(Short.MaxValue + 1, Short.MinValue - 1).foreach { value =>
+        checkExceptionInExpression[ArithmeticException](cast(value, 
ShortType), "overflow")
+        checkExceptionInExpression[ArithmeticException](
+          cast(Literal(value * MICROS_PER_SECOND, TimestampType), ShortType), 
"overflow")
+        checkExceptionInExpression[ArithmeticException](
+          cast(Literal(value.toFloat, FloatType), ShortType), "overflow")
+        checkExceptionInExpression[ArithmeticException](
+          cast(Literal(value.toDouble, DoubleType), ShortType), "overflow")
+      }
+
+      Seq(Short.MaxValue, 0.toShort, Short.MinValue).foreach { value =>
+        checkEvaluation(cast(value, ShortType), value)
+        checkEvaluation(cast(value.toString, ShortType), value)
+        checkEvaluation(cast(Decimal(value.toString), ShortType), value)
+        checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, 
TimestampType), ShortType), value)
+        checkEvaluation(cast(Literal(value.toInt, DateType), ShortType), null)
+        checkEvaluation(cast(Literal(value.toFloat, FloatType), ShortType), 
value)
+        checkEvaluation(cast(Literal(value.toDouble, DoubleType), ShortType), 
value)
+      }
+    }
+  }
+
+  test("Cast to int with option FAIL_ON_INTEGER_OVERFLOW enabled") {
+    withSQLConf(SQLConf.FAIL_ON_INTEGER_OVERFLOW.key -> "true") {
+      testIntMaxAndMin(IntegerType)
+      testLongMaxAndMin(IntegerType)
+
+      Seq(Int.MaxValue, 0, Int.MinValue).foreach { value =>
+        checkEvaluation(cast(value, IntegerType), value)
+        checkEvaluation(cast(value.toString, IntegerType), value)
+        checkEvaluation(cast(Decimal(value.toString), IntegerType), value)
+        checkEvaluation(cast(Literal(value * MICROS_PER_SECOND, 
TimestampType), IntegerType), value)
+        checkEvaluation(cast(Literal(value * 1.0, DoubleType), IntegerType), 
value)
+      }
+      checkEvaluation(cast(2147483647.9D, IntegerType), 2147483647)
+      checkEvaluation(cast(-2147483648.9D, IntegerType), -2147483648)
+    }
+  }
+
+  test("Cast to long with option FAIL_ON_INTEGER_OVERFLOW enabled") {
+    withSQLConf(SQLConf.FAIL_ON_INTEGER_OVERFLOW.key -> "true") {
+      testLongMaxAndMin(LongType)
+
+      Seq(Long.MaxValue, 0, Long.MinValue).foreach { value =>
+        checkEvaluation(cast(value, LongType), value)
+        checkEvaluation(cast(value.toString, LongType), value)
+        checkEvaluation(cast(Decimal(value.toString), LongType), value)
+        checkEvaluation(cast(Literal(value, TimestampType), LongType),
+          Math.floorDiv(value, MICROS_PER_SECOND))
+      }
+      checkEvaluation(cast(9223372036854775807.9f, LongType), 
9223372036854775807L)
 
 Review comment:
   Ah, it's OK to do it like this instead:
   ```
   checkEvaluation(cast(9223372036854775807.9f, LongType), 9223372036854775807L)
   --> non-overflow case
   checkEvaluation(cast(java.lang.Math.nextUp(9223372036854775807.9f), 
LongType), 9223372036854775807L)
   --> overflow case
   ```
   What I'm a little worried about is that `9223372036854775807.9f` is 
implicitly rounded (to `9223372036854776000.0f`?) by the compiler because it 
cannot be represented exactly in the IEEE 754 float format, as you said before. So, IIUC, [the 
test](https://github.com/apache/spark/pull/25461/files#diff-0ced8bd3a8e8459e7f66333b0d936771R1181)
 is actually the same as `cast(9223372036854776000.0f, LongType)`?
   
   My understanding is as follows (sorted by value, descending); is this correct?
   ```
             Consecutive IEEE 754 float values
   ------------------------------------------
       overflow case: 9223373136366404000.0f <--- 
Math.nextUp(9223372036854775807.9f)
   non-overflow case: 9223372036854776000.0f <--- 9223372036854775807.9f
   non-overflow case: 9223371487098961900.0f <--- 
Math.nextDown(9223372036854775807.9f)
   ```

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] [spark] maropu commented on a change in pull request #25461: [SPARK-28741][SQL]Throw exceptions when casting to integers causes overflow

Reply via email to