This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 1a5cd16 [SPARK-30292][SQL][FOLLOWUP] ansi cast from strings to integral numbers (byte/short/int/long) should fail with fraction 1a5cd16 is described below commit 1a5cd167e0901948d68d6c7880d39966e74d10b3 Author: Wenchen Fan <wenc...@databricks.com> AuthorDate: Fri Mar 20 00:52:09 2020 +0900 [SPARK-30292][SQL][FOLLOWUP] ansi cast from strings to integral numbers (byte/short/int/long) should fail with fraction ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/26933 A fractional string like "1.23" is definitely not a valid integral format, so the cast should fail under ANSI mode. ### Why are the changes needed? Correct the ANSI cast behavior from string to integral types. ### Does this PR introduce any user-facing change? Yes, under ANSI mode, but ANSI mode is off by default. ### How was this patch tested? New test. Closes #27957 from cloud-fan/ansi. Authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: Takeshi Yamamuro <yamam...@apache.org> (cherry picked from commit ac262cb27255f989f6a6dd864bd5114a928b96da) Signed-off-by: Takeshi Yamamuro <yamam...@apache.org> --- .../org/apache/spark/unsafe/types/UTF8String.java | 24 +++++++++++++--------- .../spark/sql/catalyst/expressions/CastSuite.scala | 2 ++ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index c538466..186597f 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -1105,6 +1105,10 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable, * @return true if the parsing was successful else false */ public boolean toLong(LongWrapper toLongResult) { + return 
toLong(toLongResult, true); + } + + private boolean toLong(LongWrapper toLongResult, boolean allowDecimal) { int offset = 0; while (offset < this.numBytes && getByte(offset) <= ' ') offset++; if (offset == this.numBytes) return false; @@ -1129,7 +1133,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable, while (offset <= end) { b = getByte(offset); offset++; - if (b == separator) { + if (b == separator && allowDecimal) { // We allow decimals and will return a truncated integral in that case. // Therefore we won't throw an exception here (checking the fractional // part happens below.) @@ -1198,6 +1202,10 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable, * @return true if the parsing was successful else false */ public boolean toInt(IntWrapper intWrapper) { + return toInt(intWrapper, true); + } + + private boolean toInt(IntWrapper intWrapper, boolean allowDecimal) { int offset = 0; while (offset < this.numBytes && getByte(offset) <= ' ') offset++; if (offset == this.numBytes) return false; @@ -1222,7 +1230,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable, while (offset <= end) { b = getByte(offset); offset++; - if (b == separator) { + if (b == separator && allowDecimal) { // We allow decimals and will return a truncated integral in that case. // Therefore we won't throw an exception here (checking the fractional // part happens below.) 
@@ -1276,9 +1284,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable, if (toInt(intWrapper)) { int intValue = intWrapper.value; short result = (short) intValue; - if (result == intValue) { - return true; - } + return result == intValue; } return false; } @@ -1287,9 +1293,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable, if (toInt(intWrapper)) { int intValue = intWrapper.value; byte result = (byte) intValue; - if (result == intValue) { - return true; - } + return result == intValue; } return false; } @@ -1302,7 +1306,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable, */ public long toLongExact() { LongWrapper result = new LongWrapper(); - if (toLong(result)) { + if (toLong(result, false)) { return result.value; } throw new NumberFormatException("invalid input syntax for type numeric: " + this); @@ -1316,7 +1320,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable, */ public int toIntExact() { IntWrapper result = new IntWrapper(); - if (toInt(result)) { + if (toInt(result, false)) { return result.value; } throw new NumberFormatException("invalid input syntax for type numeric: " + this); diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index bde95f0..9d20eac 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -1287,6 +1287,8 @@ class AnsiCastSuite extends CastSuiteBase { cast("123-string", dataType), "invalid input") checkExceptionInExpression[NumberFormatException]( cast("2020-07-19", dataType), "invalid input") + checkExceptionInExpression[NumberFormatException]( + cast("1.23", dataType), "invalid input") } Seq(DoubleType, FloatType, 
DecimalType.USER_DEFAULT).foreach { dataType => --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org