This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-2.3
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.3 by this push:
new 81bf44d [SPARK-28015][SQL] Check stringToDate() consumes entire input
for the yyyy and yyyy-[m]m formats
81bf44d is described below
commit 81bf44dd96dbde1be957219121a905722cd5337e
Author: Maxim Gekk <[email protected]>
AuthorDate: Wed Jul 10 18:12:03 2019 -0700
[SPARK-28015][SQL] Check stringToDate() consumes entire input for the yyyy
and yyyy-[m]m formats
Fix `stringToDate()` for the `yyyy` and `yyyy-[m]m` formats, which assumed that no
additional characters follow the last component (`yyyy` or `[m]m`). In the PR, I
propose to check that the entire input is consumed for these formats. After the fix,
the input `1999 08 01` is invalid because it matches the pattern `yyyy` but the
string contains the additional characters ` 08 01`.
Spark versions 1.6.3 through 2.4.3 all behave the same way:
```
spark-sql> SELECT CAST('1999 08 01' AS DATE);
1999-01-01
```
This PR makes it return NULL instead, matching Hive:
```
spark-sql> SELECT CAST('1999 08 01' AS DATE);
NULL
```
Added new checks to `DateTimeUtilsSuite` for the `1999 08 01`, `1999-08 01`, and
`1999 08` inputs.
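For illustration, here is a minimal, self-contained Scala sketch of the rule this patch
enforces. The name `stringToDateSketch`, the plain `String` signature, and the tuple
result are hypothetical simplifications (the real code works on `UTF8String`, also stops
at `T`, and validates the date via `isInvalidDate`); this is not the actual
`DateTimeUtils` implementation. The scanner stops at the first space, since a date may be
followed by a time part, and the new check rejects the input when only the `yyyy` or
`yyyy-[m]m` segments were filled while unconsumed characters remain.
```scala
// A simplified sketch of stringToDate()'s consumed-input rule; names and the
// plain-String signature are illustrative, not the real Spark API.
def stringToDateSketch(s: String): Option[(Int, Int, Int)] = {
  val segments = Array(1, 1, 1) // year, month, day; missing parts default to 1
  var i = 0                     // index of the segment currently being filled
  var current = 0               // digits accumulated for the current segment
  var j = 0                     // position in the input
  while (j < s.length && i < 3 && s.charAt(j) != ' ') {
    val c = s.charAt(j)
    if (i < 2 && c == '-') {
      if (i == 0 && j != 4) return None // year should have exactly four digits
      segments(i) = current
      current = 0
      i += 1
    } else if (c.isDigit) {
      current = current * 10 + (c - '0')
    } else {
      return None
    }
    j += 1
  }
  if (i == 0 && j != 4) return None     // year should have exactly four digits
  // The check this commit adds: for the `yyyy` and `yyyy-[m]m` formats the
  // entire input must be consumed, so "1999 08 01" no longer matches `yyyy`.
  if (i < 2 && j < s.length) return None
  segments(i) = current
  Some((segments(0), segments(1), segments(2)))
}
```
With this sketch, `stringToDateSketch("1999 08 01")`, `stringToDateSketch("1999-08 01")`,
and `stringToDateSketch("1999 08")` all return `None`, while
`stringToDateSketch("1999-08-01")` and `stringToDateSketch("1999-08")` still parse
successfully.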
Closes #25097 from MaxGekk/spark-28015-invalid-date-format.
Authored-by: Maxim Gekk <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit 17974e269d52a96932bc0fa8d95e95a618379b86)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 4 ++++
.../org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala | 6 ++++++
2 files changed, 10 insertions(+)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index fa69b8a..4344f9a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -481,6 +481,10 @@ object DateTimeUtils {
       // year should have exact four digits
       return None
     }
+    if (i < 2 && j < bytes.length) {
+      // For the `yyyy` and `yyyy-[m]m` formats, entire input must be consumed.
+      return None
+    }
     segments(i) = currentSegmentValue
     if (isInvalidDate(segments(0), segments(1), segments(2))) {
       return None
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index b025b85..9da8c9e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -160,6 +160,9 @@ class DateTimeUtilsSuite extends SparkFunSuite {
assert(stringToDate(UTF8String.fromString("015-03-18")).isEmpty)
assert(stringToDate(UTF8String.fromString("015")).isEmpty)
assert(stringToDate(UTF8String.fromString("02015")).isEmpty)
+ assert(stringToDate(UTF8String.fromString("1999 08 01")).isEmpty)
+ assert(stringToDate(UTF8String.fromString("1999-08 01")).isEmpty)
+ assert(stringToDate(UTF8String.fromString("1999 08")).isEmpty)
}
test("string to time") {
@@ -336,6 +339,9 @@ class DateTimeUtilsSuite extends SparkFunSuite {
checkStringToTimestamp("2015-03-18T12:03.17-20:0", None)
checkStringToTimestamp("2015-03-18T12:03.17-0:70", None)
checkStringToTimestamp("2015-03-18T12:03.17-1:0:0", None)
+ checkStringToTimestamp("1999 08 01", None)
+ checkStringToTimestamp("1999-08 01", None)
+ checkStringToTimestamp("1999 08", None)
// Truncating the fractional seconds
c = Calendar.getInstance(TimeZone.getTimeZone("GMT+00:00"))
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]