This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-2.3
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-2.3 by this push:
     new 81bf44d  [SPARK-28015][SQL] Check stringToDate() consumes entire input 
for the yyyy and yyyy-[m]m formats
81bf44d is described below

commit 81bf44dd96dbde1be957219121a905722cd5337e
Author: Maxim Gekk <[email protected]>
AuthorDate: Wed Jul 10 18:12:03 2019 -0700

    [SPARK-28015][SQL] Check stringToDate() consumes entire input for the yyyy 
and yyyy-[m]m formats
    
    Fix `stringToDate()` for the formats `yyyy` and `yyyy-[m]m`, which assumed 
that there are no additional chars after the last components `yyyy` and `[m]m`. 
In this PR, I propose to check that the entire input was consumed for these formats.
    
    After the fix, the input `1999 08 01` will be invalid because it matches 
the pattern `yyyy` but the string contains additional chars ` 08 01`.
    
    From Spark 1.6.3 through 2.4.3, the behavior has been the same:
    ```
    spark-sql> SELECT CAST('1999 08 01' AS DATE);
    1999-01-01
    ```
    
    This PR makes it return NULL like Hive.
    ```
    spark-sql> SELECT CAST('1999 08 01' AS DATE);
    NULL
    ```
    
    Added new checks to `DateTimeUtilsSuite` for the `1999 08 01`, `1999-08 01` 
and `1999 08` inputs.
    
    Closes #25097 from MaxGekk/spark-28015-invalid-date-format.
    
    Authored-by: Maxim Gekk <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
    (cherry picked from commit 17974e269d52a96932bc0fa8d95e95a618379b86)
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala    | 4 ++++
 .../org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala     | 6 ++++++
 2 files changed, 10 insertions(+)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index fa69b8a..4344f9a 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -481,6 +481,10 @@ object DateTimeUtils {
       // year should have exact four digits
       return None
     }
+    if (i < 2 && j < bytes.length) {
+      // For the `yyyy` and `yyyy-[m]m` formats, entire input must be consumed.
+      return None
+    }
     segments(i) = currentSegmentValue
     if (isInvalidDate(segments(0), segments(1), segments(2))) {
       return None
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index b025b85..9da8c9e 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -160,6 +160,9 @@ class DateTimeUtilsSuite extends SparkFunSuite {
     assert(stringToDate(UTF8String.fromString("015-03-18")).isEmpty)
     assert(stringToDate(UTF8String.fromString("015")).isEmpty)
     assert(stringToDate(UTF8String.fromString("02015")).isEmpty)
+    assert(stringToDate(UTF8String.fromString("1999 08 01")).isEmpty)
+    assert(stringToDate(UTF8String.fromString("1999-08 01")).isEmpty)
+    assert(stringToDate(UTF8String.fromString("1999 08")).isEmpty)
   }
 
   test("string to time") {
@@ -336,6 +339,9 @@ class DateTimeUtilsSuite extends SparkFunSuite {
       checkStringToTimestamp("2015-03-18T12:03.17-20:0", None)
       checkStringToTimestamp("2015-03-18T12:03.17-0:70", None)
       checkStringToTimestamp("2015-03-18T12:03.17-1:0:0", None)
+      checkStringToTimestamp("1999 08 01", None)
+      checkStringToTimestamp("1999-08 01", None)
+      checkStringToTimestamp("1999 08", None)
 
       // Truncating the fractional seconds
       c = Calendar.getInstance(TimeZone.getTimeZone("GMT+00:00"))


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to