This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 57fcc49  [SPARK-31176][SQL] Remove support for 'e'/'c' as datetime 
pattern character
57fcc49 is described below

commit 57fcc49306296b474c5b8b685ad13082f9b50a49
Author: Kent Yao <[email protected]>
AuthorDate: Wed Mar 18 20:19:50 2020 +0800

    [SPARK-31176][SQL] Remove support for 'e'/'c' as datetime pattern character
    
    ### What changes were proposed in this pull request?
    
    The meaning of 'u' was day number of the week in SimpleDateFormat, but it 
was changed to year in DateTimeFormatter. Now we keep the old meaning of 'u' by 
substituting 'u' with 'e' internally and using DateTimeFormatter to parse the 
pattern string. In DateTimeFormatter, the 'e' and 'c' letters also represent 
day-of-week. e.g.
    
    ```sql
    select date_format(timestamp '2019-10-06', 'yyyy-MM-dd uuuu');
    select date_format(timestamp '2019-10-06', 'yyyy-MM-dd uuee');
    select date_format(timestamp '2019-10-06', 'yyyy-MM-dd eeee');
    ```
    Because of the substitution, they all go to `.... eeee` silently. The 
users may be confused about their meanings, so we should mark them as 
illegal pattern characters to stay the same as before.
    
    This PR moves the method `convertIncompatiblePattern` from `DateTimeUtils` 
to the `DateTimeFormatterHelper` object, since it is quite specific to the 
`DateTimeFormatterHelper` class.
    It also adds 'e' and 'c' character checking in this method.
    
    Besides, `convertIncompatiblePattern` has a bug: it loses the trailing `'` 
when the pattern ends with one. This PR fixes that too. e.g.
    
    ```sql
    spark-sql> select date_format(timestamp "2019-10-06", "yyyy-MM-dd'S'");
    20/03/18 11:19:45 ERROR SparkSQLDriver: Failed in [select 
date_format(timestamp "2019-10-06", "yyyy-MM-dd'S'")]
    java.lang.IllegalArgumentException: Pattern ends with an incomplete string 
literal: uuuu-MM-dd'S
    
    spark-sql> select to_timestamp("2019-10-06S", "yyyy-MM-dd'S'");
    NULL
    ```
    ### Why are the changes needed?
    
    Avoid ambiguity in pattern semantics.
    Bug fix for patterns ending with a single quote.
    
    ### Does this PR introduce any user-facing change?
    
    No, these are not exposed yet.
    
    ### How was this patch tested?
    
    Added unit tests.
    
    Closes #27939 from yaooqinn/SPARK-31176.
    
    Authored-by: Kent Yao <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 docs/sql-ref-datetime-pattern.md                   |  2 +-
 .../catalyst/util/DateTimeFormatterHelper.scala    | 43 ++++++++++++++++++-
 .../spark/sql/catalyst/util/DateTimeUtils.scala    | 38 -----------------
 .../util/DateTimeFormatterHelperSuite.scala        | 48 ++++++++++++++++++++++
 .../sql/catalyst/util/DateTimeUtilsSuite.scala     | 16 --------
 .../test/resources/sql-tests/inputs/datetime.sql   |  7 ++++
 .../resources/sql-tests/results/datetime.sql.out   | 44 +++++++++++++++++++-
 7 files changed, 141 insertions(+), 57 deletions(-)

diff --git a/docs/sql-ref-datetime-pattern.md b/docs/sql-ref-datetime-pattern.md
index 429d781..f5c20ea 100644
--- a/docs/sql-ref-datetime-pattern.md
+++ b/docs/sql-ref-datetime-pattern.md
@@ -88,7 +88,7 @@ Spark uses the below letters in date and timestamp parsing 
and formatting:
   <td> Tue; Tuesday; T </td>
 </tr>
 <tr>
-  <td> <b>e</b> </td>
+  <td> <b>u</b> </td>
   <td> localized day-of-week </td>
   <td> number/text </td>
   <td> 2; 02; Tue; Tuesday; T </td>
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
index 668ce92..72bae28 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
@@ -55,7 +55,7 @@ trait DateTimeFormatterHelper {
       pattern: String,
       locale: Locale,
       needVarLengthSecondFraction: Boolean = false): DateTimeFormatter = {
-    val newPattern = DateTimeUtils.convertIncompatiblePattern(pattern)
+    val newPattern = convertIncompatiblePattern(pattern)
     val useVarLen = needVarLengthSecondFraction && newPattern.contains('S')
     val key = (newPattern, locale, useVarLen)
     var formatter = cache.getIfPresent(key)
@@ -157,4 +157,45 @@ private object DateTimeFormatterHelper {
       .appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true)
     toFormatter(builder, TimestampFormatter.defaultLocale)
   }
+
+  /**
+   * In Spark 3.0, we switch to the Proleptic Gregorian calendar and use 
DateTimeFormatter for
+   * parsing/formatting datetime values. The pattern string is incompatible 
with the one defined
+   * by SimpleDateFormat in Spark 2.4 and earlier. This function converts all 
incompatible pattern
+   * for the new parser in Spark 3.0. See more details in SPARK-31030.
+   * @param pattern The input pattern.
+   * @return The pattern for new parser
+   */
+  def convertIncompatiblePattern(pattern: String): String = {
+    val eraDesignatorContained = pattern.split("'").zipWithIndex.exists {
+      case (patternPart, index) =>
+        // Text can be quoted using single quotes, we only check the non-quote 
parts.
+        index % 2 == 0 && patternPart.contains("G")
+    }
+    (pattern + " ").split("'").zipWithIndex.map {
+      case (patternPart, index) =>
+        if (index % 2 == 0) {
+          for (c <- patternPart if c == 'c' || c == 'e') {
+            throw new IllegalArgumentException(s"Illegal pattern character: 
$c")
+          }
+          // The meaning of 'u' was day number of week in SimpleDateFormat, it 
was changed to year
+          // in DateTimeFormatter. Substitute 'u' to 'e' and use 
DateTimeFormatter to parse the
+          // string. If parsable, return the result; otherwise, fall back to 
'u', and then use the
+          // legacy SimpleDateFormat parser to parse. When it is successfully 
parsed, throw an
+          // exception and ask users to change the pattern strings or turn on 
the legacy mode;
+          // otherwise, return NULL as what Spark 2.4 does.
+          val res = patternPart.replace("u", "e")
+          // In DateTimeFormatter, 'u' supports negative years. We substitute 
'y' to 'u' here for
+          // keeping the support in Spark 3.0. If parse failed in Spark 3.0, 
fall back to 'y'.
+          // We only do this substitution when there is no era designator 
found in the pattern.
+          if (!eraDesignatorContained) {
+            res.replace("y", "u")
+          } else {
+            res
+          }
+        } else {
+          patternPart
+        }
+    }.mkString("'").stripSuffix(" ")
+  }
 }
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 9f207ec..161be05 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -936,42 +936,4 @@ object DateTimeUtils {
     val days = period.getDays
     new CalendarInterval(months, days, 0)
   }
-
-  /**
-   * In Spark 3.0, we switch to the Proleptic Gregorian calendar and use 
DateTimeFormatter for
-   * parsing/formatting datetime values. The pattern string is incompatible 
with the one defined
-   * by SimpleDateFormat in Spark 2.4 and earlier. This function converts all 
incompatible pattern
-   * for the new parser in Spark 3.0. See more details in SPARK-31030.
-   * @param pattern The input pattern.
-   * @return The pattern for new parser
-   */
-  def convertIncompatiblePattern(pattern: String): String = {
-    val eraDesignatorContained = pattern.split("'").zipWithIndex.exists {
-      case (patternPart, index) =>
-        // Text can be quoted using single quotes, we only check the non-quote 
parts.
-        index % 2 == 0 && patternPart.contains("G")
-    }
-    pattern.split("'").zipWithIndex.map {
-      case (patternPart, index) =>
-        if (index % 2 == 0) {
-          // The meaning of 'u' was day number of week in SimpleDateFormat, it 
was changed to year
-          // in DateTimeFormatter. Substitute 'u' to 'e' and use 
DateTimeFormatter to parse the
-          // string. If parsable, return the result; otherwise, fall back to 
'u', and then use the
-          // legacy SimpleDateFormat parser to parse. When it is successfully 
parsed, throw an
-          // exception and ask users to change the pattern strings or turn on 
the legacy mode;
-          // otherwise, return NULL as what Spark 2.4 does.
-          val res = patternPart.replace("u", "e")
-          // In DateTimeFormatter, 'u' supports negative years. We substitute 
'y' to 'u' here for
-          // keeping the support in Spark 3.0. If parse failed in Spark 3.0, 
fall back to 'y'.
-          // We only do this substitution when there is no era designator 
found in the pattern.
-          if (!eraDesignatorContained) {
-            res.replace("y", "u")
-          } else {
-            res
-          }
-        } else {
-          patternPart
-        }
-    }.mkString("'")
-  }
 }
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelperSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelperSuite.scala
new file mode 100644
index 0000000..811c4da
--- /dev/null
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelperSuite.scala
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.util
+
+import org.apache.spark.SparkFunSuite
+import 
org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper.convertIncompatiblePattern
+
+class DateTimeFormatterHelperSuite extends SparkFunSuite {
+
+  test("check incompatible pattern") {
+    assert(convertIncompatiblePattern("MM-DD-u") === "MM-DD-e")
+    assert(convertIncompatiblePattern("yyyy-MM-dd'T'HH:mm:ss.SSSz")
+      === "uuuu-MM-dd'T'HH:mm:ss.SSSz")
+    assert(convertIncompatiblePattern("yyyy-MM'y contains in quoted 
text'HH:mm:ss")
+      === "uuuu-MM'y contains in quoted text'HH:mm:ss")
+    assert(convertIncompatiblePattern("yyyy-MM-dd-u'T'HH:mm:ss.SSSz")
+      === "uuuu-MM-dd-e'T'HH:mm:ss.SSSz")
+    assert(convertIncompatiblePattern("yyyy-MM'u contains in quoted 
text'HH:mm:ss")
+      === "uuuu-MM'u contains in quoted text'HH:mm:ss")
+    assert(convertIncompatiblePattern("yyyy-MM'u contains in quoted 
text'''''HH:mm:ss")
+      === "uuuu-MM'u contains in quoted text'''''HH:mm:ss")
+    assert(convertIncompatiblePattern("yyyy-MM-dd'T'HH:mm:ss.SSSz G")
+      === "yyyy-MM-dd'T'HH:mm:ss.SSSz G")
+    val e1 = 
intercept[IllegalArgumentException](convertIncompatiblePattern("yyyy-MM-dd eeee 
G"))
+    assert(e1.getMessage === "Illegal pattern character: e")
+    val e2 = 
intercept[IllegalArgumentException](convertIncompatiblePattern("yyyy-MM-dd cccc 
G"))
+    assert(e2.getMessage === "Illegal pattern character: c")
+    assert(convertIncompatiblePattern("yyyy-MM-dd uuuu") === "uuuu-MM-dd eeee")
+    assert(convertIncompatiblePattern("yyyy-MM-dd EEEE") === "uuuu-MM-dd EEEE")
+    assert(convertIncompatiblePattern("yyyy-MM-dd'e'HH:mm:ss") === 
"uuuu-MM-dd'e'HH:mm:ss")
+    assert(convertIncompatiblePattern("yyyy-MM-dd'T'") === "uuuu-MM-dd'T'")
+  }
+}
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 68a4a24..b3ad168 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -670,20 +670,4 @@ class DateTimeUtilsSuite extends SparkFunSuite with 
Matchers with SQLHelper {
       assert(toDate("tomorrow CET ", zoneId).get === today + 1)
     }
   }
-
-  test("check incompatible pattern") {
-    assert(convertIncompatiblePattern("MM-DD-u") === "MM-DD-e")
-    assert(convertIncompatiblePattern("yyyy-MM-dd'T'HH:mm:ss.SSSz")
-      === "uuuu-MM-dd'T'HH:mm:ss.SSSz")
-    assert(convertIncompatiblePattern("yyyy-MM'y contains in quoted 
text'HH:mm:ss")
-      === "uuuu-MM'y contains in quoted text'HH:mm:ss")
-    assert(convertIncompatiblePattern("yyyy-MM-dd-u'T'HH:mm:ss.SSSz")
-      === "uuuu-MM-dd-e'T'HH:mm:ss.SSSz")
-    assert(convertIncompatiblePattern("yyyy-MM'u contains in quoted 
text'HH:mm:ss")
-      === "uuuu-MM'u contains in quoted text'HH:mm:ss")
-    assert(convertIncompatiblePattern("yyyy-MM'u contains in quoted 
text'''''HH:mm:ss")
-      === "uuuu-MM'u contains in quoted text'''''HH:mm:ss")
-    assert(convertIncompatiblePattern("yyyy-MM-dd'T'HH:mm:ss.SSSz G")
-      === "yyyy-MM-dd'T'HH:mm:ss.SSSz G")
-  }
 }
diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql 
b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
index 9026427..a06cdfd 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
@@ -100,3 +100,10 @@ select to_timestamp("12.12342019-10-06S10:11", 
"ss.SSSSyyyy-MM-dd'S'HH:mm");
 select to_timestamp("12.1232019-10-06S10:11", "ss.SSSSyyyy-MM-dd'S'HH:mm");
 select to_timestamp("12.1232019-10-06S10:11", "ss.SSSSyy-MM-dd'S'HH:mm");
 select to_timestamp("12.1234019-10-06S10:11", "ss.SSSSy-MM-dd'S'HH:mm");
+
+select to_timestamp("2019-10-06S", "yyyy-MM-dd'S'");
+select to_timestamp("S2019-10-06", "'S'yyyy-MM-dd");
+
+select date_format(timestamp '2019-10-06', 'yyyy-MM-dd uuee');
+select date_format(timestamp '2019-10-06', 'yyyy-MM-dd uucc');
+select date_format(timestamp '2019-10-06', 'yyyy-MM-dd uuuu');
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out 
b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
index 7258d00..714412f 100755
--- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 68
+-- Number of queries: 73
 
 
 -- !query
@@ -559,3 +559,45 @@ select to_timestamp("12.1234019-10-06S10:11", 
"ss.SSSSy-MM-dd'S'HH:mm")
 struct<to_timestamp('12.1234019-10-06S10:11', 
'ss.SSSSy-MM-dd\'S\'HH:mm'):timestamp>
 -- !query output
 0019-10-06 10:11:12.1234
+
+
+-- !query
+select to_timestamp("2019-10-06S", "yyyy-MM-dd'S'")
+-- !query schema
+struct<to_timestamp('2019-10-06S', 'yyyy-MM-dd\'S\''):timestamp>
+-- !query output
+2019-10-06 00:00:00
+
+
+-- !query
+select to_timestamp("S2019-10-06", "'S'yyyy-MM-dd")
+-- !query schema
+struct<to_timestamp('S2019-10-06', '\'S\'yyyy-MM-dd'):timestamp>
+-- !query output
+2019-10-06 00:00:00
+
+
+-- !query
+select date_format(timestamp '2019-10-06', 'yyyy-MM-dd uuee')
+-- !query schema
+struct<>
+-- !query output
+java.lang.IllegalArgumentException
+Illegal pattern character: e
+
+
+-- !query
+select date_format(timestamp '2019-10-06', 'yyyy-MM-dd uucc')
+-- !query schema
+struct<>
+-- !query output
+java.lang.IllegalArgumentException
+Illegal pattern character: c
+
+
+-- !query
+select date_format(timestamp '2019-10-06', 'yyyy-MM-dd uuuu')
+-- !query schema
+struct<date_format(TIMESTAMP '2019-10-06 00:00:00', yyyy-MM-dd uuuu):string>
+-- !query output
+2019-10-06 Sunday


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to