This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.5 by this push:
     new 5f8ae9a3dbd [SPARK-45424][SQL] Fix TimestampFormatter return optional 
parse results when only prefix match
5f8ae9a3dbd is described below

commit 5f8ae9a3dbd2c7624bffd588483c9916c302c081
Author: Jia Fan <fanjiaemi...@qq.com>
AuthorDate: Mon Oct 9 12:30:20 2023 +0300

    [SPARK-45424][SQL] Fix TimestampFormatter return optional parse results 
when only prefix match
    
    ### What changes were proposed in this pull request?
    When a custom pattern is used to parse a timestamp and the pattern matches 
only a prefix of the input rather than the whole string, 
`Iso8601TimestampFormatter::parseOptional` and 
`Iso8601TimestampFormatter::parseWithoutTimeZoneOptional` should not return a 
non-empty result.
    E.g.: pattern = `yyyy-MM-dd HH:mm:ss`, value = `9999-12-31 23:59:59.999`. In 
fact, `yyyy-MM-dd HH:mm:ss` can parse `9999-12-31 23:59:59` normally, but the 
value has the suffix `.999`, so we must not return a non-empty result.
    This bug affects schema inference in CSV/JSON.
    
    ### Why are the changes needed?
    To fix the schema inference bug.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Added a new test.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #43245 from Hisoka-X/SPARK-45424-inference-schema-unresolved.
    
    Authored-by: Jia Fan <fanjiaemi...@qq.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
    (cherry picked from commit 4493b431192fcdbab1379b7ffb89eea0cdaa19f1)
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 .../apache/spark/sql/catalyst/util/TimestampFormatter.scala    | 10 ++++++----
 .../spark/sql/catalyst/util/TimestampFormatterSuite.scala      | 10 ++++++++++
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git 
a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
 
b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
index 8a288d0e9f3..55eee41c14c 100644
--- 
a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
+++ 
b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
@@ -167,8 +167,9 @@ class Iso8601TimestampFormatter(
 
   override def parseOptional(s: String): Option[Long] = {
     try {
-      val parsed = formatter.parseUnresolved(s, new ParsePosition(0))
-      if (parsed != null) {
+      val parsePosition = new ParsePosition(0)
+      val parsed = formatter.parseUnresolved(s, parsePosition)
+      if (parsed != null && s.length == parsePosition.getIndex) {
         Some(extractMicros(parsed))
       } else {
         None
@@ -196,8 +197,9 @@ class Iso8601TimestampFormatter(
 
   override def parseWithoutTimeZoneOptional(s: String, allowTimeZone: 
Boolean): Option[Long] = {
     try {
-      val parsed = formatter.parseUnresolved(s, new ParsePosition(0))
-      if (parsed != null) {
+      val parsePosition = new ParsePosition(0)
+      val parsed = formatter.parseUnresolved(s, parsePosition)
+      if (parsed != null && s.length == parsePosition.getIndex) {
         Some(extractMicrosNTZ(s, parsed, allowTimeZone))
       } else {
         None
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala
index eb173bc7f8c..2134a0d6ecd 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala
@@ -507,4 +507,14 @@ class TimestampFormatterSuite extends 
DatetimeFormatterSuite {
     assert(simpleFormatter.parseOptional("abc").isEmpty)
 
   }
+
+  test("SPARK-45424: do not return optional parse results when only prefix 
match") {
+    val formatter = new Iso8601TimestampFormatter(
+      "yyyy-MM-dd HH:mm:ss",
+      locale = DateFormatter.defaultLocale,
+      legacyFormat = LegacyDateFormats.SIMPLE_DATE_FORMAT,
+      isParsing = true, zoneId = DateTimeTestUtils.LA)
+    assert(formatter.parseOptional("9999-12-31 23:59:59.999").isEmpty)
+    assert(formatter.parseWithoutTimeZoneOptional("9999-12-31 23:59:59.999", 
true).isEmpty)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to