This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 4493b431192 [SPARK-45424][SQL] Fix TimestampFormatter return optional parse results when only prefix match 4493b431192 is described below commit 4493b431192fcdbab1379b7ffb89eea0cdaa19f1 Author: Jia Fan <fanjiaemi...@qq.com> AuthorDate: Mon Oct 9 12:30:20 2023 +0300 [SPARK-45424][SQL] Fix TimestampFormatter return optional parse results when only prefix match ### What changes were proposed in this pull request? When a custom pattern is used to parse a timestamp and the pattern matches only a prefix of the input rather than the whole string, `Iso8601TimestampFormatter::parseOptional` and `Iso8601TimestampFormatter::parseWithoutTimeZoneOptional` should return an empty result. E.g.: pattern = `yyyy-MM-dd HH:mm:ss`, value = `9999-12-31 23:59:59.999`. In fact, `yyyy-MM-dd HH:mm:ss` can parse `9999-12-31 23:59:59` normally, but the value has the extra suffix `.999`, so we must not return a non-empty result. This bug affects schema inference in CSV/JSON. ### Why are the changes needed? To fix a schema inference bug. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a new test. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43245 from Hisoka-X/SPARK-45424-inference-schema-unresolved. 
Authored-by: Jia Fan <fanjiaemi...@qq.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../apache/spark/sql/catalyst/util/TimestampFormatter.scala | 10 ++++++---- .../spark/sql/catalyst/util/TimestampFormatterSuite.scala | 10 ++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index 8a288d0e9f3..55eee41c14c 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -167,8 +167,9 @@ class Iso8601TimestampFormatter( override def parseOptional(s: String): Option[Long] = { try { - val parsed = formatter.parseUnresolved(s, new ParsePosition(0)) - if (parsed != null) { + val parsePosition = new ParsePosition(0) + val parsed = formatter.parseUnresolved(s, parsePosition) + if (parsed != null && s.length == parsePosition.getIndex) { Some(extractMicros(parsed)) } else { None @@ -196,8 +197,9 @@ class Iso8601TimestampFormatter( override def parseWithoutTimeZoneOptional(s: String, allowTimeZone: Boolean): Option[Long] = { try { - val parsed = formatter.parseUnresolved(s, new ParsePosition(0)) - if (parsed != null) { + val parsePosition = new ParsePosition(0) + val parsed = formatter.parseUnresolved(s, parsePosition) + if (parsed != null && s.length == parsePosition.getIndex) { Some(extractMicrosNTZ(s, parsed, allowTimeZone)) } else { None diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala index ecd849dd3af..d2fc89a034f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala @@ -491,4 +491,14 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite { assert(simpleFormatter.parseOptional("abc").isEmpty) } + + test("SPARK-45424: do not return optional parse results when only prefix match") { + val formatter = new Iso8601TimestampFormatter( + "yyyy-MM-dd HH:mm:ss", + locale = DateFormatter.defaultLocale, + legacyFormat = LegacyDateFormats.SIMPLE_DATE_FORMAT, + isParsing = true, zoneId = DateTimeTestUtils.LA) + assert(formatter.parseOptional("9999-12-31 23:59:59.999").isEmpty) + assert(formatter.parseWithoutTimeZoneOptional("9999-12-31 23:59:59.999", true).isEmpty) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org