This is an automated email from the ASF dual-hosted git repository. gengliang pushed a commit to branch branch-3.5 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push: new 908c472728f2 [SPARK-46396][SQL] Timestamp inference should not throw exception 908c472728f2 is described below commit 908c472728f24034baf0b59f03b04ca148eabeca Author: Gengliang Wang <gengli...@apache.org> AuthorDate: Thu Dec 14 00:06:22 2023 -0800 [SPARK-46396][SQL] Timestamp inference should not throw exception ### What changes were proposed in this pull request? When setting `spark.sql.legacy.timeParserPolicy=LEGACY`, Spark will use the LegacyFastTimestampFormatter to infer potential timestamp columns. The inference shouldn't throw exception. However, when the input is 23012150952, there is exception: ``` For input string: "23012150952" java.lang.NumberFormatException: For input string: "23012150952" at java.base/java.lang.NumberFormatException.forInputString(NumberFormatException.java:67) at java.base/java.lang.Integer.parseInt(Integer.java:668) at java.base/java.lang.Integer.parseInt(Integer.java:786) at org.apache.commons.lang3.time.FastDateParser$NumberStrategy.parse(FastDateParser.java:304) at org.apache.commons.lang3.time.FastDateParser.parse(FastDateParser.java:1045) at org.apache.commons.lang3.time.FastDateFormat.parse(FastDateFormat.java:651) at org.apache.spark.sql.catalyst.util.LegacyFastTimestampFormatter.parseOptional(TimestampFormatter.scala:418) ``` This PR is to fix the issue. ### Why are the changes needed? Bug fix, Timestamp inference should not throw exception ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? New test case + existing tests ### Was this patch authored or co-authored using generative AI tooling? No Closes #44338 from gengliangwang/fixParseOptional. Authored-by: Gengliang Wang <gengli...@apache.org> Signed-off-by: Gengliang Wang <gengli...@apache.org> (cherry picked from commit 4a79ae9d821e9b04fbe949251050c3e4819dff92) Signed-off-by: Gengliang Wang <gengli...@apache.org> --- .../apache/spark/sql/catalyst/util/TimestampFormatter.scala | 12 ++++++++---- .../spark/sql/catalyst/util/TimestampFormatterSuite.scala | 3 ++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index 55eee41c14ca..0866cee9334c 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -414,10 +414,14 @@ class LegacyFastTimestampFormatter( override def parseOptional(s: String): Option[Long] = { cal.clear() // Clear the calendar because it can be re-used many times - if (fastDateFormat.parse(s, new ParsePosition(0), cal)) { - Some(extractMicros(cal)) - } else { - None + try { + if (fastDateFormat.parse(s, new ParsePosition(0), cal)) { + Some(extractMicros(cal)) + } else { + None + } + } catch { + case NonFatal(_) => None } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala index 2134a0d6ecd3..27d60815766d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala @@ -502,10 +502,11 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite { assert(fastFormatter.parseOptional("2023-12-31 23:59:59.9990").contains(1704067199999000L)) assert(fastFormatter.parseOptional("abc").isEmpty) + assert(fastFormatter.parseOptional("23012150952").isEmpty) assert(simpleFormatter.parseOptional("2023-12-31 23:59:59.9990").contains(1704067208990000L)) assert(simpleFormatter.parseOptional("abc").isEmpty) - + assert(simpleFormatter.parseOptional("23012150952").isEmpty) } test("SPARK-45424: do not return optional parse results when only prefix match") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org