This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push:
new 5f8ae9a3dbd [SPARK-45424][SQL] Fix TimestampFormatter return optional
parse results when only prefix match
5f8ae9a3dbd is described below
commit 5f8ae9a3dbd2c7624bffd588483c9916c302c081
Author: Jia Fan <[email protected]>
AuthorDate: Mon Oct 9 12:30:20 2023 +0300
[SPARK-45424][SQL] Fix TimestampFormatter return optional parse results
when only prefix match
### What changes were proposed in this pull request?
When a custom pattern is used to parse a timestamp and only a prefix of the
input matches (not the whole string), `Iso8601TimestampFormatter::parseOptional` and
`Iso8601TimestampFormatter::parseWithoutTimeZoneOptional` should not return a
non-empty result.
E.g.: pattern = `yyyy-MM-dd HH:mm:ss`, value = `9999-12-31 23:59:59.999`. In
fact, `yyyy-MM-dd HH:mm:ss` can parse `9999-12-31 23:59:59` normally, but the
value has the suffix `.999`, so we must not return a non-empty result.
This bug affects schema inference in CSV/JSON.
### Why are the changes needed?
To fix the schema inference bug.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Added a new test.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #43245 from Hisoka-X/SPARK-45424-inference-schema-unresolved.
Authored-by: Jia Fan <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
(cherry picked from commit 4493b431192fcdbab1379b7ffb89eea0cdaa19f1)
Signed-off-by: Max Gekk <[email protected]>
---
.../apache/spark/sql/catalyst/util/TimestampFormatter.scala | 10 ++++++----
.../spark/sql/catalyst/util/TimestampFormatterSuite.scala | 10 ++++++++++
2 files changed, 16 insertions(+), 4 deletions(-)
diff --git
a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
index 8a288d0e9f3..55eee41c14c 100644
---
a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
+++
b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
@@ -167,8 +167,9 @@ class Iso8601TimestampFormatter(
override def parseOptional(s: String): Option[Long] = {
try {
- val parsed = formatter.parseUnresolved(s, new ParsePosition(0))
- if (parsed != null) {
+ val parsePosition = new ParsePosition(0)
+ val parsed = formatter.parseUnresolved(s, parsePosition)
+ if (parsed != null && s.length == parsePosition.getIndex) {
Some(extractMicros(parsed))
} else {
None
@@ -196,8 +197,9 @@ class Iso8601TimestampFormatter(
override def parseWithoutTimeZoneOptional(s: String, allowTimeZone:
Boolean): Option[Long] = {
try {
- val parsed = formatter.parseUnresolved(s, new ParsePosition(0))
- if (parsed != null) {
+ val parsePosition = new ParsePosition(0)
+ val parsed = formatter.parseUnresolved(s, parsePosition)
+ if (parsed != null && s.length == parsePosition.getIndex) {
Some(extractMicrosNTZ(s, parsed, allowTimeZone))
} else {
None
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala
index eb173bc7f8c..2134a0d6ecd 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala
@@ -507,4 +507,14 @@ class TimestampFormatterSuite extends
DatetimeFormatterSuite {
assert(simpleFormatter.parseOptional("abc").isEmpty)
}
+
+ test("SPARK-45424: do not return optional parse results when only prefix
match") {
+ val formatter = new Iso8601TimestampFormatter(
+ "yyyy-MM-dd HH:mm:ss",
+ locale = DateFormatter.defaultLocale,
+ legacyFormat = LegacyDateFormats.SIMPLE_DATE_FORMAT,
+ isParsing = true, zoneId = DateTimeTestUtils.LA)
+ assert(formatter.parseOptional("9999-12-31 23:59:59.999").isEmpty)
+ assert(formatter.parseWithoutTimeZoneOptional("9999-12-31 23:59:59.999",
true).isEmpty)
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]