This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new a031aaa487e [SPARK-40496][SQL] Fix configs to control
"enableDateTimeParsingFallback"
a031aaa487e is described below
commit a031aaa487e8dc928ff3431ce2f3312236531bd4
Author: Ivan Sadikov <[email protected]>
AuthorDate: Wed Sep 21 13:17:21 2022 +0800
[SPARK-40496][SQL] Fix configs to control "enableDateTimeParsingFallback"
### What changes were proposed in this pull request?
The commit
https://github.com/apache/spark/commit/a93044550259fa0ee8897d0576f6eeac8ec73c27
introduced the `enableDateTimeParsingFallback` config, but the usage was incorrect
in CSV/UnivocityParser: the DateType and TimestampType configs were swapped.
This PR fixes the issue so `enableParsingFallbackForDateType` controls
DateType fallback and `enableParsingFallbackForTimestampType` controls
TimestampType.
JSON does not have this problem - the property is correctly used for the
corresponding data types in
https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala#L305.
Setting the parsing fallback via a data source option or SQL config will work
correctly. The issue would only happen when using `dateFormat` or
`timestampFormat` without setting the flag explicitly.
### Why are the changes needed?
Correctness fix. Without the change, the fallback would not be enabled when
using dateFormat and timestampFormat. All other means of setting the parsing
fallback (data source option and SQLConf) would still work correctly.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
I added a unit test for CSV and JSON to reproduce the issue and verify it
was fixed.
Closes #37942 from sadikovi/SPARK-40496.
Authored-by: Ivan Sadikov <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../spark/sql/catalyst/csv/UnivocityParser.scala | 4 ++--
.../sql/execution/datasources/csv/CSVSuite.scala | 25 ++++++++++++++++++++++
.../sql/execution/datasources/json/JsonSuite.scala | 25 ++++++++++++++++++++++
3 files changed, 52 insertions(+), 2 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
index 9d855d1a93d..160f6beb09b 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
@@ -224,7 +224,7 @@ class UnivocityParser(
case NonFatal(e) =>
// If fails to parse, then tries the way used in 2.0 and 1.x for
backwards
// compatibility if enabled.
- if (!enableParsingFallbackForTimestampType) {
+ if (!enableParsingFallbackForDateType) {
throw e
}
val str =
DateTimeUtils.cleanLegacyTimestampStr(UTF8String.fromString(datum))
@@ -244,7 +244,7 @@ class UnivocityParser(
} else {
// If fails to parse, then tries the way used in 2.0 and 1.x for
backwards
// compatibility if enabled.
- if (!enableParsingFallbackForDateType) {
+ if (!enableParsingFallbackForTimestampType) {
throw e
}
val str =
DateTimeUtils.cleanLegacyTimestampStr(UTF8String.fromString(datum))
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index f74f7a00c13..a82b33fb0ee 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -3008,6 +3008,31 @@ abstract class CSVSuite
}
}
}
+
+ test("SPARK-40496: disable parsing fallback when the date/timestamp format
is provided") {
+ // The test verifies that the fallback can be disabled by providing
dateFormat or
+ // timestampFormat without any additional configuration.
+ //
+ // We also need to disable "legacy" parsing mode that implicitly enables
parsing fallback.
+ for (policy <- Seq("exception", "corrected")) {
+ withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> policy) {
+ withTempPath { path =>
+ Seq("2020-01-01").toDF()
+ .repartition(1)
+ .write.text(path.getAbsolutePath)
+
+ var df = spark.read.schema("col date").option("dateFormat",
"yyyy/MM/dd")
+ .csv(path.getAbsolutePath)
+ checkAnswer(df, Seq(Row(null)))
+
+ df = spark.read.schema("col timestamp").option("timestampFormat",
"yyyy/MM/dd HH:mm:ss")
+ .csv(path.getAbsolutePath)
+
+ checkAnswer(df, Seq(Row(null)))
+ }
+ }
+ }
+ }
}
class CSVv1Suite extends CSVSuite {
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
index a9e1d3a751e..bc123a4eedb 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
@@ -3356,6 +3356,31 @@ abstract class JsonSuite
}
}
}
+
+ test("SPARK-40496: disable parsing fallback when the date/timestamp format
is provided") {
+ // The test verifies that the fallback can be disabled by providing
dateFormat or
+ // timestampFormat without any additional configuration.
+ //
+ // We also need to disable "legacy" parsing mode that implicitly enables
parsing fallback.
+ for (policy <- Seq("exception", "corrected")) {
+ withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> policy) {
+ withTempPath { path =>
+ Seq("""{"col": "2020-01-01"}""").toDF()
+ .repartition(1)
+ .write.text(path.getAbsolutePath)
+
+ var df = spark.read.schema("col date").option("dateFormat",
"yyyy/MM/dd")
+ .json(path.getAbsolutePath)
+ checkAnswer(df, Seq(Row(null)))
+
+ df = spark.read.schema("col timestamp").option("timestampFormat",
"yyyy/MM/dd HH:mm:ss")
+ .json(path.getAbsolutePath)
+
+ checkAnswer(df, Seq(Row(null)))
+ }
+ }
+ }
+ }
}
class JsonV1Suite extends JsonSuite {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]