Jonathancui123 commented on code in PR #36871:
URL: https://github.com/apache/spark/pull/36871#discussion_r909084899
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala:
##########
@@ -197,34 +198,46 @@ class UnivocityParser(
Decimal(decimalParser(datum), dt.precision, dt.scale)
}
- case _: TimestampType => (d: String) =>
+ case _: DateType => (d: String) =>
nullSafeDatum(d, name, nullable, options) { datum =>
try {
- timestampFormatter.parse(datum)
+ dateFormatter.parse(datum)
} catch {
case NonFatal(e) =>
// If fails to parse, then tries the way used in 2.0 and 1.x for
backwards
// compatibility.
val str =
DateTimeUtils.cleanLegacyTimestampStr(UTF8String.fromString(datum))
- DateTimeUtils.stringToTimestamp(str,
options.zoneId).getOrElse(throw e)
+ DateTimeUtils.stringToDate(str).getOrElse(throw e)
}
}
- case _: TimestampNTZType => (d: String) =>
- nullSafeDatum(d, name, nullable, options) { datum =>
- timestampNTZFormatter.parseWithoutTimeZone(datum, false)
- }
-
- case _: DateType => (d: String) =>
+ case _: TimestampType => (d: String) =>
nullSafeDatum(d, name, nullable, options) { datum =>
try {
- dateFormatter.parse(datum)
+ timestampFormatter.parse(datum)
} catch {
case NonFatal(e) =>
// If fails to parse, then tries the way used in 2.0 and 1.x for
backwards
// compatibility.
val str =
DateTimeUtils.cleanLegacyTimestampStr(UTF8String.fromString(datum))
- DateTimeUtils.stringToDate(str).getOrElse(throw e)
+ DateTimeUtils.stringToTimestamp(str, options.zoneId).getOrElse {
+ // There may be date type entries in timestamp column due to
schema inference
+ if (options.inferDate) {
+ daysToMicros(dateFormatter.parse(datum), options.zoneId)
+ } else {
+ throw(e)
+ }
+ }
+ }
+ }
+
+ case _: TimestampNTZType => (d: String) =>
+ nullSafeDatum(d, name, nullable, options) { datum =>
+ try {
+ timestampNTZFormatter.parseWithoutTimeZone(datum, false)
+ } catch {
+ case NonFatal(e) if (options.inferDate) =>
+ daysToMicros(dateFormatter.parse(datum), options.zoneId)
Review Comment:
Wow, great catch! I hadn't fully considered the effect of the user's time zone
on TimestampNTZ parsing.
I've fixed the error and modified this test to use a user in the PST time zone
and to check that the parsed date is converted to a timestamp in UTC:
https://github.com/Jonathancui123/spark/blob/8df7ebbd6c1c0d5bb875ce554026f9b9aeae148e/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala#L368-L374
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]