This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new b0155e36 [SPARK-31557][SQL] Legacy time parser should return Gregorian
days rather than Julian days
b0155e36 is described below
commit b0155e36c3f267743c65a259e2be16324714de4a
Author: Bruce Robbins <[email protected]>
AuthorDate: Mon Apr 27 05:00:36 2020 +0000
[SPARK-31557][SQL] Legacy time parser should return Gregorian days rather
than Julian days
This PR modifies LegacyDateFormatter#parse to return proleptic Gregorian
days rather than hybrid Julian days.
The legacy time parser currently returns epoch days in the hybrid Julian
calendar. However, the callers of the legacy parser (e.g., UnivocityParser,
JacksonParser) expect epoch days in the proleptic Gregorian calendar. As a
result, pre-Gregorian dates like '1000-01-01' get interpreted as '1000-01-06'.
Does this PR introduce any user-facing change? No.
How was this patch tested? Manual testing and modified existing unit tests.
Closes #28345 from bersprockets/SPARK-31557.
Authored-by: Bruce Robbins <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit a911287244a98aa9e6464bcdd97c80e7ad732788)
Signed-off-by: Wenchen Fan <[email protected]>
---
.../spark/sql/catalyst/util/DateFormatter.scala | 3 +-
.../apache/spark/sql/util/DateFormatterSuite.scala | 76 ++++++++++++----------
2 files changed, 44 insertions(+), 35 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
index d2e4e8b..0f79c1a 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
@@ -66,8 +66,7 @@ trait LegacyDateFormatter extends DateFormatter {
def formatDate(d: Date): String
override def parse(s: String): Int = {
- val milliseconds = parseToDate(s).getTime
- DateTimeUtils.millisToDays(milliseconds)
+ fromJavaDate(new java.sql.Date(parseToDate(s).getTime))
}
override def format(days: Int): String = {
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala
index a40dbcc..2df1d49 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala
@@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.plans.SQLHelper
import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId,
localDateToDays}
import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy
class DateFormatterSuite extends SparkFunSuite with SQLHelper {
test("parsing dates") {
@@ -47,45 +48,54 @@ class DateFormatterSuite extends SparkFunSuite with
SQLHelper {
}
test("roundtrip date -> days -> date") {
- Seq(
- "0050-01-01",
- "0953-02-02",
- "1423-03-08",
- "1969-12-31",
- "1972-08-25",
- "1975-09-26",
- "2018-12-12",
- "2038-01-01",
- "5010-11-17").foreach { date =>
- DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone =>
- withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) {
- val formatter = DateFormatter(getZoneId(timeZone))
- val days = formatter.parse(date)
- val formatted = formatter.format(days)
- assert(date === formatted)
+ LegacyBehaviorPolicy.values.foreach { parserPolicy =>
+ withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key ->
parserPolicy.toString) {
+ Seq(
+ "0050-01-01",
+ "0953-02-02",
+ "1423-03-08",
+ "1582-10-15",
+ "1969-12-31",
+ "1972-08-25",
+ "1975-09-26",
+ "2018-12-12",
+ "2038-01-01",
+ "5010-11-17").foreach { date =>
+ DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone =>
+ withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) {
+ val formatter = DateFormatter(getZoneId(timeZone))
+ val days = formatter.parse(date)
+ val formatted = formatter.format(days)
+ assert(date === formatted)
+ }
+ }
}
}
}
}
test("roundtrip days -> date -> days") {
- Seq(
- -701265,
- -371419,
- -199722,
- -1,
- 0,
- 967,
- 2094,
- 17877,
- 24837,
- 1110657).foreach { days =>
- DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone =>
- withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) {
- val formatter = DateFormatter(getZoneId(timeZone))
- val date = formatter.format(days)
- val parsed = formatter.parse(date)
- assert(days === parsed)
+ LegacyBehaviorPolicy.values.foreach { parserPolicy =>
+ withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key ->
parserPolicy.toString) {
+ Seq(
+ -701265,
+ -371419,
+ -199722,
+ -1,
+ 0,
+ 967,
+ 2094,
+ 17877,
+ 24837,
+ 1110657).foreach { days =>
+ DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone =>
+ withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) {
+ val formatter = DateFormatter(getZoneId(timeZone))
+ val date = formatter.format(days)
+ val parsed = formatter.parse(date)
+ assert(days === parsed)
+ }
+ }
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]