This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 080c51e [SPARK-31641][SQL] Fix days conversions by JSON legacy parser 080c51e is described below commit 080c51e6b6268002948dd14171233bd35d954529 Author: Max Gekk <max.g...@gmail.com> AuthorDate: Tue May 5 14:15:31 2020 +0000 [SPARK-31641][SQL] Fix days conversions by JSON legacy parser ### What changes were proposed in this pull request? Perform days rebasing while converting days from a JSON string field. In Spark 2.4 and earlier versions, the days are interpreted as days since the epoch in the hybrid calendar (Julian + Gregorian since 1582-10-15). Since Spark 3.0, the base calendar has been switched to the Proleptic Gregorian calendar, so the days must be rebased to represent the same local date. ### Why are the changes needed? The changes fix a bug and restore compatibility with Spark 2.4, in which: ```scala scala> spark.read.schema("d date").json(Seq("{'d': '-141704'}").toDS).show +----------+ | d| +----------+ |1582-01-01| +----------+ ``` ### Does this PR introduce _any_ user-facing change? Yes. Before: ```scala scala> spark.read.schema("d date").json(Seq("{'d': '-141704'}").toDS).show +----------+ | d| +----------+ |1582-01-11| +----------+ ``` After: ```scala scala> spark.read.schema("d date").json(Seq("{'d': '-141704'}").toDS).show +----------+ | d| +----------+ |1582-01-01| +----------+ ``` ### How was this patch tested? Added a test to `JsonSuite`. Closes #28453 from MaxGekk/json-rebase-legacy-days. 
Authored-by: Max Gekk <max.g...@gmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit bd264299317bba91f2dc1dc27fd51e6bc0609d66) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../org/apache/spark/sql/catalyst/json/JacksonParser.scala | 2 +- .../spark/sql/execution/datasources/json/JsonSuite.scala | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index 8965a81..a52c345 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -259,7 +259,7 @@ class JacksonParser( // In Spark 1.5.0, we store the data as number of days since epoch in string. // So, we just convert it to Int. try { - parser.getText.toInt + RebaseDateTime.rebaseJulianToGregorianDays(parser.getText.toInt) } catch { case _: NumberFormatException => throw e } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 999eadb..4982991 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -2653,13 +2653,17 @@ abstract class JsonSuite extends QueryTest with SharedSparkSession with TestJson } } - test("SPARK-30960: parse date/timestamp string with legacy format") { - val ds = Seq("{'t': '2020-1-12 3:23:34.12', 'd': '2020-1-12 T', 'd2': '12345'}").toDS() - val json = spark.read.schema("t timestamp, d date, d2 date").json(ds) + test("SPARK-30960, SPARK-31641: parse date/timestamp string with legacy format") { + val julianDay = -141704 // 1582-01-01 in Julian calendar + 
val ds = Seq( + s"{'t': '2020-1-12 3:23:34.12', 'd': '2020-1-12 T', 'd2': '12345', 'd3': '$julianDay'}" + ).toDS() + val json = spark.read.schema("t timestamp, d date, d2 date, d3 date").json(ds) checkAnswer(json, Row( Timestamp.valueOf("2020-1-12 3:23:34.12"), Date.valueOf("2020-1-12"), - Date.valueOf(LocalDate.ofEpochDay(12345)))) + Date.valueOf(LocalDate.ofEpochDay(12345)), + Date.valueOf("1582-01-01"))) } test("exception mode for parsing date/timestamp string") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org