This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 1fc09e3 [SPARK-31867][SQL][FOLLOWUP] Check result differences for datetime formatting 1fc09e3 is described below commit 1fc09e3cfbd08c385a8c5cffa3e85a089852f8bc Author: Kent Yao <yaooq...@hotmail.com> AuthorDate: Fri Jun 5 16:44:16 2020 +0000 [SPARK-31867][SQL][FOLLOWUP] Check result differences for datetime formatting ### What changes were proposed in this pull request? In this PR, we throw `SparkUpgradeException` when getting `DateTimeException` for datetime formatting in the `EXCEPTION` legacy Time Parser Policy. ### Why are the changes needed? `DateTimeException` is also declared by `java.time.format.DateTimeFormatter#format`, but in Spark, it can barely occur. We have suspected one that due to a JDK bug so far. see https://bugs.openjdk.java.net/browse/JDK-8079628. For `from_unixtime` function, we will suppress the DateTimeException caused by `DD` and result `NULL`. It is a silent date change that should be avoided in Java 8. ### Does this PR introduce _any_ user-facing change? Yes, when running on Java8 and using `from_unixtime` function with pattern `DD` to format datetimes, if dayofyear>=100, `SparkUpgradeException` will alert users instead of silently resulting null. For `date_format`, `SparkUpgradeException` take the palace of `DateTimeException`. ### How was this patch tested? add unit tests. Closes #28736 from yaooqinn/SPARK-31867-F. Authored-by: Kent Yao <yaooq...@hotmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit fc6af9d900ec6f6a1cbe8f987857a69e6ef600d1) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../spark/sql/catalyst/util/DateFormatter.scala | 7 +++-- .../catalyst/util/DateTimeFormatterHelper.scala | 30 +++++++++++++++++++--- .../sql/catalyst/util/TimestampFormatter.scala | 7 +++-- .../catalyst/util/TimestampFormatterSuite.scala | 15 +++++++++++ 4 files changed, 51 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala index fbc9f56..ec8db46 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala @@ -58,12 +58,15 @@ class Iso8601DateFormatter( try { val localDate = toLocalDate(formatter.parse(s)) localDateToDays(localDate) - } catch checkDiffResult(s, legacyFormatter.parse) + } catch checkParsedDiff(s, legacyFormatter.parse) } } override def format(localDate: LocalDate): String = { - localDate.format(formatter) + try { + localDate.format(formatter) + } catch checkFormattedDiff(toJavaDate(localDateToDays(localDate)), + (d: Date) => format(d)) } override def format(days: Int): String = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala index 8e5c865..992a2b1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala @@ -21,7 +21,7 @@ import java.time._ import java.time.chrono.IsoChronology import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder, ResolverStyle} import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries} -import java.util.Locale +import java.util.{Date, Locale} import com.google.common.cache.CacheBuilder @@ -109,13 +109,17 @@ trait DateTimeFormatterHelper { formatter } + private def needConvertToSparkUpgradeException(e: Throwable): Boolean = e match { + case _: DateTimeException if SQLConf.get.legacyTimeParserPolicy == EXCEPTION => true + case _ => false + } // When legacy time parser policy set to EXCEPTION, check whether we will get different results // between legacy parser and new parser. If new parser fails but legacy parser works, throw a // SparkUpgradeException. On the contrary, if the legacy policy set to CORRECTED, // DateTimeParseException will address by the caller side. - protected def checkDiffResult[T]( + protected def checkParsedDiff[T]( s: String, legacyParseFunc: String => T): PartialFunction[Throwable, T] = { - case e: DateTimeException if SQLConf.get.legacyTimeParserPolicy == EXCEPTION => + case e if needConvertToSparkUpgradeException(e) => try { legacyParseFunc(s) } catch { @@ -126,6 +130,25 @@ trait DateTimeFormatterHelper { s"before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string.", e) } + // When legacy time parser policy set to EXCEPTION, check whether we will get different results + // between legacy formatter and new formatter. If new formatter fails but legacy formatter works, + // throw a SparkUpgradeException. On the contrary, if the legacy policy set to CORRECTED, + // DateTimeParseException will address by the caller side. + protected def checkFormattedDiff[T <: Date]( + d: T, + legacyFormatFunc: T => String): PartialFunction[Throwable, String] = { + case e if needConvertToSparkUpgradeException(e) => + val resultCandidate = try { + legacyFormatFunc(d) + } catch { + case _: Throwable => throw e + } + throw new SparkUpgradeException("3.0", s"Fail to format it to '$resultCandidate' in the new" + + s" formatter. You can set ${SQLConf.LEGACY_TIME_PARSER_POLICY.key} to LEGACY to restore" + + " the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid" + + " datetime string.", e) + } + /** * When the new DateTimeFormatter failed to initialize because of invalid datetime pattern, it * will throw IllegalArgumentException. If the pattern can be recognized by the legacy formatter @@ -137,7 +160,6 @@ trait DateTimeFormatterHelper { * @param tryLegacyFormatter a func to capture exception, identically which forces a legacy * datetime formatter to be initialized */ - protected def checkLegacyFormatter( pattern: String, tryLegacyFormatter: => Unit): PartialFunction[Throwable, DateTimeFormatter] = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index 6bcbb09..87d8a3e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -84,12 +84,15 @@ class Iso8601TimestampFormatter( val microsOfSecond = zonedDateTime.get(MICRO_OF_SECOND) Math.addExact(SECONDS.toMicros(epochSeconds), microsOfSecond) - } catch checkDiffResult(s, legacyFormatter.parse) + } catch checkParsedDiff(s, legacyFormatter.parse) } } override def format(instant: Instant): String = { - formatter.withZone(zoneId).format(instant) + try { + formatter.withZone(zoneId).format(instant) + } catch checkFormattedDiff(toJavaTimestamp(instantToMicros(instant)), + (t: Timestamp) => format(t)) } override def format(us: Long): String = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala index 311097f..88f6b0d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala @@ -415,4 +415,19 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite { val t5 = f3.parse("AM") assert(t5 === date(1970)) } + + test("check result differences for datetime formatting") { + val formatter = TimestampFormatter("DD", UTC, isParsing = false) + assert(formatter.format(date(1970, 1, 3)) == "03") + assert(formatter.format(date(1970, 4, 9)) == "99") + + if (System.getProperty("java.version").split("\\D+")(0).toInt < 9) { + // https://bugs.openjdk.java.net/browse/JDK-8079628 + intercept[SparkUpgradeException] { + formatter.format(date(1970, 4, 10)) + } + } else { + assert(formatter.format(date(1970, 4, 10)) == "100") + } + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org