This is an automated email from the ASF dual-hosted git repository. hvanhovell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 64ce1c9 [SPARK-26657][SQL] Use Proleptic Gregorian calendar in DayWeek and in WeekOfYear 64ce1c9 is described below commit 64ce1c9f932b4acdf8f483265a628c124c9fd15d Author: Maxim Gekk <max.g...@gmail.com> AuthorDate: Tue Jan 22 17:33:29 2019 +0100 [SPARK-26657][SQL] Use Proleptic Gregorian calendar in DayWeek and in WeekOfYear ## What changes were proposed in this pull request? The expressions `DayWeek`, `DayOfWeek`, `WeekDay` and `WeekOfYear` are changed to use Proleptic Gregorian calendar instead of the hybrid one (Julian+Gregorian). This was achieved by using Java 8 API for date/timestamp manipulation, in particular the `LocalDate` class. Week of year calculation is performed according to ISO-8601. The first week of a week-based-year is the first Monday-based week of the standard ISO year that has at least 4 days in the new year (see https://docs.oracle.com/javase/8/docs/api/java/time/temporal/IsoFields.html). ## How was this patch tested? The changes were tested by `DateExpressionsSuite` and `DateFunctionsSuite`. Closes #23594 from MaxGekk/dayweek-gregorian. 
Lead-authored-by: Maxim Gekk <max.g...@gmail.com> Co-authored-by: Maxim Gekk <maxim.g...@databricks.com> Signed-off-by: Herman van Hovell <hvanhov...@databricks.com> --- docs/sql-migration-guide-upgrade.md | 2 + .../catalyst/expressions/datetimeExpressions.scala | 62 ++++++---------------- .../expressions/DateExpressionsSuite.scala | 4 +- 3 files changed, 20 insertions(+), 48 deletions(-) diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index 3d1b804..d442087 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -91,6 +91,8 @@ displayTitle: Spark SQL Upgrading Guide - In Spark version 2.4 and earlier, if `org.apache.spark.sql.functions.udf(Any, DataType)` gets a Scala closure with primitive-type argument, the returned UDF will return null if the input value is null. Since Spark 3.0, the UDF will return the default value of the Java type if the input value is null. For example, `val f = udf((x: Int) => x, IntegerType)`, `f($"x")` will return null in Spark 2.4 and earlier if column `x` is null, and return 0 in Spark 3.0. This behavior change is int [...] + - Since Spark 3.0, the `weekofyear`, `weekday` and `dayofweek` functions use the java.time API for calculating the week number of the year and the day number of the week based on the Proleptic Gregorian calendar. In Spark version 2.4 and earlier, the hybrid calendar (Julian + Gregorian) is used for the same purpose. Results of the functions returned by Spark 3.0 and previous versions can be different for dates before October 15, 1582 (Gregorian). + ## Upgrading From Spark SQL 2.3 to 2.4 - In Spark version 2.3 and earlier, the second parameter to the array_contains function is implicitly promoted to the element type of the first array type parameter. This type promotion can be lossy and may cause the `array_contains` function to return a wrong result. This problem has been addressed in 2.4 by employing a safer type promotion mechanism. 
This can cause some change in behavior and are illustrated in the table below. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index e758362..ec59502 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -18,7 +18,9 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.Timestamp -import java.util.{Calendar, Locale, TimeZone} +import java.time.LocalDate +import java.time.temporal.IsoFields +import java.util.{Locale, TimeZone} import scala.util.control.NonFatal @@ -430,20 +432,14 @@ case class DayOfMonth(child: Expression) extends UnaryExpression with ImplicitCa case class DayOfWeek(child: Expression) extends DayWeek { override protected def nullSafeEval(date: Any): Any = { - cal.setTimeInMillis(date.asInstanceOf[Int] * 1000L * 3600L * 24L) - cal.get(Calendar.DAY_OF_WEEK) + val localDate = LocalDate.ofEpochDay(date.asInstanceOf[Int]) + localDate.getDayOfWeek.plus(1).getValue } override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - nullSafeCodeGen(ctx, ev, time => { - val cal = classOf[Calendar].getName - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - val c = "calDayOfWeek" - ctx.addImmutableStateIfNotExists(cal, c, - v => s"""$v = $cal.getInstance($dtu.getTimeZone("UTC"));""") + nullSafeCodeGen(ctx, ev, days => { s""" - $c.setTimeInMillis($time * 1000L * 3600L * 24L); - ${ev.value} = $c.get($cal.DAY_OF_WEEK); + ${ev.value} = java.time.LocalDate.ofEpochDay($days).getDayOfWeek().plus(1).getValue(); """ }) } @@ -462,20 +458,14 @@ case class DayOfWeek(child: Expression) extends DayWeek { case class WeekDay(child: Expression) extends DayWeek { override protected def nullSafeEval(date: Any): Any = { - 
cal.setTimeInMillis(date.asInstanceOf[Int] * 1000L * 3600L * 24L) - (cal.get(Calendar.DAY_OF_WEEK) + 5 ) % 7 + val localDate = LocalDate.ofEpochDay(date.asInstanceOf[Int]) + localDate.getDayOfWeek.ordinal() } override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - nullSafeCodeGen(ctx, ev, time => { - val cal = classOf[Calendar].getName - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - val c = "calWeekDay" - ctx.addImmutableStateIfNotExists(cal, c, - v => s"""$v = $cal.getInstance($dtu.getTimeZone("UTC"));""") + nullSafeCodeGen(ctx, ev, days => { s""" - $c.setTimeInMillis($time * 1000L * 3600L * 24L); - ${ev.value} = ($c.get($cal.DAY_OF_WEEK) + 5) % 7; + ${ev.value} = java.time.LocalDate.ofEpochDay($days).getDayOfWeek().ordinal(); """ }) } @@ -486,10 +476,6 @@ abstract class DayWeek extends UnaryExpression with ImplicitCastInputTypes { override def inputTypes: Seq[AbstractDataType] = Seq(DateType) override def dataType: DataType = IntegerType - - @transient protected lazy val cal: Calendar = { - Calendar.getInstance(DateTimeUtils.getTimeZone("UTC")) - } } // scalastyle:off line.size.limit @@ -508,32 +494,16 @@ case class WeekOfYear(child: Expression) extends UnaryExpression with ImplicitCa override def dataType: DataType = IntegerType - @transient private lazy val c = { - val c = Calendar.getInstance(DateTimeUtils.getTimeZone("UTC")) - c.setFirstDayOfWeek(Calendar.MONDAY) - c.setMinimalDaysInFirstWeek(4) - c - } - override protected def nullSafeEval(date: Any): Any = { - c.setTimeInMillis(date.asInstanceOf[Int] * 1000L * 3600L * 24L) - c.get(Calendar.WEEK_OF_YEAR) + val localDate = LocalDate.ofEpochDay(date.asInstanceOf[Int]) + localDate.get(IsoFields.WEEK_OF_WEEK_BASED_YEAR) } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - nullSafeCodeGen(ctx, ev, time => { - val cal = classOf[Calendar].getName - val c = "calWeekOfYear" - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - 
ctx.addImmutableStateIfNotExists(cal, c, v => - s""" - |$v = $cal.getInstance($dtu.getTimeZone("UTC")); - |$v.setFirstDayOfWeek($cal.MONDAY); - |$v.setMinimalDaysInFirstWeek(4); - """.stripMargin) + nullSafeCodeGen(ctx, ev, days => { s""" - |$c.setTimeInMillis($time * 1000L * 3600L * 24L); - |${ev.value} = $c.get($cal.WEEK_OF_YEAR); + |${ev.value} = java.time.LocalDate.ofEpochDay($days).get( + | java.time.temporal.IsoFields.WEEK_OF_WEEK_BASED_YEAR); """.stripMargin }) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index c3b7e19..c3c29e3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -231,8 +231,8 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(WeekOfYear(Cast(Literal(date), DateType, gmtId)), 15) checkEvaluation(WeekOfYear(Cast(Literal(ts), DateType, gmtId)), 45) checkEvaluation(WeekOfYear(Cast(Literal("2011-05-06"), DateType, gmtId)), 18) - checkEvaluation(WeekOfYear(Literal(new Date(toMillis("1582-10-15 13:10:15")))), 40) - checkEvaluation(WeekOfYear(Literal(new Date(toMillis("1582-10-04 13:10:15")))), 39) + checkEvaluation(WeekOfYear(Cast(Literal("1582-10-15 13:10:15"), DateType, gmtId)), 41) + checkEvaluation(WeekOfYear(Cast(Literal("1582-10-04 13:10:15"), DateType, gmtId)), 40) checkConsistencyBetweenInterpretedAndCodegen(WeekOfYear, DateType) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org