cloud-fan closed pull request #23000: [SPARK-26002][SQL] Fix day of year
calculation for Julian calendar days
URL: https://github.com/apache/spark/pull/23000
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 5ae75dc939303..f590c63f80b21 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -53,14 +53,30 @@ object DateTimeUtils {
final val MILLIS_PER_DAY = SECONDS_PER_DAY * 1000L
- // number of days in 400 years
+ // number of days in 400 years by Gregorian calendar
final val daysIn400Years: Int = 146097
+
+ // In the Julian calendar every year that is exactly divisible by 4 is a
leap year without any
+ // exception. But in the Gregorian calendar every year that is exactly
divisible by four
+ // is a leap year, except for years that are exactly divisible by 100, but
these centurial years
+ // are leap years if they are exactly divisible by 400.
+ // So there are 3 extra days in the Julian calendar within a 400 years cycle
compared to the
+ // Gregorian calendar.
+ final val extraLeapDaysIn400YearsJulian = 3
+
+ // number of days in 400 years by Julian calendar
+ final val daysIn400YearsInJulian: Int = daysIn400Years +
extraLeapDaysIn400YearsJulian
+
// number of days between 1.1.1970 and 1.1.2001
final val to2001 = -11323
// this is year -17999, calculation: 50 * daysIn400Year
final val YearZero = -17999
final val toYearZero = to2001 + 7304850
+
+ // days to year -17999 in Julian calendar
+ final val toYearZeroInJulian = toYearZero + 49 *
extraLeapDaysIn400YearsJulian
+
final val TimeZoneGMT = TimeZone.getTimeZone("GMT")
final val TimeZoneUTC = TimeZone.getTimeZone("UTC")
final val MonthOf31Days = Set(1, 3, 5, 7, 8, 10, 12)
@@ -585,20 +601,30 @@ object DateTimeUtils {
* Return the number of days since the start of 400 year period.
* The second year of a 400 year period (year 1) starts on day 365.
*/
- private[this] def yearBoundary(year: Int): Int = {
- year * 365 + ((year / 4 ) - (year / 100) + (year / 400))
+ private[this] def yearBoundary(year: Int, isGregorian: Boolean): Int = {
+ if (isGregorian) {
+ year * 365 + ((year / 4) - (year / 100) + (year / 400))
+ } else {
+ year * 365 + (year / 4)
+ }
}
/**
* Calculates the number of years for the given number of days. This depends
* on a 400 year period.
* @param days days since the beginning of the 400 year period
+ * @param isGregorian indicates whether leap years should be calculated
according to Gregorian
+ * (or Julian) calendar
* @return (number of year, days in year)
*/
- private[this] def numYears(days: Int): (Int, Int) = {
+ private[this] def numYears(days: Int, isGregorian: Boolean): (Int, Int) = {
val year = days / 365
- val boundary = yearBoundary(year)
- if (days > boundary) (year, days - boundary) else (year - 1, days -
yearBoundary(year - 1))
+ val boundary = yearBoundary(year, isGregorian)
+ if (days > boundary) {
+ (year, days - boundary)
+ } else {
+ (year - 1, days - yearBoundary(year - 1, isGregorian))
+ }
}
/**
@@ -609,18 +635,26 @@ object DateTimeUtils {
* equals to the period 1.1.1601 until 31.12.2000.
*/
private[this] def getYearAndDayInYear(daysSince1970: SQLDate): (Int, Int) = {
- // add the difference (in days) between 1.1.1970 and the artificial year 0
(-17999)
- var daysSince1970Tmp = daysSince1970
// Since Julian calendar was replaced with the Gregorian calendar,
// the 10 days after Oct. 4 were skipped.
// (1582-10-04) -141428 days since 1970-01-01
if (daysSince1970 <= -141428) {
- daysSince1970Tmp -= 10
+ getYearAndDayInYear(daysSince1970 - 10, toYearZeroInJulian,
daysIn400YearsInJulian, false)
+ } else {
+ getYearAndDayInYear(daysSince1970, toYearZero, daysIn400Years, true)
}
- val daysNormalized = daysSince1970Tmp + toYearZero
+ }
+
+ private def getYearAndDayInYear(
+ daysSince1970: SQLDate,
+ toYearZero: SQLDate,
+ daysIn400Years: SQLDate,
+ isGregorian: Boolean): (Int, Int) = {
+ // add the difference (in days) between 1.1.1970 and the artificial year 0
(-17999)
+ val daysNormalized = daysSince1970 + toYearZero
val numOfQuarterCenturies = daysNormalized / daysIn400Years
val daysInThis400 = daysNormalized % daysIn400Years + 1
- val (years, dayInYear) = numYears(daysInThis400)
+ val (years, dayInYear) = numYears(daysInThis400, isGregorian)
val year: Int = (2001 - 20000) + 400 * numOfQuarterCenturies + years
(year, dayInYear)
}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 0182eeb171215..2cb6110e2c093 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -410,6 +410,36 @@ class DateTimeUtilsSuite extends SparkFunSuite {
assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 78)
}
+ test("SPARK-26002: correct day of year calculations for Julian calendar
years") {
+ val c = Calendar.getInstance()
+ c.set(Calendar.MILLISECOND, 0)
+ (1000 to 1600 by 100).foreach { year =>
+ // January 1 is the 1st day of year.
+ c.set(year, 0, 1, 0, 0, 0)
+ assert(getYear(getInUTCDays(c.getTimeInMillis)) === year)
+ assert(getMonth(getInUTCDays(c.getTimeInMillis)) === 1)
+ assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 1)
+
+ // March 1 is the 61st day of the year as they are leap years. It is
true for
+ // even the multiples of 100 as before 1582-10-4 the Julian calendar
leap year calculation
+ // is used in which every multiples of 4 are leap years
+ c.set(year, 2, 1, 0, 0, 0)
+ assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 61)
+ assert(getMonth(getInUTCDays(c.getTimeInMillis)) === 3)
+
+ // testing leap day (February 29) in leap years
+ c.set(year, 1, 29, 0, 0, 0)
+ assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 60)
+
+ // For non-leap years:
+ c.set(year + 1, 2, 1, 0, 0, 0)
+ assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 60)
+ }
+
+ c.set(1582, 2, 1, 0, 0, 0)
+ assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 60)
+ }
+
test("get year") {
val c = Calendar.getInstance()
c.set(2015, 2, 18, 0, 0, 0)
diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
index 547c2bef02b24..8bd8bc2b94b8e 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
@@ -27,3 +27,5 @@ select current_date = current_date(), current_timestamp =
current_timestamp(), a
select a, b from ttf2 order by a, current_date;
select weekday('2007-02-03'), weekday('2009-07-30'), weekday('2017-05-27'),
weekday(null), weekday('1582-10-15 13:10:15');
+
+select year('1500-01-01'), month('1500-01-01'), dayOfYear('1500-01-01');
\ No newline at end of file
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
index 63aa00426ea32..2090633802e26 100644
--- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 10
+-- Number of queries: 11
-- !query 0
@@ -89,3 +89,11 @@ select weekday('2007-02-03'), weekday('2009-07-30'),
weekday('2017-05-27'), week
struct<weekday(CAST(2007-02-03 AS DATE)):int,weekday(CAST(2009-07-30 AS
DATE)):int,weekday(CAST(2017-05-27 AS DATE)):int,weekday(CAST(NULL AS
DATE)):int,weekday(CAST(1582-10-15 13:10:15 AS DATE)):int>
-- !query 9 output
5 3 5 NULL 4
+
+
+-- !query 10
+select year('1500-01-01'), month('1500-01-01'), dayOfYear('1500-01-01')
+-- !query 10 schema
+struct<year(CAST(1500-01-01 AS DATE)):int,month(CAST(1500-01-01 AS
DATE)):int,dayofyear(CAST(1500-01-01 AS DATE)):int>
+-- !query 10 output
+1500 1 1
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]