[
https://issues.apache.org/jira/browse/SPARK-31449?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17092824#comment-17092824
]
Maxim Gekk commented on SPARK-31449:
------------------------------------
[~cloud_fan] [~hyukjin.kwon] I compared the results of those 2 functions for all
time zones with a step of 1 day, and found many differences in the results:
{code:scala}
test("Investigate the difference between JDK and Spark's time zone offset
calculation") {
import java.util.{Calendar, TimeZone}
import sun.util.calendar.ZoneInfo
/**
 * Local copy of the Spark 2.4 offset lookup that the quoted issue description
 * attributes to `DateTimeUtils.getOffsetFromLocalMillis()`; it is the
 * "Spark" side of the comparison made by this test.
 *
 * Starts from the zone's raw offset, refines it with up to two
 * `tz.getOffset` look-ups, and falls back to a `java.util.Calendar`
 * reverse lookup when the two look-ups disagree.
 *
 * @param millisLocal the local (wall clock) timestamp in milliseconds
 * @param tz the time zone whose offset is requested
 * @return the zone offset, as produced by `tz.getOffset` or by the Calendar
 *         fallback, widened to Long
 */
def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): Long = {
// first guess: the zone's raw offset
var guess = tz.getRawOffset
// the actual offset should be calculated based on milliseconds in UTC
val offset = tz.getOffset(millisLocal - guess)
if (offset != guess) {
// second pass: repeat the lookup using the offset found by the first pass
guess = tz.getOffset(millisLocal - offset)
if (guess != offset) {
// fallback to do the reverse lookup using java.sql.Timestamp
// this should only happen near the start or end of DST
// NOTE(review): the code below goes through java.util.Calendar rather than
// java.sql.Timestamp, despite what the comment above says.
// Split millisLocal into a day number and calendar fields.
val days = Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt
val year = getYear(days)
val month = getMonth(days)
val day = getDayOfMonth(days)
// `%` keeps the sign of millisLocal, so the remainder can be negative ...
var millisOfDay = (millisLocal % MILLIS_PER_DAY).toInt
if (millisOfDay < 0) {
// ... in which case it is shifted up by one day to become non-negative
millisOfDay += MILLIS_PER_DAY.toInt
}
// Break the time of day into hours, minutes, seconds and milliseconds.
val seconds = (millisOfDay / 1000L).toInt
val hh = seconds / 3600
val mm = seconds / 60 % 60
val ss = seconds % 60
val ms = millisOfDay % 1000
val calendar = Calendar.getInstance(tz)
// Calendar months are zero-based; presumably getMonth is one-based (TODO confirm)
calendar.set(year, month - 1, day, hh, mm, ss)
calendar.set(Calendar.MILLISECOND, ms)
// offset = local millis minus the millis the calendar resolved for those fields
guess = (millisLocal - calendar.getTimeInMillis()).toInt
}
}
guess
}
/**
 * Offset lookup that follows the JDK `GregorianCalendar` snippet quoted in the
 * issue description: a `ZoneInfo` is asked directly via `getOffsetsByWall`,
 * any other `TimeZone` is queried at the local time shifted by its raw offset.
 *
 * @param millisLocal the local (wall clock) timestamp in milliseconds
 * @param tz the time zone whose offset is requested
 * @return the offset reported by the zone, widened to Long
 */
def getOffsetFromLocalMillis2(millisLocal: Long, tz: TimeZone): Long = {
  val wallOffset: Int = tz match {
    case info: ZoneInfo =>
      // no output array is needed, only the returned total offset
      info.getOffsetsByWall(millisLocal, null)
    case other: TimeZone =>
      val shiftedMillis = millisLocal - other.getRawOffset
      other.getOffset(shiftedMillis)
  }
  wallOffset
}
// For every zone in ALL_TIMEZONES (ordered by id), step from local
// 0001-01-01 00:00:00 up to (but excluding) 2037-01-01 00:00:00 in increments
// of MILLIS_PER_DAY and print each point where the two offset functions disagree.
ALL_TIMEZONES
  .sortBy(_.getId)
  .foreach { zid =>
    withDefaultTimeZone(zid) {
      val start = microsToMillis(instantToMicros(
        LocalDateTime.of(1, 1, 1, 0, 0, 0).atZone(zid).toInstant))
      val end = microsToMillis(instantToMicros(
        LocalDateTime.of(2037, 1, 1, 0, 0, 0).atZone(zid).toInstant))
      // The TimeZone does not change while scanning one zone, so resolve it once
      // instead of twice per iteration.
      val tz = TimeZone.getTimeZone(zid)
      val step: Long = MILLIS_PER_DAY
      var millis = start
      while (millis < end) {
        val offset1 = getOffsetFromLocalMillis(millis, tz)
        val offset2 = getOffsetFromLocalMillis2(millis, tz)
        if (offset1 != offset2) {
          // Output columns: zone id, timestamp, offset1, offset2. The literal is
          // kept on one line because a single-line string literal cannot contain
          // a line break.
          println(s"${zid.getId} ${new Timestamp(millis)} $offset1 $offset2")
        }
        millis += step
      }
    }
  }
}
{code}
{code}
Africa/Algiers 1916-10-01 23:47:48.0 3600000 0
Africa/Algiers 1917-10-07 23:47:48.0 3600000 0
Africa/Algiers 1918-10-06 23:47:48.0 3600000 0
Africa/Algiers 1919-10-05 23:47:48.0 3600000 0
Africa/Algiers 1920-10-23 23:47:48.0 3600000 0
Africa/Algiers 1921-06-21 23:47:48.0 3600000 0
Africa/Algiers 1946-10-06 23:47:48.0 3600000 0
Africa/Algiers 1963-04-13 23:47:48.0 3600000 0
Africa/Algiers 1971-09-26 23:47:48.0 3600000 0
Africa/Algiers 1979-10-25 23:47:48.0 3600000 0
Africa/Ceuta 1900-01-01 00:00:00.0 3600000 -1276000
Africa/Ceuta 1924-10-05 00:21:16.0 3600000 0
Africa/Ceuta 1926-10-03 00:21:16.0 3600000 0
Africa/Ceuta 1927-10-02 00:21:16.0 3600000 0
Africa/Ceuta 1928-10-07 00:21:16.0 3600000 0
Africa/Sao_Tome 1899-12-31 23:33:04.0 0 -2205000
Africa/Tripoli 1952-01-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1954-01-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1956-01-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1982-01-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1982-10-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1983-10-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1984-10-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1985-10-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1986-10-03 00:07:16.0 7200000 3600000
Africa/Tripoli 1987-10-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1988-10-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1989-10-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1996-09-30 00:07:16.0 7200000 3600000
America/Inuvik 1965-10-30 18:00:00.0 -21600000 -28800000
America/Iqaluit 1999-10-30 20:00:00.0 -14400000 -21600000
America/Pangnirtung 1999-10-30 20:00:00.0 -14400000 -21600000
Antarctica/Casey 1900-01-01 00:00:00.0 28800000 0
Antarctica/Davis 1900-01-01 00:00:00.0 25200000 0
Antarctica/Davis 2009-10-18 05:00:00.0 25200000 18000000
Antarctica/Davis 2011-10-28 05:00:00.0 25200000 18000000
Antarctica/DumontDUrville 1900-01-01 00:00:00.0 36000000 0
Antarctica/Mawson 1900-01-01 00:00:00.0 18000000 0
Antarctica/Syowa 1900-01-01 00:00:00.0 10800000 0
Antarctica/Vostok 1900-01-01 00:00:00.0 21600000 0
Atlantic/Reykjavik 1939-10-29 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1940-11-03 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1941-11-02 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1942-10-25 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1943-10-24 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1944-10-22 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1945-10-28 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1946-10-27 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1947-10-26 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1948-10-24 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1949-10-30 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1950-10-22 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1951-10-28 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1952-10-26 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1953-10-25 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1954-10-24 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1955-10-23 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1956-10-28 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1957-10-27 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1958-10-26 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1959-10-25 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1960-10-23 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1961-10-22 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1962-10-28 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1963-10-27 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1964-10-25 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1965-10-24 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1966-10-23 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1967-10-29 01:28:00.0 0 -3600000
Europe/Andorra 1900-12-31 23:53:56.0 364000 0
Europe/Brussels 1919-10-04 23:42:30.0 3600000 0
Europe/Brussels 1920-10-23 23:42:30.0 3600000 0
Europe/Brussels 1921-10-25 23:42:30.0 3600000 0
Europe/Brussels 1922-10-07 23:42:30.0 3600000 0
Europe/Brussels 1923-10-06 23:42:30.0 3600000 0
Europe/Brussels 1924-10-04 23:42:30.0 3600000 0
Europe/Brussels 1925-10-03 23:42:30.0 3600000 0
Europe/Brussels 1926-10-02 23:42:30.0 3600000 0
Europe/Brussels 1927-10-01 23:42:30.0 3600000 0
Europe/Gibraltar 1900-01-01 00:21:24.0 3600000 0
Europe/Luxembourg 1918-11-24 23:35:24.0 3600000 0
Europe/Madrid 1900-01-01 00:00:00.0 3600000 -884000
Europe/Madrid 1918-10-07 00:14:44.0 3600000 0
Europe/Madrid 1919-10-07 00:14:44.0 3600000 0
Europe/Madrid 1924-10-05 00:14:44.0 3600000 0
Europe/Madrid 1926-10-03 00:14:44.0 3600000 0
Europe/Madrid 1927-10-02 00:14:44.0 3600000 0
Europe/Madrid 1928-10-07 00:14:44.0 3600000 0
Europe/Madrid 1929-10-06 00:14:44.0 3600000 0
Europe/Madrid 1937-10-03 00:14:44.0 3600000 0
Europe/Madrid 1939-10-08 00:14:44.0 3600000 0
Europe/Monaco 1916-10-01 23:30:28.0 3600000 0
Europe/Monaco 1917-10-07 23:30:28.0 3600000 0
Europe/Monaco 1918-10-06 23:30:28.0 3600000 0
Europe/Monaco 1919-10-05 23:30:28.0 3600000 0
Europe/Monaco 1920-10-23 23:30:28.0 3600000 0
Europe/Monaco 1921-10-25 23:30:28.0 3600000 0
Europe/Monaco 1922-10-07 23:30:28.0 3600000 0
Europe/Monaco 1923-10-06 23:30:28.0 3600000 0
Europe/Monaco 1924-10-04 23:30:28.0 3600000 0
Europe/Monaco 1925-10-03 23:30:28.0 3600000 0
Europe/Monaco 1926-10-02 23:30:28.0 3600000 0
Europe/Monaco 1927-10-01 23:30:28.0 3600000 0
Europe/Monaco 1928-10-06 23:30:28.0 3600000 0
Europe/Monaco 1929-10-05 23:30:28.0 3600000 0
Europe/Monaco 1930-10-04 23:30:28.0 3600000 0
Europe/Monaco 1931-10-03 23:30:28.0 3600000 0
Europe/Monaco 1932-10-01 23:30:28.0 3600000 0
Europe/Monaco 1933-10-07 23:30:28.0 3600000 0
Europe/Monaco 1934-10-06 23:30:28.0 3600000 0
Europe/Monaco 1935-10-05 23:30:28.0 3600000 0
Europe/Monaco 1936-10-03 23:30:28.0 3600000 0
Europe/Monaco 1937-10-02 23:30:28.0 3600000 0
Europe/Monaco 1938-10-01 23:30:28.0 3600000 0
Europe/Monaco 1939-11-18 23:30:28.0 3600000 0
Europe/Paris 1916-10-01 23:50:39.0 3600000 0
Europe/Paris 1917-10-07 23:50:39.0 3600000 0
Europe/Paris 1918-10-06 23:50:39.0 3600000 0
Europe/Paris 1919-10-05 23:50:39.0 3600000 0
Europe/Paris 1920-10-23 23:50:39.0 3600000 0
Europe/Paris 1921-10-25 23:50:39.0 3600000 0
Europe/Paris 1922-10-07 23:50:39.0 3600000 0
Europe/Paris 1923-10-06 23:50:39.0 3600000 0
Europe/Paris 1924-10-04 23:50:39.0 3600000 0
Europe/Paris 1925-10-03 23:50:39.0 3600000 0
Europe/Paris 1926-10-02 23:50:39.0 3600000 0
Europe/Paris 1927-10-01 23:50:39.0 3600000 0
Europe/Paris 1928-10-06 23:50:39.0 3600000 0
Europe/Paris 1929-10-05 23:50:39.0 3600000 0
Europe/Paris 1930-10-04 23:50:39.0 3600000 0
Europe/Paris 1931-10-03 23:50:39.0 3600000 0
Europe/Paris 1932-10-01 23:50:39.0 3600000 0
Europe/Paris 1933-10-07 23:50:39.0 3600000 0
Europe/Paris 1934-10-06 23:50:39.0 3600000 0
Europe/Paris 1935-10-05 23:50:39.0 3600000 0
Europe/Paris 1936-10-03 23:50:39.0 3600000 0
Europe/Paris 1937-10-02 23:50:39.0 3600000 0
Europe/Paris 1938-10-01 23:50:39.0 3600000 0
Europe/Paris 1939-11-18 23:50:39.0 3600000 0
Iceland 1939-10-29 01:28:00.0 0 -3600000
Iceland 1940-11-03 01:28:00.0 0 -3600000
Iceland 1941-11-02 01:28:00.0 0 -3600000
Iceland 1942-10-25 01:28:00.0 0 -3600000
Iceland 1943-10-24 01:28:00.0 0 -3600000
Iceland 1944-10-22 01:28:00.0 0 -3600000
Iceland 1945-10-28 01:28:00.0 0 -3600000
Iceland 1946-10-27 01:28:00.0 0 -3600000
Iceland 1947-10-26 01:28:00.0 0 -3600000
Iceland 1948-10-24 01:28:00.0 0 -3600000
Iceland 1949-10-30 01:28:00.0 0 -3600000
Iceland 1950-10-22 01:28:00.0 0 -3600000
Iceland 1951-10-28 01:28:00.0 0 -3600000
Iceland 1952-10-26 01:28:00.0 0 -3600000
Iceland 1953-10-25 01:28:00.0 0 -3600000
Iceland 1954-10-24 01:28:00.0 0 -3600000
Iceland 1955-10-23 01:28:00.0 0 -3600000
Iceland 1956-10-28 01:28:00.0 0 -3600000
Iceland 1957-10-27 01:28:00.0 0 -3600000
Iceland 1958-10-26 01:28:00.0 0 -3600000
Iceland 1959-10-25 01:28:00.0 0 -3600000
Iceland 1960-10-23 01:28:00.0 0 -3600000
Iceland 1961-10-22 01:28:00.0 0 -3600000
Iceland 1962-10-28 01:28:00.0 0 -3600000
Iceland 1963-10-27 01:28:00.0 0 -3600000
Iceland 1964-10-25 01:28:00.0 0 -3600000
Iceland 1965-10-24 01:28:00.0 0 -3600000
Iceland 1966-10-23 01:28:00.0 0 -3600000
Iceland 1967-10-29 01:28:00.0 0 -3600000
Indian/Kerguelen 1900-01-01 00:00:00.0 18000000 0
Kwajalein 1969-09-30 23:50:40.0 39600000 -43200000
Libya 1952-01-01 00:07:16.0 7200000 3600000
Libya 1954-01-01 00:07:16.0 7200000 3600000
Libya 1956-01-01 00:07:16.0 7200000 3600000
Libya 1982-01-01 00:07:16.0 7200000 3600000
Libya 1982-10-01 00:07:16.0 7200000 3600000
Libya 1983-10-01 00:07:16.0 7200000 3600000
Libya 1984-10-01 00:07:16.0 7200000 3600000
Libya 1985-10-01 00:07:16.0 7200000 3600000
Libya 1986-10-03 00:07:16.0 7200000 3600000
Libya 1987-10-01 00:07:16.0 7200000 3600000
Libya 1988-10-01 00:07:16.0 7200000 3600000
Libya 1989-10-01 00:07:16.0 7200000 3600000
Libya 1996-09-30 00:07:16.0 7200000 3600000
Pacific/Apia 1900-01-01 00:00:00.0 46800000 -41216000
Pacific/Enderbury 1900-01-01 00:00:00.0 46800000 -41060000
Pacific/Fakaofo 1900-01-01 00:00:00.0 46800000 -41096000
Pacific/Kiritimati 1900-01-01 00:00:00.0 50400000 -37760000
Pacific/Kwajalein 1969-09-30 23:50:40.0 39600000 -43200000
{code}
> Investigate the difference between JDK and Spark's time zone offset
> calculation
> -------------------------------------------------------------------------------
>
> Key: SPARK-31449
> URL: https://issues.apache.org/jira/browse/SPARK-31449
> Project: Spark
> Issue Type: Improvement
> Components: SQL
> Affects Versions: 2.4.5
> Reporter: Maxim Gekk
> Priority: Major
>
> Spark 2.4 calculates time zone offsets from wall clock timestamp using
> `DateTimeUtils.getOffsetFromLocalMillis()` (see
> https://github.com/apache/spark/blob/branch-2.4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala#L1088-L1118):
> {code:scala}
> private[sql] def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone):
> Long = {
> var guess = tz.getRawOffset
> // the actual offset should be calculated based on milliseconds in UTC
> val offset = tz.getOffset(millisLocal - guess)
> if (offset != guess) {
> guess = tz.getOffset(millisLocal - offset)
> if (guess != offset) {
> // fallback to do the reverse lookup using java.sql.Timestamp
> // this should only happen near the start or end of DST
> val days = Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt
> val year = getYear(days)
> val month = getMonth(days)
> val day = getDayOfMonth(days)
> var millisOfDay = (millisLocal % MILLIS_PER_DAY).toInt
> if (millisOfDay < 0) {
> millisOfDay += MILLIS_PER_DAY.toInt
> }
> val seconds = (millisOfDay / 1000L).toInt
> val hh = seconds / 3600
> val mm = seconds / 60 % 60
> val ss = seconds % 60
> val ms = millisOfDay % 1000
> val calendar = Calendar.getInstance(tz)
> calendar.set(year, month - 1, day, hh, mm, ss)
> calendar.set(Calendar.MILLISECOND, ms)
> guess = (millisLocal - calendar.getTimeInMillis()).toInt
> }
> }
> guess
> }
> {code}
> Meanwhile, JDK's GregorianCalendar uses special methods of ZoneInfo, see
> https://github.com/AdoptOpenJDK/openjdk-jdk8u/blob/aa318070b27849f1fe00d14684b2a40f7b29bf79/jdk/src/share/classes/java/util/GregorianCalendar.java#L2795-L2801:
> {code:java}
> if (zone instanceof ZoneInfo) {
> ((ZoneInfo)zone).getOffsetsByWall(millis, zoneOffsets);
> } else {
> int gmtOffset = isFieldSet(fieldMask, ZONE_OFFSET) ?
> internalGet(ZONE_OFFSET) :
> zone.getRawOffset();
> zone.getOffsets(millis - gmtOffset, zoneOffsets);
> }
> {code}
> Need to investigate whether there are any differences in results between the 2 approaches.
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]