Repository: spark
Updated Branches:
  refs/heads/master 43b04b7ec -> 20d411bc5


[SPARK-16078][SQL] from_utc_timestamp/to_utc_timestamp should not depends on 
local timezone

## What changes were proposed in this pull request?

Currently, we use local timezone to parse or format a timestamp 
(TimestampType), then use Long as the microseconds since epoch UTC.

In from_utc_timestamp() and to_utc_timestamp(), we did not consider the local 
timezone, so they could return different results depending on the local timezone.

This PR will do the conversion based on human time (in the local timezone), so it 
should return the same result regardless of the timezone. But because the mapping 
from absolute timestamp to human time is not exactly a one-to-one mapping, it will 
still return wrong results in some timezones (also at the beginning or end of 
DST).

This PR is a best-effort fix. In the long term, we should make 
TimestampType timezone-aware to fix this completely.

## How was this patch tested?

Tested these functions in all timezones.

Author: Davies Liu <dav...@databricks.com>

Closes #13784 from davies/convert_tz.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/20d411bc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/20d411bc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/20d411bc

Branch: refs/heads/master
Commit: 20d411bc5d05dd099f6d5234a24e10a519a39bdf
Parents: 43b04b7
Author: Davies Liu <dav...@databricks.com>
Authored: Wed Jun 22 13:40:24 2016 -0700
Committer: Herman van Hovell <hvanhov...@databricks.com>
Committed: Wed Jun 22 13:40:24 2016 -0700

----------------------------------------------------------------------
 .../expressions/datetimeExpressions.scala       | 10 +--
 .../spark/sql/catalyst/util/DateTimeUtils.scala | 34 ++++++++--
 .../sql/catalyst/util/DateTimeUtilsSuite.scala  | 65 ++++++++++++--------
 3 files changed, 73 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/20d411bc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 773431d..04c17bd 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -730,16 +730,17 @@ case class FromUTCTimestamp(left: Expression, right: 
Expression)
          """.stripMargin)
       } else {
         val tzTerm = ctx.freshName("tz")
+        val utcTerm = ctx.freshName("utc")
         val tzClass = classOf[TimeZone].getName
         ctx.addMutableState(tzClass, tzTerm, s"""$tzTerm = 
$tzClass.getTimeZone("$tz");""")
+        ctx.addMutableState(tzClass, utcTerm, s"""$utcTerm = 
$tzClass.getTimeZone("UTC");""")
         val eval = left.genCode(ctx)
         ev.copy(code = s"""
            |${eval.code}
            |boolean ${ev.isNull} = ${eval.isNull};
            |long ${ev.value} = 0;
            |if (!${ev.isNull}) {
-           |  ${ev.value} = ${eval.value} +
-           |   ${tzTerm}.getOffset(${eval.value} / 1000) * 1000L;
+           |  ${ev.value} = $dtu.convertTz(${eval.value}, $utcTerm, $tzTerm);
            |}
          """.stripMargin)
       }
@@ -869,16 +870,17 @@ case class ToUTCTimestamp(left: Expression, right: 
Expression)
          """.stripMargin)
       } else {
         val tzTerm = ctx.freshName("tz")
+        val utcTerm = ctx.freshName("utc")
         val tzClass = classOf[TimeZone].getName
         ctx.addMutableState(tzClass, tzTerm, s"""$tzTerm = 
$tzClass.getTimeZone("$tz");""")
+        ctx.addMutableState(tzClass, utcTerm, s"""$utcTerm = 
$tzClass.getTimeZone("UTC");""")
         val eval = left.genCode(ctx)
         ev.copy(code = s"""
            |${eval.code}
            |boolean ${ev.isNull} = ${eval.isNull};
            |long ${ev.value} = 0;
            |if (!${ev.isNull}) {
-           |  ${ev.value} = ${eval.value} -
-           |   ${tzTerm}.getOffset(${eval.value} / 1000) * 1000L;
+           |  ${ev.value} = $dtu.convertTz(${eval.value}, $tzTerm, $utcTerm);
            |}
          """.stripMargin)
       }

http://git-wip-us.apache.org/repos/asf/spark/blob/20d411bc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 56bf9a7..df480a1 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -886,13 +886,37 @@ object DateTimeUtils {
   }
 
   /**
+   * Convert the timestamp `ts` from one timezone to another.
+   *
+   * TODO: Because of DST, the conversion between UTC and human time is not 
exactly one-to-one
+   * mapping, the conversion here may return wrong result, we should make the 
timestamp
+   * timezone-aware.
+   */
+  def convertTz(ts: SQLTimestamp, fromZone: TimeZone, toZone: TimeZone): 
SQLTimestamp = {
+    // We always use local timezone to parse or format a timestamp
+    val localZone = threadLocalLocalTimeZone.get()
+    val utcTs = if (fromZone.getID == localZone.getID) {
+      ts
+    } else {
+      // get the human time using local time zone, that actually is in 
fromZone.
+      val localTs = ts + localZone.getOffset(ts / 1000L) * 1000L  // in 
fromZone
+      localTs - getOffsetFromLocalMillis(localTs / 1000L, fromZone) * 1000L
+    }
+    if (toZone.getID == localZone.getID) {
+      utcTs
+    } else {
+      val localTs2 = utcTs + toZone.getOffset(utcTs / 1000L) * 1000L  // in 
toZone
+      // treat it as local timezone, convert to UTC (we could get the expected 
human time back)
+      localTs2 - getOffsetFromLocalMillis(localTs2 / 1000L, localZone) * 1000L
+    }
+  }
+
+  /**
    * Returns a timestamp of given timezone from utc timestamp, with the same 
string
    * representation in their timezone.
    */
   def fromUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = {
-    val tz = TimeZone.getTimeZone(timeZone)
-    val offset = tz.getOffset(time / 1000L)
-    time + offset * 1000L
+    convertTz(time, TimeZoneGMT, TimeZone.getTimeZone(timeZone))
   }
 
   /**
@@ -900,9 +924,7 @@ object DateTimeUtils {
    * string representation in their timezone.
    */
   def toUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = {
-    val tz = TimeZone.getTimeZone(timeZone)
-    val offset = getOffsetFromLocalMillis(time / 1000L, tz)
-    time - offset * 1000L
+    convertTz(time, TimeZone.getTimeZone(timeZone), TimeZoneGMT)
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/20d411bc/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index f9cb976..059a5b7 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -488,17 +488,23 @@ class DateTimeUtilsSuite extends SparkFunSuite {
       
assert(toJavaTimestamp(fromUTCTime(fromJavaTimestamp(Timestamp.valueOf(utc)), 
tz)).toString
         === expected)
     }
-    test("2011-12-25 09:00:00.123456", "UTC", "2011-12-25 09:00:00.123456")
-    test("2011-12-25 09:00:00.123456", "JST", "2011-12-25 18:00:00.123456")
-    test("2011-12-25 09:00:00.123456", "PST", "2011-12-25 01:00:00.123456")
-    test("2011-12-25 09:00:00.123456", "Asia/Shanghai", "2011-12-25 
17:00:00.123456")
-
-    // Daylight Saving Time
-    test("2016-03-13 09:59:59.0", "PST", "2016-03-13 01:59:59.0")
-    test("2016-03-13 10:00:00.0", "PST", "2016-03-13 03:00:00.0")
-    test("2016-11-06 08:59:59.0", "PST", "2016-11-06 01:59:59.0")
-    test("2016-11-06 09:00:00.0", "PST", "2016-11-06 01:00:00.0")
-    test("2016-11-06 10:00:00.0", "PST", "2016-11-06 02:00:00.0")
+    for (tz <- DateTimeTestUtils.ALL_TIMEZONES) {
+      DateTimeTestUtils.withDefaultTimeZone(tz) {
+        test("2011-12-25 09:00:00.123456", "UTC", "2011-12-25 09:00:00.123456")
+        test("2011-12-25 09:00:00.123456", "JST", "2011-12-25 18:00:00.123456")
+        test("2011-12-25 09:00:00.123456", "PST", "2011-12-25 01:00:00.123456")
+        test("2011-12-25 09:00:00.123456", "Asia/Shanghai", "2011-12-25 
17:00:00.123456")
+      }
+    }
+
+    DateTimeTestUtils.withDefaultTimeZone(TimeZone.getTimeZone("PST")) {
+      // Daylight Saving Time
+      test("2016-03-13 09:59:59.0", "PST", "2016-03-13 01:59:59.0")
+      test("2016-03-13 10:00:00.0", "PST", "2016-03-13 03:00:00.0")
+      test("2016-11-06 08:59:59.0", "PST", "2016-11-06 01:59:59.0")
+      test("2016-11-06 09:00:00.0", "PST", "2016-11-06 01:00:00.0")
+      test("2016-11-06 10:00:00.0", "PST", "2016-11-06 02:00:00.0")
+    }
   }
 
   test("to UTC timestamp") {
@@ -506,21 +512,28 @@ class DateTimeUtilsSuite extends SparkFunSuite {
       
assert(toJavaTimestamp(toUTCTime(fromJavaTimestamp(Timestamp.valueOf(utc)), 
tz)).toString
         === expected)
     }
-    test("2011-12-25 09:00:00.123456", "UTC", "2011-12-25 09:00:00.123456")
-    test("2011-12-25 18:00:00.123456", "JST", "2011-12-25 09:00:00.123456")
-    test("2011-12-25 01:00:00.123456", "PST", "2011-12-25 09:00:00.123456")
-    test("2011-12-25 17:00:00.123456", "Asia/Shanghai", "2011-12-25 
09:00:00.123456")
-
-    // Daylight Saving Time
-    test("2016-03-13 01:59:59", "PST", "2016-03-13 09:59:59.0")
-    // 2016-03-13 02:00:00 PST does not exists
-    test("2016-03-13 02:00:00", "PST", "2016-03-13 10:00:00.0")
-    test("2016-03-13 03:00:00", "PST", "2016-03-13 10:00:00.0")
-    test("2016-11-06 00:59:59", "PST", "2016-11-06 07:59:59.0")
-    // 2016-11-06 01:00:00 PST could be 2016-11-06 08:00:00 UTC or 2016-11-06 
09:00:00 UTC
-    test("2016-11-06 01:00:00", "PST", "2016-11-06 09:00:00.0")
-    test("2016-11-06 01:59:59", "PST", "2016-11-06 09:59:59.0")
-    test("2016-11-06 02:00:00", "PST", "2016-11-06 10:00:00.0")
+
+    for (tz <- DateTimeTestUtils.ALL_TIMEZONES) {
+      DateTimeTestUtils.withDefaultTimeZone(tz) {
+        test("2011-12-25 09:00:00.123456", "UTC", "2011-12-25 09:00:00.123456")
+        test("2011-12-25 18:00:00.123456", "JST", "2011-12-25 09:00:00.123456")
+        test("2011-12-25 01:00:00.123456", "PST", "2011-12-25 09:00:00.123456")
+        test("2011-12-25 17:00:00.123456", "Asia/Shanghai", "2011-12-25 
09:00:00.123456")
+      }
+    }
+
+    DateTimeTestUtils.withDefaultTimeZone(TimeZone.getTimeZone("PST")) {
+      // Daylight Saving Time
+      test("2016-03-13 01:59:59", "PST", "2016-03-13 09:59:59.0")
+      // 2016-03-13 02:00:00 PST does not exists
+      test("2016-03-13 02:00:00", "PST", "2016-03-13 10:00:00.0")
+      test("2016-03-13 03:00:00", "PST", "2016-03-13 10:00:00.0")
+      test("2016-11-06 00:59:59", "PST", "2016-11-06 07:59:59.0")
+      // 2016-11-06 01:00:00 PST could be 2016-11-06 08:00:00 UTC or 
2016-11-06 09:00:00 UTC
+      test("2016-11-06 01:00:00", "PST", "2016-11-06 09:00:00.0")
+      test("2016-11-06 01:59:59", "PST", "2016-11-06 09:59:59.0")
+      test("2016-11-06 02:00:00", "PST", "2016-11-06 10:00:00.0")
+    }
   }
 
   test("daysToMillis and millisToDays") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to