This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 63e4bf4 [SPARK-27401][SQL] Refactoring conversion of Timestamp to/from java.sql.Timestamp
63e4bf4 is described below
commit 63e4bf42c2b0227359d2c3eab124083d8cd67ccd
Author: Maxim Gekk <[email protected]>
AuthorDate: Tue Apr 9 15:42:27 2019 -0700
[SPARK-27401][SQL] Refactoring conversion of Timestamp to/from java.sql.Timestamp
## What changes were proposed in this pull request?
In this PR, I propose a simpler implementation of `toJavaTimestamp()`/`fromJavaTimestamp()` that reuses existing `DateTimeUtils` functions. This allows us to:
- Simplify the implementation of `toJavaTimestamp()` and properly handle negative inputs (see the sketch after this list).
- Detect `Long` overflow when converting milliseconds (`java.sql.Timestamp`) to microseconds (Catalyst's `Timestamp`).
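For illustration, here is a minimal standalone sketch of the `java.time.Instant`-based round trip that the reused helpers boil down to; the object name, constants, and the `main` check below are mine, not Spark's:

```scala
import java.sql.Timestamp
import java.time.Instant

object TimestampConversionSketch {
  private val MicrosPerSecond = 1000000L
  private val NanosPerMicro = 1000L

  // Micros since epoch -> java.sql.Timestamp. floorDiv/floorMod keep
  // negative inputs (pre-1970 timestamps) correct without manual borrowing.
  def toJavaTimestamp(us: Long): Timestamp = {
    val instant = Instant.ofEpochSecond(
      Math.floorDiv(us, MicrosPerSecond),
      Math.floorMod(us, MicrosPerSecond) * NanosPerMicro)
    Timestamp.from(instant)
  }

  // java.sql.Timestamp -> micros since epoch. addExact/multiplyExact throw
  // ArithmeticException on Long overflow instead of silently wrapping.
  def fromJavaTimestamp(t: Timestamp): Long = {
    val instant = t.toInstant
    Math.addExact(
      Math.multiplyExact(instant.getEpochSecond, MicrosPerSecond),
      instant.getNano / NanosPerMicro)
  }

  def main(args: Array[String]): Unit = {
    // Negative micros round-trip correctly: -1 us is 1969-12-31 23:59:59.999999.
    assert(fromJavaTimestamp(toJavaTimestamp(-1L)) == -1L)
  }
}
```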
## How was this patch tested?
By the existing test suites `DateTimeUtilsSuite`, `DateFunctionsSuite`, `DateExpressionsSuite` and `CastSuite`, and by a new benchmark for exporting/importing timestamps added to `DateTimeBenchmark`:
Before:
```
To/from java.sql.Timestamp:               Best Time(ms)   Avg Time(ms)   Stdev(ms)   Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
From java.sql.Timestamp                             290            335          49        17.2          58.0       1.0X
Collect longs                                      1234           1681         487         4.1         246.8       0.2X
Collect timestamps                                 1718           1755          63         2.9         343.7       0.2X
```
After:
```
To/from java.sql.Timestamp:               Best Time(ms)   Avg Time(ms)   Stdev(ms)   Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
From java.sql.Timestamp                             283            301          19        17.7          56.6       1.0X
Collect longs                                      1048           1087          36         4.8         209.6       0.3X
Collect timestamps                                 1425           1479          56         3.5         285.1       0.2X
```
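As a quick sanity check on the derived columns: `Per Row(ns)` is the best time divided by the row count (5000000, per the benchmark code in the diff below), and `Relative` is the baseline's per-row time divided by the given case's, e.g.:

```scala
// Deriving the "After" table's Per Row(ns) and Relative columns from the
// best times; rowsNum matches the DateTimeBenchmark diff below.
val rowsNum = 5000000L
val baselinePerRowNs = 283.0 * 1e6 / rowsNum       // 56.6 ns/row ("From java.sql.Timestamp")
val collectLongsPerRowNs = 1048.0 * 1e6 / rowsNum  // 209.6 ns/row ("Collect longs")
println(f"${baselinePerRowNs / collectLongsPerRowNs}%.1fX")  // 0.3X, as reported
```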
Closes #24311 from MaxGekk/conv-java-sql-date-timestamp.
Authored-by: Maxim Gekk <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../spark/sql/catalyst/util/DateTimeUtils.scala | 19 ++---------------
sql/core/benchmarks/DateTimeBenchmark-results.txt | 9 ++++++++
.../execution/benchmark/DateTimeBenchmark.scala | 24 ++++++++++++++++++++++
3 files changed, 35 insertions(+), 17 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 3004336..7687afa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -120,29 +120,14 @@ object DateTimeUtils {
* Returns a java.sql.Timestamp from number of micros since epoch.
*/
def toJavaTimestamp(us: SQLTimestamp): Timestamp = {
- // setNanos() will overwrite the millisecond part, so the milliseconds should be
- // cut off at seconds
- var seconds = us / MICROS_PER_SECOND
- var micros = us % MICROS_PER_SECOND
- // setNanos() can not accept negative value
- if (micros < 0) {
- micros += MICROS_PER_SECOND
- seconds -= 1
- }
- val t = new Timestamp(SECONDS.toMillis(seconds))
- t.setNanos(MICROSECONDS.toNanos(micros).toInt)
- t
+ Timestamp.from(microsToInstant(us))
}
/**
* Returns the number of micros since epoch from java.sql.Timestamp.
*/
def fromJavaTimestamp(t: Timestamp): SQLTimestamp = {
- if (t != null) {
- MILLISECONDS.toMicros(t.getTime()) + NANOSECONDS.toMicros(t.getNanos()) % NANOS_PER_MICROS
- } else {
- 0L
- }
+ instantToMicros(t.toInstant)
}
/**
diff --git a/sql/core/benchmarks/DateTimeBenchmark-results.txt b/sql/core/benchmarks/DateTimeBenchmark-results.txt
index 6c39f8f..d994752 100644
--- a/sql/core/benchmarks/DateTimeBenchmark-results.txt
+++ b/sql/core/benchmarks/DateTimeBenchmark-results.txt
@@ -414,3 +414,12 @@ to_date wholestage off 1477 / 1479 0.7 14
to_date wholestage on                        1468 / 1473          0.7      1468.2       1.0X
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.4
+Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
+To/from java.sql.Timestamp:               Best Time(ms)   Avg Time(ms)   Stdev(ms)   Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+From java.sql.Timestamp                             283            301          19        17.7          56.6       1.0X
+Collect longs                                      1048           1087          36         4.8         209.6       0.3X
+Collect timestamps                                 1425           1479          56         3.5         285.1       0.2X
+
+
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeBenchmark.scala
index d3fe697..df0f87e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeBenchmark.scala
@@ -17,6 +17,9 @@
package org.apache.spark.sql.execution.benchmark
+import java.sql.Timestamp
+
+import org.apache.spark.benchmark.Benchmark
import org.apache.spark.sql.internal.SQLConf
/**
@@ -121,5 +124,26 @@ object DateTimeBenchmark extends SqlBasedBenchmark {
run(n, "to date str", dateStrExpr)
run(n, "to_date", s"to_date($dateStrExpr, 'yyyy-MM-dd')")
}
+ runBenchmark("Conversion from/to external types") {
+ import spark.implicits._
+ val rowsNum = 5000000
+ val numIters = 3
+ val benchmark = new Benchmark("To/from java.sql.Timestamp", rowsNum, output = output)
+ benchmark.addCase("From java.sql.Timestamp", numIters) { _ =>
+ spark.range(rowsNum)
+ .map(millis => new Timestamp(millis))
+ .write.format("noop").save()
+ }
+ benchmark.addCase("Collect longs", numIters) { _ =>
+ spark.range(0, rowsNum, 1, 1)
+ .collect()
+ }
+ benchmark.addCase("Collect timestamps", numIters) { _ =>
+ spark.range(0, rowsNum, 1, 1)
+ .map(millis => new Timestamp(millis))
+ .collect()
+ }
+ benchmark.run()
+ }
}
}