This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 63e4bf4 [SPARK-27401][SQL] Refactoring conversion of Timestamp to/from java.sql.Timestamp
63e4bf4 is described below
commit 63e4bf42c2b0227359d2c3eab124083d8cd67ccd
Author: Maxim Gekk <[email protected]>
AuthorDate: Tue Apr 9 15:42:27 2019 -0700
[SPARK-27401][SQL] Refactoring conversion of Timestamp to/from java.sql.Timestamp
## What changes were proposed in this pull request?
In this PR, I propose a simpler implementation of `toJavaTimestamp()`/`fromJavaTimestamp()` that reuses existing `DateTimeUtils` functions. This allows us to:
- Simplify the implementation of `toJavaTimestamp()` and properly handle negative inputs (see the sketch after this list).
- Detect `Long` overflow when converting milliseconds (`java.sql.Timestamp`) to microseconds (Catalyst's `Timestamp`).
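For illustration, here is a minimal standalone sketch of the `java.time.Instant`-based round trip that the reused helpers boil down to; the object name, constants, and the `main` check below are mine, not Spark's:

```scala
import java.sql.Timestamp
import java.time.Instant

object TimestampConversionSketch {
  private val MicrosPerSecond = 1000000L
  private val NanosPerMicro = 1000L

  // Micros since epoch -> java.sql.Timestamp. floorDiv/floorMod keep
  // negative inputs (pre-1970 timestamps) correct without manual borrowing.
  def toJavaTimestamp(us: Long): Timestamp = {
    val instant = Instant.ofEpochSecond(
      Math.floorDiv(us, MicrosPerSecond),
      Math.floorMod(us, MicrosPerSecond) * NanosPerMicro)
    Timestamp.from(instant)
  }

  // java.sql.Timestamp -> micros since epoch. addExact/multiplyExact throw
  // ArithmeticException on Long overflow instead of silently wrapping.
  def fromJavaTimestamp(t: Timestamp): Long = {
    val instant = t.toInstant
    Math.addExact(
      Math.multiplyExact(instant.getEpochSecond, MicrosPerSecond),
      instant.getNano / NanosPerMicro)
  }

  def main(args: Array[String]): Unit = {
    // Negative micros round-trip correctly: -1 us is 1969-12-31 23:59:59.999999.
    assert(fromJavaTimestamp(toJavaTimestamp(-1L)) == -1L)
  }
}
```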
## How was this patch tested?
By the existing test suites `DateTimeUtilsSuite`, `DateFunctionsSuite`, `DateExpressionsSuite` and `CastSuite`, and by a new benchmark for exporting/importing timestamps added to `DateTimeBenchmark`:
Before:
```
To/from java.sql.Timestamp:               Best Time(ms)   Avg Time(ms)   Stdev(ms)   Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
From java.sql.Timestamp                             290            335          49        17.2          58.0       1.0X
Collect longs                                      1234           1681         487         4.1         246.8       0.2X
Collect timestamps                                 1718           1755          63         2.9         343.7       0.2X
```
After:
```
To/from java.sql.Timestamp:               Best Time(ms)   Avg Time(ms)   Stdev(ms)   Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
From java.sql.Timestamp                             283            301          19        17.7          56.6       1.0X
Collect longs                                      1048           1087          36         4.8         209.6       0.3X
Collect timestamps                                 1425           1479          56         3.5         285.1       0.2X
```
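As a quick sanity check on the derived columns: `Per Row(ns)` is the best time divided by the row count (5000000, per the benchmark code in the diff below), and `Relative` is the baseline's per-row time divided by the given case's, e.g.:

```scala
// Deriving the "After" table's Per Row(ns) and Relative columns from the
// best times; rowsNum matches the DateTimeBenchmark diff below.
val rowsNum = 5000000L
val baselinePerRowNs = 283.0 * 1e6 / rowsNum       // 56.6 ns/row ("From java.sql.Timestamp")
val collectLongsPerRowNs = 1048.0 * 1e6 / rowsNum  // 209.6 ns/row ("Collect longs")
println(f"${baselinePerRowNs / collectLongsPerRowNs}%.1fX")  // 0.3X, as reported
```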
Closes #24311 from MaxGekk/conv-java-sql-date-timestamp.
Authored-by: Maxim Gekk <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../spark/sql/catalyst/util/DateTimeUtils.scala | 19 ++---------------
sql/core/benchmarks/DateTimeBenchmark-results.txt | 9 ++++++++
.../execution/benchmark/DateTimeBenchmark.scala | 24 ++++++++++++++++++++++
3 files changed, 35 insertions(+), 17 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 3004336..7687afa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -120,29 +120,14 @@ object DateTimeUtils {
* Returns a java.sql.Timestamp from number of micros since epoch.
*/
def toJavaTimestamp(us: SQLTimestamp): Timestamp = {
- // setNanos() will overwrite the millisecond part, so the milliseconds should be
- // cut off at seconds
- var seconds = us / MICROS_PER_SECOND
- var micros = us % MICROS_PER_SECOND
- // setNanos() can not accept negative value
- if (micros < 0) {
- micros += MICROS_PER_SECOND
- seconds -= 1
- }
- val t = new Timestamp(SECONDS.toMillis(seconds))
- t.setNanos(MICROSECONDS.toNanos(micros).toInt)
- t
+ Timestamp.from(microsToInstant(us))
}
/**
* Returns the number of micros since epoch from java.sql.Timestamp.
*/
def fromJavaTimestamp(t: Timestamp): SQLTimestamp = {
- if (t != null) {
- MILLISECONDS.toMicros(t.getTime()) + NANOSECONDS.toMicros(t.getNanos()) % NANOS_PER_MICROS
- } else {
- 0L
- }
+ instantToMicros(t.toInstant)
}
/**
diff --git a/sql/core/benchmarks/DateTimeBenchmark-results.txt b/sql/core/benchmarks/DateTimeBenchmark-results.txt
index 6c39f8f..d994752 100644
--- a/sql/core/benchmarks/DateTimeBenchmark-results.txt
+++ b/sql/core/benchmarks/DateTimeBenchmark-results.txt
@@ -414,3 +414,12 @@ to_date wholestage off 1477 / 1479 0.7 14
to_date wholestage on                        1468 / 1473          0.7      1468.2       1.0X
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.4
+Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
+To/from java.sql.Timestamp:               Best Time(ms)   Avg Time(ms)   Stdev(ms)   Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+From java.sql.Timestamp                             283            301          19        17.7          56.6       1.0X
+Collect longs                                      1048           1087          36         4.8         209.6       0.3X
+Collect timestamps                                 1425           1479          56         3.5         285.1       0.2X
+
+
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeBenchmark.scala
index d3fe697..df0f87e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeBenchmark.scala
@@ -17,6 +17,9 @@
package org.apache.spark.sql.execution.benchmark
+import java.sql.Timestamp
+
+import org.apache.spark.benchmark.Benchmark
import org.apache.spark.sql.internal.SQLConf
/**
@@ -121,5 +124,26 @@ object DateTimeBenchmark extends SqlBasedBenchmark {
run(n, "to date str", dateStrExpr)
run(n, "to_date", s"to_date($dateStrExpr, 'yyyy-MM-dd')")
}
+ runBenchmark("Conversion from/to external types") {
+ import spark.implicits._
+ val rowsNum = 5000000
+ val numIters = 3
+ val benchmark = new Benchmark("To/from java.sql.Timestamp", rowsNum, output = output)
+ benchmark.addCase("From java.sql.Timestamp", numIters) { _ =>
+ spark.range(rowsNum)
+ .map(millis => new Timestamp(millis))
+ .write.format("noop").save()
+ }
+ benchmark.addCase("Collect longs", numIters) { _ =>
+ spark.range(0, rowsNum, 1, 1)
+ .collect()
+ }
+ benchmark.addCase("Collect timestamps", numIters) { _ =>
+ spark.range(0, rowsNum, 1, 1)
+ .map(millis => new Timestamp(millis))
+ .collect()
+ }
+ benchmark.run()
+ }
}
}