This is an automated email from the ASF dual-hosted git repository.

yumwang pushed a commit to branch branch-3.0-preview
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0-preview by this 
push:
     new 0333d82  [SPARK-29364][SQL] Return an interval from date subtract 
according to SQL standard
0333d82 is described below

commit 0333d8226435738f46d026433cb6086e9b6448cb
Author: Maxim Gekk <[email protected]>
AuthorDate: Wed Oct 16 06:26:01 2019 -0700

    [SPARK-29364][SQL] Return an interval from date subtract according to SQL 
standard
    
    ### What changes were proposed in this pull request?
    Proposed new expression `SubtractDates` which is used in `date1` - `date2`. 
It has the `INTERVAL` type, and returns the interval from `date1` (inclusive) 
to `date2` (exclusive). For example:
    ```sql
    > select date'tomorrow' - date'yesterday';
    interval 2 days
    ```
    
    Closes #26034
    
    ### Why are the changes needed?
    - To conform to the SQL standard, which states the result type of `date 
operand 1` - `date operand 2` must be the interval type. See [4.5.3  Operations 
involving datetimes and 
intervals](http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt).
    - Improve Spark SQL UX and allow mixing date and timestamp in subtractions. 
For example: `select timestamp'now' + (date'2019-10-01' - date'2019-09-15')`
    
    ### Does this PR introduce any user-facing change?
    Before this change, the query below returns the number of days:
    ```sql
    spark-sql> select date'2019-10-05' - date'2018-09-01';
    399
    ```
    After this change, it returns an interval:
    ```sql
    spark-sql> select date'2019-10-05' - date'2018-09-01';
    interval 1 years 1 months 4 days
    ```
    
    ### How was this patch tested?
    - by new tests in `DateExpressionsSuite` and `TypeCoercionSuite`.
    - by existing tests in `date.sql`
    
    Closes #26112 from MaxGekk/date-subtract.
    
    Authored-by: Maxim Gekk <[email protected]>
    Signed-off-by: Yuming Wang <[email protected]>
    (cherry picked from commit d11cbf2e367317e6a2bedfcd71819ddf485c39c5)
    Signed-off-by: Yuming Wang <[email protected]>
---
 .../spark/sql/catalyst/analysis/TypeCoercion.scala | 15 ++++++-----
 .../catalyst/expressions/datetimeExpressions.scala | 24 ++++++++++++++++-
 .../spark/sql/catalyst/util/DateTimeUtils.scala    | 18 ++++++++++++-
 .../sql/catalyst/analysis/TypeCoercionSuite.scala  |  8 +++---
 .../expressions/DateExpressionsSuite.scala         | 30 ++++++++++++++++++----
 .../resources/sql-tests/results/datetime.sql.out   |  8 +++---
 6 files changed, 82 insertions(+), 21 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index 0983810..c451eb2 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -828,8 +828,9 @@ object TypeCoercion {
   /**
    * 1. Turns Add/Subtract of DateType/TimestampType/StringType and 
CalendarIntervalType
    *    to TimeAdd/TimeSub.
-   * 2. Turns Add/Subtract of DateType/IntegerType and IntegerType/DateType
-   *    to DateAdd/DateSub/DateDiff.
+   * 2. Turns Add/Subtract of TimestampType/DateType/IntegerType
+   *    and TimestampType/IntegerType/DateType to 
DateAdd/DateSub/SubtractDates and
+   *    to SubtractTimestamps.
    */
   object DateTimeOperations extends Rule[LogicalPlan] {
 
@@ -849,12 +850,14 @@ object TypeCoercion {
       case Add(l @ DateType(), r @ IntegerType()) => DateAdd(l, r)
       case Add(l @ IntegerType(), r @ DateType()) => DateAdd(r, l)
       case Subtract(l @ DateType(), r @ IntegerType()) => DateSub(l, r)
-      case Subtract(l @ DateType(), r @ DateType()) => DateDiff(l, r)
-      case Subtract(l @ TimestampType(), r @ TimestampType()) => 
TimestampDiff(l, r)
+      case Subtract(l @ DateType(), r @ DateType()) =>
+        if (SQLConf.get.usePostgreSQLDialect) DateDiff(l, r) else 
SubtractDates(l, r)
+      case Subtract(l @ TimestampType(), r @ TimestampType()) =>
+        SubtractTimestamps(l, r)
       case Subtract(l @ TimestampType(), r @ DateType()) =>
-        TimestampDiff(l, Cast(r, TimestampType))
+        SubtractTimestamps(l, Cast(r, TimestampType))
       case Subtract(l @ DateType(), r @ TimestampType()) =>
-        TimestampDiff(Cast(l, TimestampType), r)
+        SubtractTimestamps(Cast(l, TimestampType), r)
     }
   }
 
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 5aea884..cddd8c9 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -2099,7 +2099,7 @@ case class DatePart(field: Expression, source: 
Expression, child: Expression)
  * is set to 0 and the `microseconds` field is initialized to the microsecond 
difference
  * between the given timestamps.
  */
-case class TimestampDiff(endTimestamp: Expression, startTimestamp: Expression)
+case class SubtractTimestamps(endTimestamp: Expression, startTimestamp: 
Expression)
   extends BinaryExpression with ImplicitCastInputTypes {
 
   override def left: Expression = endTimestamp
@@ -2116,3 +2116,25 @@ case class TimestampDiff(endTimestamp: Expression, 
startTimestamp: Expression)
       s"new org.apache.spark.unsafe.types.CalendarInterval(0, $end - $start)")
   }
 }
+
+/**
+ * Returns the interval from the `left` date (inclusive) to the `right` date 
(exclusive).
+ */
+case class SubtractDates(left: Expression, right: Expression)
+  extends BinaryExpression with ImplicitCastInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, DateType)
+  override def dataType: DataType = CalendarIntervalType
+
+  override def nullSafeEval(leftDays: Any, rightDays: Any): Any = {
+    DateTimeUtils.subtractDates(leftDays.asInstanceOf[Int], 
rightDays.asInstanceOf[Int])
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    defineCodeGen(ctx, ev, (leftDays, rightDays) => {
+      val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
+      s"$dtu.subtractDates($leftDays, $rightDays)"
+    })
+  }
+}
+
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 34e8012..0888769 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -27,7 +27,7 @@ import java.util.concurrent.TimeUnit._
 import scala.util.control.NonFatal
 
 import org.apache.spark.sql.types.Decimal
-import org.apache.spark.unsafe.types.UTF8String
+import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
 
 /**
  * Helper functions for converting between internal and external date and time 
representations.
@@ -950,4 +950,20 @@ object DateTimeUtils {
       None
     }
   }
+
+  /**
+   * Subtracts two dates.
+   * @param endDate - the end date, exclusive
+   * @param startDate - the start date, inclusive
+   * @return an interval between two dates. The interval can be negative
+   *         if the end date is before the start date.
+   */
+  def subtractDates(endDate: SQLDate, startDate: SQLDate): CalendarInterval = {
+    val period = Period.between(
+      LocalDate.ofEpochDay(startDate),
+      LocalDate.ofEpochDay(endDate))
+    val months = period.getMonths + 12 * period.getYears
+    val microseconds = period.getDays * MICROS_PER_DAY
+    new CalendarInterval(months, microseconds)
+  }
 }
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
index f60e0f2..4f9e4ec 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
@@ -1430,13 +1430,13 @@ class TypeCoercionSuite extends AnalysisTest {
     ruleTest(dateTimeOperations, Add(date, intValue), DateAdd(date, intValue))
     ruleTest(dateTimeOperations, Add(intValue, date), DateAdd(date, intValue))
     ruleTest(dateTimeOperations, Subtract(date, intValue), DateSub(date, 
intValue))
-    ruleTest(dateTimeOperations, Subtract(date, date), DateDiff(date, date))
+    ruleTest(dateTimeOperations, Subtract(date, date), SubtractDates(date, 
date))
     ruleTest(dateTimeOperations, Subtract(timestamp, timestamp),
-      TimestampDiff(timestamp, timestamp))
+      SubtractTimestamps(timestamp, timestamp))
     ruleTest(dateTimeOperations, Subtract(timestamp, date),
-      TimestampDiff(timestamp, Cast(date, TimestampType)))
+      SubtractTimestamps(timestamp, Cast(date, TimestampType)))
     ruleTest(dateTimeOperations, Subtract(date, timestamp),
-      TimestampDiff(Cast(date, TimestampType), timestamp))
+      SubtractTimestamps(Cast(date, TimestampType), timestamp))
   }
 
   /**
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
index 8680a15..e893e86 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
 
 import java.sql.{Date, Timestamp}
 import java.text.SimpleDateFormat
-import java.time.{Instant, LocalDateTime, ZoneId, ZoneOffset}
+import java.time.{Instant, LocalDate, LocalDateTime, ZoneId, ZoneOffset}
 import java.util.{Calendar, Locale, TimeZone}
 import java.util.concurrent.TimeUnit
 import java.util.concurrent.TimeUnit._
@@ -1072,19 +1072,39 @@ class DateExpressionsSuite extends SparkFunSuite with 
ExpressionEvalHelper {
 
   test("timestamps difference") {
     val end = Instant.parse("2019-10-04T11:04:01.123456Z")
-    checkEvaluation(TimestampDiff(Literal(end), Literal(end)),
+    checkEvaluation(SubtractTimestamps(Literal(end), Literal(end)),
       new CalendarInterval(0, 0))
-    checkEvaluation(TimestampDiff(Literal(end), Literal(Instant.EPOCH)),
+    checkEvaluation(SubtractTimestamps(Literal(end), Literal(Instant.EPOCH)),
       CalendarInterval.fromString("interval 18173 days " +
         "11 hours 4 minutes 1 seconds 123 milliseconds 456 microseconds"))
-    checkEvaluation(TimestampDiff(Literal(Instant.EPOCH), Literal(end)),
+    checkEvaluation(SubtractTimestamps(Literal(Instant.EPOCH), Literal(end)),
       CalendarInterval.fromString("interval -18173 days " +
         "-11 hours -4 minutes -1 seconds -123 milliseconds -456 microseconds"))
     checkEvaluation(
-      TimestampDiff(
+      SubtractTimestamps(
         Literal(Instant.parse("9999-12-31T23:59:59.999999Z")),
         Literal(Instant.parse("0001-01-01T00:00:00Z"))),
       CalendarInterval.fromString("interval 521722 weeks 4 days " +
         "23 hours 59 minutes 59 seconds 999 milliseconds 999 microseconds"))
   }
+
+  test("subtract dates") {
+    val end = LocalDate.of(2019, 10, 5)
+    checkEvaluation(SubtractDates(Literal(end), Literal(end)),
+      new CalendarInterval(0, 0))
+    checkEvaluation(SubtractDates(Literal(end.plusDays(1)), Literal(end)),
+      CalendarInterval.fromString("interval 1 days"))
+    checkEvaluation(SubtractDates(Literal(end.minusDays(1)), Literal(end)),
+      CalendarInterval.fromString("interval -1 days"))
+    val epochDate = Literal(LocalDate.ofEpochDay(0))
+    checkEvaluation(SubtractDates(Literal(end), epochDate),
+      CalendarInterval.fromString("interval 49 years 9 months 4 days"))
+    checkEvaluation(SubtractDates(epochDate, Literal(end)),
+      CalendarInterval.fromString("interval -49 years -9 months -4 days"))
+    checkEvaluation(
+      SubtractDates(
+        Literal(LocalDate.of(10000, 1, 1)),
+        Literal(LocalDate.of(1, 1, 1))),
+      CalendarInterval.fromString("interval 9999 years"))
+  }
 }
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out 
b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
index c3c131d..0f4036c 100644
--- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
@@ -126,15 +126,15 @@ struct<date_sub(DATE '2001-10-01', 7):date>
 -- !query 14
 select date '2001-10-01' - date '2001-09-28'
 -- !query 14 schema
-struct<datediff(DATE '2001-10-01', DATE '2001-09-28'):int>
+struct<subtractdates(DATE '2001-10-01', DATE '2001-09-28'):interval>
 -- !query 14 output
-3
+interval 3 days
 
 
 -- !query 15
 select date'2020-01-01' - timestamp'2019-10-06 10:11:12.345678'
 -- !query 15 schema
-struct<timestampdiff(CAST(DATE '2020-01-01' AS TIMESTAMP), 
TIMESTAMP('2019-10-06 10:11:12.345678')):interval>
+struct<subtracttimestamps(CAST(DATE '2020-01-01' AS TIMESTAMP), 
TIMESTAMP('2019-10-06 10:11:12.345678')):interval>
 -- !query 15 output
 interval 12 weeks 2 days 14 hours 48 minutes 47 seconds 654 milliseconds 322 
microseconds
 
@@ -142,6 +142,6 @@ interval 12 weeks 2 days 14 hours 48 minutes 47 seconds 654 
milliseconds 322 mic
 -- !query 16
 select timestamp'2019-10-06 10:11:12.345678' - date'2020-01-01'
 -- !query 16 schema
-struct<timestampdiff(TIMESTAMP('2019-10-06 10:11:12.345678'), CAST(DATE 
'2020-01-01' AS TIMESTAMP)):interval>
+struct<subtracttimestamps(TIMESTAMP('2019-10-06 10:11:12.345678'), CAST(DATE 
'2020-01-01' AS TIMESTAMP)):interval>
 -- !query 16 output
 interval -12 weeks -2 days -14 hours -48 minutes -47 seconds -654 milliseconds 
-322 microseconds


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to