This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 2fc57bba [SPARK-36015][SQL] Support TimestampNTZType in the Window 
spec definition
2fc57bba is described below

commit 2fc57bba31a9bea3b97582751a693acd268158e9
Author: gengjiaan <gengji...@360.cn>
AuthorDate: Wed Jul 7 20:27:05 2021 +0300

    [SPARK-36015][SQL] Support TimestampNTZType in the Window spec definition
    
    ### What changes were proposed in this pull request?
    The method `WindowSpecDefinition.isValidFrameType` doesn't consider 
`TimestampNTZType`. We should support it as for `TimestampType`.
    
    ### Why are the changes needed?
    Support `TimestampNTZType` in the Window spec definition.
    
    ### Does this PR introduce _any_ user-facing change?
    'Yes'. This PR allows users use  `TimestampNTZType` as the sort spec in 
window spec definition.
    
    ### How was this patch tested?
    New tests.
    
    Closes #33246 from beliefer/SPARK-36015.
    
    Authored-by: gengjiaan <gengji...@360.cn>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
    (cherry picked from commit 62ff2add9444fbd54802548b3daf7cde5820feef)
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 .../catalyst/expressions/windowExpressions.scala   |  6 +--
 .../sql/execution/window/WindowExecBase.scala      | 10 ++--
 .../src/test/resources/sql-tests/inputs/window.sql |  9 ++++
 .../resources/sql-tests/results/window.sql.out     | 56 +++++++++++++++++++++-
 4 files changed, 73 insertions(+), 8 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
index 2555c6a..fc2e449 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
@@ -102,9 +102,9 @@ case class WindowSpecDefinition(
   private def isValidFrameType(ft: DataType): Boolean = 
(orderSpec.head.dataType, ft) match {
     case (DateType, IntegerType) => true
     case (DateType, _: YearMonthIntervalType) => true
-    case (TimestampType, CalendarIntervalType) => true
-    case (TimestampType, _: YearMonthIntervalType) => true
-    case (TimestampType, _: DayTimeIntervalType) => true
+    case (TimestampType | TimestampNTZType, CalendarIntervalType) => true
+    case (TimestampType | TimestampNTZType, _: YearMonthIntervalType) => true
+    case (TimestampType | TimestampNTZType, _: DayTimeIntervalType) => true
     case (a, b) => a == b
   }
 }
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala
index 2aa0b02..f3b3b34 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala
@@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
 import org.apache.spark.sql.execution.UnaryExecNode
-import org.apache.spark.sql.types.{CalendarIntervalType, DateType, 
DayTimeIntervalType, IntegerType, TimestampType, YearMonthIntervalType}
+import org.apache.spark.sql.types._
 
 trait WindowExecBase extends UnaryExecNode {
   def windowExpression: Seq[NamedExpression]
@@ -96,10 +96,12 @@ trait WindowExecBase extends UnaryExecNode {
         val boundExpr = (expr.dataType, boundOffset.dataType) match {
           case (DateType, IntegerType) => DateAdd(expr, boundOffset)
           case (DateType, _: YearMonthIntervalType) => DateAddYMInterval(expr, 
boundOffset)
-          case (TimestampType, CalendarIntervalType) => TimeAdd(expr, 
boundOffset, Some(timeZone))
-          case (TimestampType, _: YearMonthIntervalType) =>
+          case (TimestampType | TimestampNTZType, CalendarIntervalType) =>
+            TimeAdd(expr, boundOffset, Some(timeZone))
+          case (TimestampType | TimestampNTZType, _: YearMonthIntervalType) =>
             TimestampAddYMInterval(expr, boundOffset, Some(timeZone))
-          case (TimestampType, _: DayTimeIntervalType) => TimeAdd(expr, 
boundOffset, Some(timeZone))
+          case (TimestampType | TimestampNTZType, _: DayTimeIntervalType) =>
+            TimeAdd(expr, boundOffset, Some(timeZone))
           case (a, b) if a == b => Add(expr, boundOffset)
         }
         val bound = MutableProjection.create(boundExpr :: Nil, child.output)
diff --git a/sql/core/src/test/resources/sql-tests/inputs/window.sql 
b/sql/core/src/test/resources/sql-tests/inputs/window.sql
index 46d3629..9766aaf 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/window.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/window.sql
@@ -70,12 +70,21 @@ RANGE BETWEEN CURRENT ROW AND 2 FOLLOWING) FROM testData 
ORDER BY cate, val_date
 SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY 
val_timestamp
 RANGE BETWEEN CURRENT ROW AND interval 23 days 4 hours FOLLOWING) FROM testData
 ORDER BY cate, val_timestamp;
+SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY 
to_timestamp_ntz(val_timestamp)
+RANGE BETWEEN CURRENT ROW AND interval 23 days 4 hours FOLLOWING) FROM testData
+ORDER BY cate, to_timestamp_ntz(val_timestamp);
 SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY 
val_timestamp
 RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM 
testData
 ORDER BY cate, val_timestamp;
+SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY 
to_timestamp_ntz(val_timestamp)
+RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM 
testData
+ORDER BY cate, to_timestamp_ntz(val_timestamp);
 SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY 
val_timestamp
 RANGE BETWEEN CURRENT ROW AND interval '1 2:3:4.001' day to second FOLLOWING) 
FROM testData
 ORDER BY cate, val_timestamp;
+SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY 
to_timestamp_ntz(val_timestamp)
+RANGE BETWEEN CURRENT ROW AND interval '1 2:3:4.001' day to second FOLLOWING) 
FROM testData
+ORDER BY cate, to_timestamp_ntz(val_timestamp);
 SELECT val_date, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY 
val_date
 RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM 
testData
 ORDER BY cate, val_date;
diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out 
b/sql/core/src/test/resources/sql-tests/results/window.sql.out
index b3f9e6c..455015b 100644
--- a/sql/core/src/test/resources/sql-tests/results/window.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 50
+-- Number of queries: 53
 
 
 -- !query
@@ -212,6 +212,24 @@ NULL       NULL    NULL
 
 
 -- !query
+SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY 
to_timestamp_ntz(val_timestamp)
+RANGE BETWEEN CURRENT ROW AND interval 23 days 4 hours FOLLOWING) FROM testData
+ORDER BY cate, to_timestamp_ntz(val_timestamp)
+-- !query schema
+struct<val_timestamp:timestamp,cate:string,avg(val_timestamp) OVER (PARTITION 
BY cate ORDER BY to_timestamp_ntz(val_timestamp) ASC NULLS FIRST RANGE BETWEEN 
CURRENT ROW AND INTERVAL '23 days 4 hours' FOLLOWING):double>
+-- !query output
+NULL   NULL    NULL
+2017-07-31 17:00:00    NULL    1.5015456E9
+2017-07-31 17:00:00    a       1.5016970666666667E9
+2017-07-31 17:00:00    a       1.5016970666666667E9
+2017-08-05 23:13:20    a       1.502E9
+2020-12-30 16:00:00    a       1.6093728E9
+2017-07-31 17:00:00    b       1.5022728E9
+2017-08-17 13:00:00    b       1.503E9
+2020-12-30 16:00:00    b       1.6093728E9
+
+
+-- !query
 SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY 
val_timestamp
 RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM 
testData
 ORDER BY cate, val_timestamp
@@ -230,6 +248,24 @@ NULL       NULL    NULL
 
 
 -- !query
+SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY 
to_timestamp_ntz(val_timestamp)
+RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM 
testData
+ORDER BY cate, to_timestamp_ntz(val_timestamp)
+-- !query schema
+struct<val_timestamp:timestamp,cate:string,avg(val_timestamp) OVER (PARTITION 
BY cate ORDER BY to_timestamp_ntz(val_timestamp) ASC NULLS FIRST RANGE BETWEEN 
CURRENT ROW AND INTERVAL '1-1' YEAR TO MONTH FOLLOWING):double>
+-- !query output
+NULL   NULL    NULL
+2017-07-31 17:00:00    NULL    1.5015456E9
+2017-07-31 17:00:00    a       1.5016970666666667E9
+2017-07-31 17:00:00    a       1.5016970666666667E9
+2017-08-05 23:13:20    a       1.502E9
+2020-12-30 16:00:00    a       1.6093728E9
+2017-07-31 17:00:00    b       1.5022728E9
+2017-08-17 13:00:00    b       1.503E9
+2020-12-30 16:00:00    b       1.6093728E9
+
+
+-- !query
 SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY 
val_timestamp
 RANGE BETWEEN CURRENT ROW AND interval '1 2:3:4.001' day to second FOLLOWING) 
FROM testData
 ORDER BY cate, val_timestamp
@@ -248,6 +284,24 @@ NULL       NULL    NULL
 
 
 -- !query
+SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY 
to_timestamp_ntz(val_timestamp)
+RANGE BETWEEN CURRENT ROW AND interval '1 2:3:4.001' day to second FOLLOWING) 
FROM testData
+ORDER BY cate, to_timestamp_ntz(val_timestamp)
+-- !query schema
+struct<val_timestamp:timestamp,cate:string,avg(val_timestamp) OVER (PARTITION 
BY cate ORDER BY to_timestamp_ntz(val_timestamp) ASC NULLS FIRST RANGE BETWEEN 
CURRENT ROW AND INTERVAL '1 02:03:04.001' DAY TO SECOND FOLLOWING):double>
+-- !query output
+NULL   NULL    NULL
+2017-07-31 17:00:00    NULL    1.5015456E9
+2017-07-31 17:00:00    a       1.5015456E9
+2017-07-31 17:00:00    a       1.5015456E9
+2017-08-05 23:13:20    a       1.502E9
+2020-12-30 16:00:00    a       1.6093728E9
+2017-07-31 17:00:00    b       1.5015456E9
+2017-08-17 13:00:00    b       1.503E9
+2020-12-30 16:00:00    b       1.6093728E9
+
+
+-- !query
 SELECT val_date, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY 
val_date
 RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM 
testData
 ORDER BY cate, val_date

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to