Repository: spark Updated Branches: refs/heads/master 243ce319a -> 9cf9a83af
[SPARK-25862][SQL] Remove rangeBetween APIs introduced in SPARK-21608 ## What changes were proposed in this pull request? This patch removes the rangeBetween functions introduced in SPARK-21608. As explained in SPARK-25841, these functions are confusing and don't quite work. We will redesign them and introduce better ones in SPARK-25843. ## How was this patch tested? Removed relevant test cases as well. These test cases will need to be added back in SPARK-25843. Closes #22870 from rxin/SPARK-25862. Lead-authored-by: Reynold Xin <r...@databricks.com> Co-authored-by: hyukjinkwon <gurwls...@apache.org> Signed-off-by: gatorsmile <gatorsm...@gmail.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9cf9a83a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9cf9a83a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9cf9a83a Branch: refs/heads/master Commit: 9cf9a83afafb88668c95ca704a1f65a91b5e591c Parents: 243ce31 Author: Reynold Xin <r...@databricks.com> Authored: Tue Oct 30 21:27:17 2018 -0700 Committer: gatorsmile <gatorsm...@gmail.com> Committed: Tue Oct 30 21:27:17 2018 -0700 ---------------------------------------------------------------------- .../expressions/windowExpressions.scala | 2 +- .../apache/spark/sql/expressions/Window.scala | 9 --- .../spark/sql/expressions/WindowSpec.scala | 12 ---- .../scala/org/apache/spark/sql/functions.scala | 26 -------- .../resources/sql-tests/results/window.sql.out | 2 +- .../spark/sql/DataFrameWindowFramesSuite.scala | 68 +------------------- 6 files changed, 3 insertions(+), 116 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/9cf9a83a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 7de6ddd..0b674d0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -206,7 +206,7 @@ case class SpecifiedWindowFrame( // Check combination (of expressions). (lower, upper) match { case (l: Expression, u: Expression) if !isValidFrameBoundary(l, u) => - TypeCheckFailure(s"Window frame upper bound '$upper' does not followes the lower bound " + + TypeCheckFailure(s"Window frame upper bound '$upper' does not follow the lower bound " + s"'$lower'.") case (l: SpecialFrameBoundary, _) => TypeCheckSuccess case (_, u: SpecialFrameBoundary) => TypeCheckSuccess http://git-wip-us.apache.org/repos/asf/spark/blob/9cf9a83a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala index 14dec8f..d50031b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala @@ -214,15 +214,6 @@ object Window { spec.rangeBetween(start, end) } - /** - * This function has been deprecated in Spark 2.4. See SPARK-25842 for more information. - * @since 2.3.0 - */ - @deprecated("Use the version with Long parameter types", "2.4.0") - def rangeBetween(start: Column, end: Column): WindowSpec = { - spec.rangeBetween(start, end) - } - private[sql] def spec: WindowSpec = { new WindowSpec(Seq.empty, Seq.empty, UnspecifiedFrame) } http://git-wip-us.apache.org/repos/asf/spark/blob/9cf9a83a/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala index 0cc43a5..b7f3000 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala @@ -210,18 +210,6 @@ class WindowSpec private[sql]( } /** - * This function has been deprecated in Spark 2.4. See SPARK-25842 for more information. - * @since 2.3.0 - */ - @deprecated("Use the version with Long parameter types", "2.4.0") - def rangeBetween(start: Column, end: Column): WindowSpec = { - new WindowSpec( - partitionSpec, - orderSpec, - SpecifiedWindowFrame(RangeFrame, start.expr, end.expr)) - } - - /** * Converts this [[WindowSpec]] into a [[Column]] with an aggregate expression. */ private[sql] def withAggregate(aggregate: Column): Column = { http://git-wip-us.apache.org/repos/asf/spark/blob/9cf9a83a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 757a322..5348b65 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -829,32 +829,6 @@ object functions { ////////////////////////////////////////////////////////////////////////////////////////////// // Window functions ////////////////////////////////////////////////////////////////////////////////////////////// - /** - * This function has been deprecated in Spark 2.4. See SPARK-25842 for more information. - * - * @group window_funcs - * @since 2.3.0 - */ - @deprecated("Use Window.unboundedPreceding", "2.4.0") - def unboundedPreceding(): Column = Column(UnboundedPreceding) - - /** - * This function has been deprecated in Spark 2.4. See SPARK-25842 for more information. - * - * @group window_funcs - * @since 2.3.0 - */ - @deprecated("Use Window.unboundedFollowing", "2.4.0") - def unboundedFollowing(): Column = Column(UnboundedFollowing) - - /** - * This function has been deprecated in Spark 2.4. See SPARK-25842 for more information. - * - * @group window_funcs - * @since 2.3.0 - */ - @deprecated("Use Window.currentRow", "2.4.0") - def currentRow(): Column = Column(CurrentRow) /** * Window function: returns the cumulative distribution of values within a window partition, http://git-wip-us.apache.org/repos/asf/spark/blob/9cf9a83a/sql/core/src/test/resources/sql-tests/results/window.sql.out ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index 4afbcd6..5071e0b 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -191,7 +191,7 @@ ROWS BETWEEN UNBOUNDED FOLLOWING AND 1 FOLLOWING) FROM testData ORDER BY cate, v struct<> -- !query 11 output org.apache.spark.sql.AnalysisException -cannot resolve 'ROWS BETWEEN UNBOUNDED FOLLOWING AND 1 FOLLOWING' due to data type mismatch: Window frame upper bound '1' does not followes the lower bound 'unboundedfollowing$()'.; line 1 pos 33 +cannot resolve 'ROWS BETWEEN UNBOUNDED FOLLOWING AND 1 FOLLOWING' due to data type mismatch: Window frame upper bound '1' does not follow the lower bound 'unboundedfollowing$()'.; line 1 pos 33 -- !query 12 http://git-wip-us.apache.org/repos/asf/spark/blob/9cf9a83a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala index 9c28074..002c17f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala @@ -17,12 +17,11 @@ package org.apache.spark.sql -import java.sql.{Date, Timestamp} +import java.sql.Date import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSQLContext -import org.apache.spark.unsafe.types.CalendarInterval /** * Window frame testing for DataFrame API. @@ -219,71 +218,6 @@ class DataFrameWindowFramesSuite extends QueryTest with SharedSQLContext { Window.partitionBy($"value").orderBy($"key").rangeBetween(-2147483649L, 0))), Seq(Row(1, 2), Row(1, 2), Row(2, 3), Row(2147483650L, 2), Row(2147483650L, 4), Row(3, 1)) ) - - def dt(date: String): Date = Date.valueOf(date) - - val df2 = Seq((dt("2017-08-01"), "1"), (dt("2017-08-01"), "1"), (dt("2020-12-31"), "1"), - (dt("2017-08-03"), "2"), (dt("2017-08-02"), "1"), (dt("2020-12-31"), "2")) - .toDF("key", "value") - val window = Window.partitionBy($"value").orderBy($"key").rangeBetween(lit(0), lit(2)) - - checkAnswer( - df2.select( - $"key", - count("key").over(window)), - Seq(Row(dt("2017-08-01"), 3), Row(dt("2017-08-01"), 3), Row(dt("2020-12-31"), 1), - Row(dt("2017-08-03"), 1), Row(dt("2017-08-02"), 1), Row(dt("2020-12-31"), 1)) - ) - } - - test("range between should accept double values as boundary") { - val df = Seq((1.0D, "1"), (1.0D, "1"), (100.001D, "1"), (3.3D, "2"), (2.02D, "1"), - (100.001D, "2")).toDF("key", "value") - val window = Window.partitionBy($"value").orderBy($"key").rangeBetween(currentRow, lit(2.5D)) - - checkAnswer( - df.select( - $"key", - count("key").over(window)), - Seq(Row(1.0, 3), Row(1.0, 3), Row(100.001, 1), Row(3.3, 1), Row(2.02, 1), Row(100.001, 1)) - ) - } - - test("range between should accept interval values as boundary") { - def ts(timestamp: Long): Timestamp = new Timestamp(timestamp * 1000) - - val df = Seq((ts(1501545600), "1"), (ts(1501545600), "1"), (ts(1609372800), "1"), - (ts(1503000000), "2"), (ts(1502000000), "1"), (ts(1609372800), "2")) - .toDF("key", "value") - val window = Window.partitionBy($"value").orderBy($"key") - .rangeBetween(currentRow, lit(CalendarInterval.fromString("interval 23 days 4 hours"))) - - checkAnswer( - df.select( - $"key", - count("key").over(window)), - Seq(Row(ts(1501545600), 3), Row(ts(1501545600), 3), Row(ts(1609372800), 1), - Row(ts(1503000000), 1), Row(ts(1502000000), 1), Row(ts(1609372800), 1)) - ) - } - - test("range between should accept interval values as both boundaries") { - def ts(timestamp: Long): Timestamp = new Timestamp(timestamp * 1000) - - val df = Seq((ts(1501545600), "1"), (ts(1501545600), "1"), (ts(1609372800), "1"), - (ts(1503000000), "2"), (ts(1502000000), "1"), (ts(1609372800), "2")) - .toDF("key", "value") - val window = Window.partitionBy($"value").orderBy($"key") - .rangeBetween(lit(CalendarInterval.fromString("interval 3 hours")), - lit(CalendarInterval.fromString("interval 23 days 4 hours"))) - - checkAnswer( - df.select( - $"key", - count("key").over(window)), - Seq(Row(ts(1501545600), 1), Row(ts(1501545600), 1), Row(ts(1609372800), 0), - Row(ts(1503000000), 0), Row(ts(1502000000), 0), Row(ts(1609372800), 0)) - ) } test("unbounded rows/range between with aggregation") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org