This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push: new 521c2e0ad64d [SPARK-45430] Fix for FramelessOffsetWindowFunction when IGNORE NULLS and offset > rowCount 521c2e0ad64d is described below commit 521c2e0ad64da95a31d0ecc71105f613771c5616 Author: Vitalii Li <vitalii...@databricks.com> AuthorDate: Tue Oct 24 15:13:00 2023 +0800 [SPARK-45430] Fix for FramelessOffsetWindowFunction when IGNORE NULLS and offset > rowCount ### What changes were proposed in this pull request? This is a fix for the failure that occurs when a function that uses `FrameLessOffsetWindowFunctionFrame` is called with `ignoreNulls = true` and `abs(offset) > rowCount`. For example: ``` select x, lead(x, 5) IGNORE NULLS over (order by x) from (select explode(sequence(1, 3)) x) ``` ### Why are the changes needed? Fix existing bug ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Modify existing unit test to cover this case ### Was this patch authored or co-authored using generative AI tooling? No Closes #43236 from vitaliili-db/SPARK-45430. 
Authored-by: Vitalii Li <vitalii...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit 32e1e58411913517c87d7e75942437f4e1c1d40e) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../sql/execution/window/WindowFunctionFrame.scala | 6 +++- .../spark/sql/DataFrameWindowFunctionsSuite.scala | 40 ++++++++++++---------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala index 2b7f702a7f20..a849c3894f0d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala @@ -201,7 +201,11 @@ class FrameLessOffsetWindowFunctionFrame( override def prepare(rows: ExternalAppendOnlyUnsafeRowArray): Unit = { resetStates(rows) if (ignoreNulls) { - findNextRowWithNonNullInput() + if (Math.abs(offset) > rows.length) { + fillDefaultValue(EmptyRow) + } else { + findNextRowWithNonNullInput() + } } else { // drain the first few rows if offset is larger than zero while (inputIndex < offset) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 3f8e2588fbc2..433b47419796 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -818,6 +818,8 @@ class DataFrameWindowFunctionsSuite extends QueryTest lead($"value", 1, null, true).over(window), lead($"value", 2, null, true).over(window), lead($"value", 3, null, true).over(window), + // offset > rowCount: SPARK-45430 + lead($"value", 100, null, true).over(window), lead(concat($"value", $"key"), 1, null, true).over(window), 
lag($"value", 1).over(window), lag($"value", 2).over(window), @@ -825,27 +827,29 @@ class DataFrameWindowFunctionsSuite extends QueryTest lag($"value", 1, null, true).over(window), lag($"value", 2, null, true).over(window), lag($"value", 3, null, true).over(window), + // abs(offset) > rowCount: SPARK-45430 + lag($"value", -100, null, true).over(window), lag(concat($"value", $"key"), 1, null, true).over(window)) .orderBy($"order"), Seq( - Row("a", 0, null, "x", null, null, "x", "y", "z", "xa", - null, null, null, null, null, null, null), - Row("a", 1, "x", null, null, "x", "y", "z", "v", "ya", - null, null, "x", null, null, null, null), - Row("b", 2, null, null, "y", null, "y", "z", "v", "ya", - "x", null, null, "x", null, null, "xa"), - Row("c", 3, null, "y", null, null, "y", "z", "v", "ya", - null, "x", null, "x", null, null, "xa"), - Row("a", 4, "y", null, "z", "y", "z", "v", null, "za", - null, null, "y", "x", null, null, "xa"), - Row("b", 5, null, "z", "v", null, "z", "v", null, "za", - "y", null, null, "y", "x", null, "ya"), - Row("a", 6, "z", "v", null, "z", "v", null, null, "va", - null, "y", "z", "y", "x", null, "ya"), - Row("a", 7, "v", null, null, "v", null, null, null, null, - "z", null, "v", "z", "y", "x", "za"), - Row("a", 8, null, null, null, null, null, null, null, null, - "v", "z", null, "v", "z", "y", "va"))) + Row("a", 0, null, "x", null, null, "x", "y", "z", null, "xa", + null, null, null, null, null, null, null, null), + Row("a", 1, "x", null, null, "x", "y", "z", "v", null, "ya", + null, null, "x", null, null, null, null, null), + Row("b", 2, null, null, "y", null, "y", "z", "v", null, "ya", + "x", null, null, "x", null, null, null, "xa"), + Row("c", 3, null, "y", null, null, "y", "z", "v", null, "ya", + null, "x", null, "x", null, null, null, "xa"), + Row("a", 4, "y", null, "z", "y", "z", "v", null, null, "za", + null, null, "y", "x", null, null, null, "xa"), + Row("b", 5, null, "z", "v", null, "z", "v", null, null, "za", + "y", null, null, 
"y", "x", null, null, "ya"), + Row("a", 6, "z", "v", null, "z", "v", null, null, null, "va", + null, "y", "z", "y", "x", null, null, "ya"), + Row("a", 7, "v", null, null, "v", null, null, null, null, null, + "z", null, "v", "z", "y", "x", null, "za"), + Row("a", 8, null, null, null, null, null, null, null, null, null, + "v", "z", null, "v", "z", "y", null, "va"))) } test("lag - Offset expression <offset> must be a literal") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org