This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new 521c2e0ad64d [SPARK-45430] Fix for FramelessOffsetWindowFunction when 
IGNORE NULLS and offset > rowCount
521c2e0ad64d is described below

commit 521c2e0ad64da95a31d0ecc71105f613771c5616
Author: Vitalii Li <vitalii...@databricks.com>
AuthorDate: Tue Oct 24 15:13:00 2023 +0800

    [SPARK-45430] Fix for FramelessOffsetWindowFunction when IGNORE NULLS and 
offset > rowCount
    
    ### What changes were proposed in this pull request?
    
    This is a fix for the failure when function that utilized 
`FramelessOffsetWindowFunctionFrame` is used with `ignoreNulls = true` and 
`offset > rowCount`.
    
    e.g.
    
    ```
    select x, lead(x, 5) IGNORE NULLS over (order by x) from (select 
explode(sequence(1, 3)) x)
    ```
    
    ### Why are the changes needed?
    
    Fix existing bug
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    Modify existing unit test to cover this case
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No
    
    Closes #43236 from vitaliili-db/SPARK-45430.
    
    Authored-by: Vitalii Li <vitalii...@databricks.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
    (cherry picked from commit 32e1e58411913517c87d7e75942437f4e1c1d40e)
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../sql/execution/window/WindowFunctionFrame.scala |  6 +++-
 .../spark/sql/DataFrameWindowFunctionsSuite.scala  | 40 ++++++++++++----------
 2 files changed, 27 insertions(+), 19 deletions(-)

diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala
index 2b7f702a7f20..a849c3894f0d 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala
@@ -201,7 +201,11 @@ class FrameLessOffsetWindowFunctionFrame(
   override def prepare(rows: ExternalAppendOnlyUnsafeRowArray): Unit = {
     resetStates(rows)
     if (ignoreNulls) {
-      findNextRowWithNonNullInput()
+      if (Math.abs(offset) > rows.length) {
+        fillDefaultValue(EmptyRow)
+      } else {
+        findNextRowWithNonNullInput()
+      }
     } else {
       // drain the first few rows if offset is larger than zero
       while (inputIndex < offset) {
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala
index 3f8e2588fbc2..433b47419796 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala
@@ -818,6 +818,8 @@ class DataFrameWindowFunctionsSuite extends QueryTest
         lead($"value", 1, null, true).over(window),
         lead($"value", 2, null, true).over(window),
         lead($"value", 3, null, true).over(window),
+        // offset > rowCount: SPARK-45430
+        lead($"value", 100, null, true).over(window),
         lead(concat($"value", $"key"), 1, null, true).over(window),
         lag($"value", 1).over(window),
         lag($"value", 2).over(window),
@@ -825,27 +827,29 @@ class DataFrameWindowFunctionsSuite extends QueryTest
         lag($"value", 1, null, true).over(window),
         lag($"value", 2, null, true).over(window),
         lag($"value", 3, null, true).over(window),
+        // abs(offset) > rowCount: SPARK-45430
+        lag($"value", -100, null, true).over(window),
         lag(concat($"value", $"key"), 1, null, true).over(window))
         .orderBy($"order"),
       Seq(
-        Row("a", 0, null, "x", null, null, "x", "y", "z", "xa",
-          null, null, null, null, null, null, null),
-        Row("a", 1, "x", null, null, "x", "y", "z", "v", "ya",
-          null, null, "x", null, null, null, null),
-        Row("b", 2, null, null, "y", null, "y", "z", "v", "ya",
-          "x", null, null, "x", null, null, "xa"),
-        Row("c", 3, null, "y", null, null, "y", "z", "v", "ya",
-          null, "x", null, "x", null, null, "xa"),
-        Row("a", 4, "y", null, "z", "y", "z", "v", null, "za",
-          null, null, "y", "x", null, null, "xa"),
-        Row("b", 5, null, "z", "v", null, "z", "v", null, "za",
-          "y", null, null, "y", "x", null, "ya"),
-        Row("a", 6, "z", "v", null, "z", "v", null, null, "va",
-          null, "y", "z", "y", "x", null, "ya"),
-        Row("a", 7, "v", null, null, "v", null, null, null, null,
-          "z", null, "v", "z", "y", "x", "za"),
-        Row("a", 8, null, null, null, null, null, null, null, null,
-          "v", "z", null, "v", "z", "y", "va")))
+        Row("a", 0, null, "x", null, null, "x", "y", "z", null, "xa",
+          null, null, null, null, null, null, null, null),
+        Row("a", 1, "x", null, null, "x", "y", "z", "v", null, "ya",
+          null, null, "x", null, null, null, null, null),
+        Row("b", 2, null, null, "y", null, "y", "z", "v", null, "ya",
+          "x", null, null, "x", null, null, null, "xa"),
+        Row("c", 3, null, "y", null, null, "y", "z", "v", null, "ya",
+          null, "x", null, "x", null, null, null, "xa"),
+        Row("a", 4, "y", null, "z", "y", "z", "v", null, null, "za",
+          null, null, "y", "x", null, null, null, "xa"),
+        Row("b", 5, null, "z", "v", null, "z", "v", null, null, "za",
+          "y", null, null, "y", "x", null, null, "ya"),
+        Row("a", 6, "z", "v", null, "z", "v", null, null, null, "va",
+          null, "y", "z", "y", "x", null, null, "ya"),
+        Row("a", 7, "v", null, null, "v", null, null, null, null, null,
+          "z", null, "v", "z", "y", "x", null, "za"),
+        Row("a", 8, null, null, null, null, null, null, null, null, null,
+          "v", "z", null, "v", "z", "y", null, "va")))
   }
 
   test("lag - Offset expression <offset> must be a literal") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to