This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
new 1bb41437320c [SPARK-45430] Fix for FramelessOffsetWindowFunction when
IGNORE NULLS and offset > rowCount
1bb41437320c is described below
commit 1bb41437320cc9689e8ed987a0924247f171bb88
Author: Vitalii Li <[email protected]>
AuthorDate: Tue Oct 24 15:13:00 2023 +0800
[SPARK-45430] Fix for FramelessOffsetWindowFunction when IGNORE NULLS and
offset > rowCount
### What changes were proposed in this pull request?
This is a fix for the failure that occurs when a function that uses
`FrameLessOffsetWindowFunctionFrame` is invoked with `ignoreNulls = true` and
`abs(offset) > rowCount`.
For example:
```
select x, lead(x, 5) IGNORE NULLS over (order by x) from (select
explode(sequence(1, 3)) x)
```
### Why are the changes needed?
To fix an existing bug: queries like the one above fail instead of producing the default value for every row.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Modified the existing unit test to cover this case.
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #43236 from vitaliili-db/SPARK-45430.
Authored-by: Vitalii Li <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit 32e1e58411913517c87d7e75942437f4e1c1d40e)
Signed-off-by: Wenchen Fan <[email protected]>
---
.../sql/execution/window/WindowFunctionFrame.scala | 6 +++-
.../spark/sql/DataFrameWindowFunctionsSuite.scala | 40 ++++++++++++----------
2 files changed, 27 insertions(+), 19 deletions(-)
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala
index 2b7f702a7f20..a849c3894f0d 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala
@@ -201,7 +201,11 @@ class FrameLessOffsetWindowFunctionFrame(
override def prepare(rows: ExternalAppendOnlyUnsafeRowArray): Unit = {
resetStates(rows)
if (ignoreNulls) {
- findNextRowWithNonNullInput()
+ if (Math.abs(offset) > rows.length) {
+ fillDefaultValue(EmptyRow)
+ } else {
+ findNextRowWithNonNullInput()
+ }
} else {
// drain the first few rows if offset is larger than zero
while (inputIndex < offset) {
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala
index 097188186e71..1adbd3c3e6a5 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala
@@ -814,6 +814,8 @@ class DataFrameWindowFunctionsSuite extends QueryTest
lead($"value", 1, null, true).over(window),
lead($"value", 2, null, true).over(window),
lead($"value", 3, null, true).over(window),
+ // offset > rowCount: SPARK-45430
+ lead($"value", 100, null, true).over(window),
lead(concat($"value", $"key"), 1, null, true).over(window),
lag($"value", 1).over(window),
lag($"value", 2).over(window),
@@ -821,27 +823,29 @@ class DataFrameWindowFunctionsSuite extends QueryTest
lag($"value", 1, null, true).over(window),
lag($"value", 2, null, true).over(window),
lag($"value", 3, null, true).over(window),
+ // abs(offset) > rowCount: SPARK-45430
+ lag($"value", -100, null, true).over(window),
lag(concat($"value", $"key"), 1, null, true).over(window))
.orderBy($"order"),
Seq(
- Row("a", 0, null, "x", null, null, "x", "y", "z", "xa",
- null, null, null, null, null, null, null),
- Row("a", 1, "x", null, null, "x", "y", "z", "v", "ya",
- null, null, "x", null, null, null, null),
- Row("b", 2, null, null, "y", null, "y", "z", "v", "ya",
- "x", null, null, "x", null, null, "xa"),
- Row("c", 3, null, "y", null, null, "y", "z", "v", "ya",
- null, "x", null, "x", null, null, "xa"),
- Row("a", 4, "y", null, "z", "y", "z", "v", null, "za",
- null, null, "y", "x", null, null, "xa"),
- Row("b", 5, null, "z", "v", null, "z", "v", null, "za",
- "y", null, null, "y", "x", null, "ya"),
- Row("a", 6, "z", "v", null, "z", "v", null, null, "va",
- null, "y", "z", "y", "x", null, "ya"),
- Row("a", 7, "v", null, null, "v", null, null, null, null,
- "z", null, "v", "z", "y", "x", "za"),
- Row("a", 8, null, null, null, null, null, null, null, null,
- "v", "z", null, "v", "z", "y", "va")))
+ Row("a", 0, null, "x", null, null, "x", "y", "z", null, "xa",
+ null, null, null, null, null, null, null, null),
+ Row("a", 1, "x", null, null, "x", "y", "z", "v", null, "ya",
+ null, null, "x", null, null, null, null, null),
+ Row("b", 2, null, null, "y", null, "y", "z", "v", null, "ya",
+ "x", null, null, "x", null, null, null, "xa"),
+ Row("c", 3, null, "y", null, null, "y", "z", "v", null, "ya",
+ null, "x", null, "x", null, null, null, "xa"),
+ Row("a", 4, "y", null, "z", "y", "z", "v", null, null, "za",
+ null, null, "y", "x", null, null, null, "xa"),
+ Row("b", 5, null, "z", "v", null, "z", "v", null, null, "za",
+ "y", null, null, "y", "x", null, null, "ya"),
+ Row("a", 6, "z", "v", null, "z", "v", null, null, null, "va",
+ null, "y", "z", "y", "x", null, null, "ya"),
+ Row("a", 7, "v", null, null, "v", null, null, null, null, null,
+ "z", null, "v", "z", "y", "x", null, "za"),
+ Row("a", 8, null, null, null, null, null, null, null, null, null,
+ "v", "z", null, "v", "z", "y", null, "va")))
}
test("SPARK-12989 ExtractWindowExpressions treats alias as regular
attribute") {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]