This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 0fbd34c45b58 [SPARK-50377][SQL] Allow to evaluate foldable 
RuntimeReplaceable
0fbd34c45b58 is described below

commit 0fbd34c45b5818400f1993314acc571690daaf9a
Author: Wenchen Fan <[email protected]>
AuthorDate: Fri Nov 22 13:20:37 2024 +0800

    [SPARK-50377][SQL] Allow to evaluate foldable RuntimeReplaceable
    
    ### What changes were proposed in this pull request?
    
    This is to fix a regression caused by 
https://github.com/apache/spark/pull/47143 . The problem is, in some places, we 
want to get a constant from a foldable expression before the query execution 
starts. https://github.com/apache/spark/pull/47143 brings two problems:
    1. `UnaryPositive` is no longer a `UnaryExpression`, which means it's not 
foldable anymore even if its child is foldable.
    2. `UnaryPositive` is no longer evaluable.
    
    `Lag` is such a place. It may evaluate the `inputOffset` parameter eagerly. 
`lag(..., +1)` no longer works after https://github.com/apache/spark/pull/47143 
.
    
    Instead of fixing `Lag`, this PR makes two changes and hopefully we can 
avoid all similar problems:
    1. Make `UnaryPositive` extend `UnaryExpression` again. We need follow-up 
PRs to check other `RuntimeReplaceable` expressions and see if they should 
extend `UnaryExpression` or `BinaryExpression`, etc.
    2. Implement `RuntimeReplaceable#eval` so that we can evaluate folding 
`RuntimeReplaceable` eagerly when needed.
    
    ### Why are the changes needed?
    
    Fix the regression on `lag` and avoid similar issues in the future.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, the regression is not released yet.
    
    ### How was this patch tested?
    
    new test
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    no
    
    Closes #48912 from cloud-fan/replace.
    
    Lead-authored-by: Wenchen Fan <[email protected]>
    Co-authored-by: Wenchen Fan <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../sql/catalyst/expressions/Expression.scala      |  8 ++++++--
 .../sql/catalyst/expressions/arithmetic.scala      | 10 +++-------
 .../sql-tests/analyzer-results/window.sql.out      |  7 ++++---
 .../src/test/resources/sql-tests/inputs/window.sql |  1 +
 .../resources/sql-tests/results/window.sql.out     | 23 +++++++++++-----------
 5 files changed, 26 insertions(+), 23 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index f0f94f088138..c45479985282 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -440,8 +440,12 @@ trait RuntimeReplaceable extends Expression {
   // are semantically equal.
   override lazy val canonicalized: Expression = replacement.canonicalized
 
-  final override def eval(input: InternalRow = null): Any =
-    throw QueryExecutionErrors.cannotEvaluateExpressionError(this)
+  final override def eval(input: InternalRow = null): Any = {
+    // For convenience, we allow to evaluate `RuntimeReplaceable` expressions, 
in case we need to
+    // get a constant from foldable expression before the query execution 
starts.
+    assert(input == null)
+    replacement.eval()
+  }
   final override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): 
ExprCode =
     throw QueryExecutionErrors.cannotGenerateCodeForExpressionError(this)
 }
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
index 015240472cf5..f9e8b6a17896 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
@@ -115,8 +115,7 @@ case class UnaryMinus(
   since = "1.5.0",
   group = "math_funcs")
 case class UnaryPositive(child: Expression)
-  extends RuntimeReplaceable with ImplicitCastInputTypes {
-  override def nullIntolerant: Boolean = true
+  extends UnaryExpression with RuntimeReplaceable with ImplicitCastInputTypes {
 
   override def prettyName: String = "positive"
 
@@ -128,11 +127,8 @@ case class UnaryPositive(child: Expression)
 
   override lazy val replacement: Expression = child
 
-  override protected def withNewChildrenInternal(
-      newChildren: IndexedSeq[Expression]): UnaryPositive =
-    copy(newChildren.head)
-
-  override def children: Seq[Expression] = child :: Nil
+  override protected def withNewChildInternal(newChild: Expression): 
Expression =
+    copy(child = newChild)
 }
 
 /**
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/window.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/window.sql.out
index db49b1bfd39d..a1a2b7600637 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/window.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/window.sql.out
@@ -995,6 +995,7 @@ SELECT
     lag(v, 1) IGNORE NULLS OVER w lag_1,
     lag(v, 2) IGNORE NULLS OVER w lag_2,
     lag(v, 3) IGNORE NULLS OVER w lag_3,
+    lag(v, +3) IGNORE NULLS OVER w lag_plus_3,
     nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
     nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
     nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
@@ -1007,9 +1008,9 @@ WINDOW w AS (ORDER BY id)
 ORDER BY id
 -- !query analysis
 Sort [id#x ASC NULLS FIRST], true
-+- Project [content#x, id#x, v#x, lead_0#x, lead_1#x, lead_2#x, lead_3#x, 
lag_0#x, lag_1#x, lag_2#x, lag_3#x, nth_value_1#x, nth_value_2#x, 
nth_value_3#x, first_value#x, any_value#x, last_value#x]
-   +- Project [content#x, id#x, v#x, lead_0#x, lead_1#x, lead_2#x, lead_3#x, 
lag_0#x, lag_1#x, lag_2#x, lag_3#x, nth_value_1#x, nth_value_2#x, 
nth_value_3#x, first_value#x, any_value#x, last_value#x, lead_0#x, lead_1#x, 
lead_2#x, lead_3#x, lag_0#x, lag_1#x, lag_2#x, ... 7 more fields]
-      +- Window [lead(v#x, 0, null) windowspecdefinition(id#x ASC NULLS FIRST, 
specifiedwindowframe(RowFrame, 0, 0)) AS lead_0#x, lead(v#x, 1, null) 
windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, 1, 
1)) AS lead_1#x, lead(v#x, 2, null) windowspecdefinition(id#x ASC NULLS FIRST, 
specifiedwindowframe(RowFrame, 2, 2)) AS lead_2#x, lead(v#x, 3, null) 
windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, 3, 
3)) AS lead_3#x, lag(v#x, 0, null) windo [...]
++- Project [content#x, id#x, v#x, lead_0#x, lead_1#x, lead_2#x, lead_3#x, 
lag_0#x, lag_1#x, lag_2#x, lag_3#x, lag_plus_3#x, nth_value_1#x, nth_value_2#x, 
nth_value_3#x, first_value#x, any_value#x, last_value#x]
+   +- Project [content#x, id#x, v#x, lead_0#x, lead_1#x, lead_2#x, lead_3#x, 
lag_0#x, lag_1#x, lag_2#x, lag_3#x, lag_plus_3#x, nth_value_1#x, nth_value_2#x, 
nth_value_3#x, first_value#x, any_value#x, last_value#x, lead_0#x, lead_1#x, 
lead_2#x, lead_3#x, lag_0#x, lag_1#x, ... 9 more fields]
+      +- Window [lead(v#x, 0, null) windowspecdefinition(id#x ASC NULLS FIRST, 
specifiedwindowframe(RowFrame, 0, 0)) AS lead_0#x, lead(v#x, 1, null) 
windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, 1, 
1)) AS lead_1#x, lead(v#x, 2, null) windowspecdefinition(id#x ASC NULLS FIRST, 
specifiedwindowframe(RowFrame, 2, 2)) AS lead_2#x, lead(v#x, 3, null) 
windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, 3, 
3)) AS lead_3#x, lag(v#x, 0, null) windo [...]
          +- Project [content#x, id#x, v#x]
             +- SubqueryAlias test_ignore_null
                +- View (`test_ignore_null`, [content#x, id#x, v#x])
diff --git a/sql/core/src/test/resources/sql-tests/inputs/window.sql 
b/sql/core/src/test/resources/sql-tests/inputs/window.sql
index f3cbf6ef1ccb..bec79247f9a6 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/window.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/window.sql
@@ -327,6 +327,7 @@ SELECT
     lag(v, 1) IGNORE NULLS OVER w lag_1,
     lag(v, 2) IGNORE NULLS OVER w lag_2,
     lag(v, 3) IGNORE NULLS OVER w lag_3,
+    lag(v, +3) IGNORE NULLS OVER w lag_plus_3,
     nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
     nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
     nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out 
b/sql/core/src/test/resources/sql-tests/results/window.sql.out
index 87381b64638b..ce88fb57f8aa 100644
--- a/sql/core/src/test/resources/sql-tests/results/window.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out
@@ -1060,6 +1060,7 @@ SELECT
     lag(v, 1) IGNORE NULLS OVER w lag_1,
     lag(v, 2) IGNORE NULLS OVER w lag_2,
     lag(v, 3) IGNORE NULLS OVER w lag_3,
+    lag(v, +3) IGNORE NULLS OVER w lag_plus_3,
     nth_value(v, 1) IGNORE NULLS OVER w nth_value_1,
     nth_value(v, 2) IGNORE NULLS OVER w nth_value_2,
     nth_value(v, 3) IGNORE NULLS OVER w nth_value_3,
@@ -1071,17 +1072,17 @@ FROM
 WINDOW w AS (ORDER BY id)
 ORDER BY id
 -- !query schema
-struct<content:string,id:int,v:string,lead_0:string,lead_1:string,lead_2:string,lead_3:string,lag_0:string,lag_1:string,lag_2:string,lag_3:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,any_value:string,last_value:string>
--- !query output
-a      0       NULL    NULL    x       y       z       NULL    NULL    NULL    
NULL    NULL    NULL    NULL    NULL    NULL    NULL
-a      1       x       x       y       z       v       x       NULL    NULL    
NULL    x       NULL    NULL    x       x       x
-b      2       NULL    NULL    y       z       v       NULL    x       NULL    
NULL    x       NULL    NULL    x       x       x
-c      3       NULL    NULL    y       z       v       NULL    x       NULL    
NULL    x       NULL    NULL    x       x       x
-a      4       y       y       z       v       NULL    y       x       NULL    
NULL    x       y       NULL    x       x       y
-b      5       NULL    NULL    z       v       NULL    NULL    y       x       
NULL    x       y       NULL    x       x       y
-a      6       z       z       v       NULL    NULL    z       y       x       
NULL    x       y       z       x       x       z
-a      7       v       v       NULL    NULL    NULL    v       z       y       
x       x       y       z       x       x       v
-a      8       NULL    NULL    NULL    NULL    NULL    NULL    v       z       
y       x       y       z       x       x       v
+struct<content:string,id:int,v:string,lead_0:string,lead_1:string,lead_2:string,lead_3:string,lag_0:string,lag_1:string,lag_2:string,lag_3:string,lag_plus_3:string,nth_value_1:string,nth_value_2:string,nth_value_3:string,first_value:string,any_value:string,last_value:string>
+-- !query output
+a      0       NULL    NULL    x       y       z       NULL    NULL    NULL    
NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
+a      1       x       x       y       z       v       x       NULL    NULL    
NULL    NULL    x       NULL    NULL    x       x       x
+b      2       NULL    NULL    y       z       v       NULL    x       NULL    
NULL    NULL    x       NULL    NULL    x       x       x
+c      3       NULL    NULL    y       z       v       NULL    x       NULL    
NULL    NULL    x       NULL    NULL    x       x       x
+a      4       y       y       z       v       NULL    y       x       NULL    
NULL    NULL    x       y       NULL    x       x       y
+b      5       NULL    NULL    z       v       NULL    NULL    y       x       
NULL    NULL    x       y       NULL    x       x       y
+a      6       z       z       v       NULL    NULL    z       y       x       
NULL    NULL    x       y       z       x       x       z
+a      7       v       v       NULL    NULL    NULL    v       z       y       
x       x       x       y       z       x       x       v
+a      8       NULL    NULL    NULL    NULL    NULL    NULL    v       z       
y       y       x       y       z       x       x       v
 
 
 -- !query


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to