This is an automated email from the ASF dual-hosted git repository.

gengliangwang pushed a commit to branch branch-4.x
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.x by this push:
     new f70c3d4974b6 [SPARK-56916][SQL] Simplify ElementAt array codegen under 
ANSI mode
f70c3d4974b6 is described below

commit f70c3d4974b6f4f04c1eb4f650f81694e2c0a77e
Author: Gengliang Wang <[email protected]>
AuthorDate: Mon May 18 21:43:08 2026 -0700

    [SPARK-56916][SQL] Simplify ElementAt array codegen under ANSI mode
    
    ### What changes were proposed in this pull request?
    
    Introduce `ElementAtUtils.java` with a single helper
    `resolveArrayIndex(int length, int index, QueryContext context)` and use it
    from `ElementAt`'s `ArrayType` branch in both `doGenCode` and `doElementAt`
    (eval).
    
    The helper normalizes a 1-based `element_at` index against the array length
    and returns the 0-based position, throwing `invalidElementAtIndexError` for
    out-of-bounds indexes and `invalidIndexOfZeroError` for a zero index. The
    caller still emits the type-specific `arr.get(pos, dataType)` (the return
    type depends on the array element type).
    
    The non-ANSI branch is left inline because it must choose between
    `defaultValueOutOfBound` (an `Option[Expression]` that requires codegen
    access) and `null`.
    
    ### Why are the changes needed?
    
    Part of SPARK-56908 (umbrella).
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    ```
    build/sbt "catalyst/testOnly *CollectionExpressionsSuite"
    ```
    
    59/59 pass.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Generated-by: Cursor 1.x
    
    Closes #55941 from gengliangwang/SPARK-56916-element-at.
    
    Authored-by: Gengliang Wang <[email protected]>
    Signed-off-by: Gengliang Wang <[email protected]>
    (cherry picked from commit 003228ff4eacf7a060ef267684039fc269355a88)
    Signed-off-by: Gengliang Wang <[email protected]>
---
 .../sql/catalyst/expressions/ElementAtUtils.java   | 51 ++++++++++++++++
 .../expressions/collectionOperations.scala         | 69 ++++++++++++++--------
 2 files changed, 97 insertions(+), 23 deletions(-)

diff --git 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ElementAtUtils.java
 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ElementAtUtils.java
new file mode 100644
index 000000000000..1aece7a91b26
--- /dev/null
+++ 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ElementAtUtils.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions;
+
+import org.apache.spark.QueryContext;
+import org.apache.spark.sql.errors.QueryExecutionErrors;
+
+/**
+ * Static helpers used by {@link ElementAt} on {@code ArrayType}
+ * (codegen and eval) under ANSI mode.
+ */
+public final class ElementAtUtils {
+
+  private ElementAtUtils() {}
+
+  /**
+   * Resolves the user-supplied 1-based {@code element_at} index to a
+   * 0-based array position. Throws when the index magnitude (compared as
+   * a long, so {@code Integer.MIN_VALUE} is rejected too) exceeds the
+   * array length, or when {@code index} is zero (always invalid).
+   *
+   * @param length  the array length
+   * @param index   the 1-based index supplied by the user (positive or negative)
+   * @param context the query context attached to the error
+   * @return        the resolved 0-based position
+   */
+  public static int resolveArrayIndex(int length, int index, QueryContext context) {
+    if (length < Math.abs((long) index)) {
+      throw QueryExecutionErrors.invalidElementAtIndexError(index, length, context);
+    }
+    if (index == 0) {
+      throw QueryExecutionErrors.invalidIndexOfZeroError(context);
+    }
+    return index > 0 ? index - 1 : length + index;
+  }
+}
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index 60966f3098ca..4f699de137c9 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -2738,19 +2738,23 @@ case class ElementAt(
   override def nullSafeEval(value: Any, ordinal: Any): Any = 
doElementAt(value, ordinal)
 
   @transient private lazy val doElementAt: (Any, Any) => Any = left.dataType 
match {
+    // ArrayType is split into ANSI (failOnError) and non-ANSI branches.
+    // Order matters: the guarded case must come first.
+    case _: ArrayType if failOnError =>
+      (value, ordinal) => {
+        val array = value.asInstanceOf[ArrayData]
+        val idx = ElementAtUtils.resolveArrayIndex(
+          array.numElements(), ordinal.asInstanceOf[Int], getContextOrNull())
+        if (arrayElementNullable && array.isNullAt(idx)) null else 
array.get(idx, dataType)
+      }
     case _: ArrayType =>
       (value, ordinal) => {
         val array = value.asInstanceOf[ArrayData]
         val index = ordinal.asInstanceOf[Int]
         if (array.numElements() < math.abs(index)) {
-          if (failOnError) {
-            throw QueryExecutionErrors.invalidElementAtIndexError(
-              index, array.numElements(), getContextOrNull())
-          } else {
-            defaultValueOutOfBound match {
-              case Some(value) => value.eval()
-              case None => null
-            }
+          defaultValueOutOfBound match {
+            case Some(value) => value.eval()
+            case None => null
           }
         } else {
           val idx = if (index == 0) {
@@ -2773,6 +2777,31 @@ case class ElementAt(
 
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     left.dataType match {
+      // ArrayType is split into ANSI (failOnError) and non-ANSI branches.
+      // Order matters: the guarded case must come first.
+      case _: ArrayType if failOnError =>
+        nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
+          val index = ctx.freshName("elementAtIndex")
+          val errorContext = getContextOrNullCode(ctx)
+          val utils = classOf[ElementAtUtils].getName
+          val assignment = s"${ev.value} = ${CodeGenerator.getValue(eval1, 
dataType, index)};"
+          val body = if (arrayElementNullable) {
+            s"""
+               |if ($eval1.isNullAt($index)) {
+               |  ${ev.isNull} = true;
+               |} else {
+               |  $assignment
+               |}
+             """.stripMargin
+          } else {
+            assignment
+          }
+          s"""
+             |int $index = $utils.resolveArrayIndex(
+             |  $eval1.numElements(), (int) $eval2, $errorContext);
+             |$body
+           """.stripMargin
+        })
       case _: ArrayType =>
         nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
           val index = ctx.freshName("elementAtIndex")
@@ -2786,21 +2815,15 @@ case class ElementAt(
             ""
           }
           val errorContext = getContextOrNullCode(ctx)
-          val indexOutOfBoundBranch = if (failOnError) {
-            // scalastyle:off line.size.limit
-            s"throw QueryExecutionErrors.invalidElementAtIndexError($index, 
$eval1.numElements(), $errorContext);"
-            // scalastyle:on line.size.limit
-          } else {
-            defaultValueOutOfBound match {
-              case Some(value) =>
-                val defaultValueEval = value.genCode(ctx)
-                s"""
-                  ${defaultValueEval.code}
-                  ${ev.isNull} = ${defaultValueEval.isNull};
-                  ${ev.value} = ${defaultValueEval.value};
-                """.stripMargin
-              case None => s"${ev.isNull} = true;"
-            }
+          val indexOutOfBoundBranch = defaultValueOutOfBound match {
+            case Some(value) =>
+              val defaultValueEval = value.genCode(ctx)
+              s"""
+                ${defaultValueEval.code}
+                ${ev.isNull} = ${defaultValueEval.isNull};
+                ${ev.value} = ${defaultValueEval.value};
+              """.stripMargin
+            case None => s"${ev.isNull} = true;"
           }
 
           s"""


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to