This is an automated email from the ASF dual-hosted git repository.

yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 9608416ef8 [GLUTEN-9291][VL] Fix Spark try_cast and cast function 
error case handling (#9292)
9608416ef8 is described below

commit 9608416ef88011e701d2e2ea22fa74a704c86f59
Author: Zhen Li <[email protected]>
AuthorDate: Tue Jun 24 17:00:14 2025 +0800

    [GLUTEN-9291][VL] Fix Spark try_cast and cast function error case handling 
(#9292)
    
    * Fix try_cast wrong round behavior
    
    * fix build
    
    * update velox branc
    
    * update branch
    
    * fix build
    
    * fix build
    
    ---------
    
    Co-authored-by: zhli <[email protected]>
---
 .../backendsapi/velox/VeloxTransformerApi.scala    |  2 +-
 .../functions/ScalarFunctionsValidateSuite.scala   | 66 ++++++++++++++++++++++
 cpp/velox/substrait/SubstraitToVeloxExpr.cc        |  8 +--
 cpp/velox/substrait/VeloxToSubstraitExpr.cc        |  2 +-
 ep/build-velox/src/get_velox.sh                    |  4 +-
 .../gluten/substrait/expression/CastNode.java      |  8 +--
 .../substrait/expression/ExpressionBuilder.java    |  4 +-
 .../expression/UnaryExpressionTransformer.scala    |  2 +-
 .../gluten/sql/shims/spark34/Spark34Shims.scala    |  1 +
 .../gluten/sql/shims/spark35/Spark35Shims.scala    |  1 +
 10 files changed, 82 insertions(+), 16 deletions(-)

diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxTransformerApi.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxTransformerApi.scala
index bfc4a27b51..76a1a0e3a7 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxTransformerApi.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxTransformerApi.scala
@@ -85,7 +85,7 @@ class VeloxTransformerApi extends TransformerApi with Logging 
{
       childNode
     } else {
       val typeNode = ConverterUtils.getTypeNode(dataType, nullable)
-      ExpressionBuilder.makeCast(typeNode, childNode, !nullOnOverflow)
+      ExpressionBuilder.makeCast(typeNode, childNode, nullOnOverflow)
     }
   }
 
diff --git 
a/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala
 
b/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala
index 58a17bc9d9..0ce16d9bde 100644
--- 
a/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala
+++ 
b/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala
@@ -1071,6 +1071,39 @@ abstract class ScalarFunctionsValidateSuite extends 
FunctionsValidateSuite {
           }
       }
     }
+    runQueryAndCompare("select try_cast(' 123 ' AS int)") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare("select try_cast('2147483648' AS int)") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare("select try_cast('12a34' AS int)") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare("select try_cast('2023-08-21 ' AS date)") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare("select try_cast(' true' AS boolean)") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare("select try_cast('null' AS int)") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare("select try_cast('on' AS BOOLEAN)") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare("select try_cast(128 AS DECIMAL(2, 0))") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare("select try_cast(128 AS TINYINT)") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare("select try_cast(9223372036854775807 AS int)") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare("select try_cast('123.0' AS INT)") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
   }
 
   test("cast") {
@@ -1090,6 +1123,39 @@ abstract class ScalarFunctionsValidateSuite extends 
FunctionsValidateSuite {
           }
       }
     }
+    runQueryAndCompare("select cast(' 123 ' AS int)") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare("select cast('2147483648' AS int)") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare("select cast('12a34' AS int)") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare("select cast('2023-08-21 ' AS date)") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare("select cast(' true' AS boolean)") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare("select cast('null' AS int)") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare("select cast('on' AS BOOLEAN)") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare("select cast(128 AS DECIMAL(2, 0))") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare("select cast(128 AS TINYINT)") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare("select cast(9223372036854775807 AS int)") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    runQueryAndCompare("select cast('123.0' AS INT)") {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
   }
 
   testWithMinSparkVersion("equal_null", "3.4") {
diff --git a/cpp/velox/substrait/SubstraitToVeloxExpr.cc 
b/cpp/velox/substrait/SubstraitToVeloxExpr.cc
index eb75306ff6..798071eff3 100755
--- a/cpp/velox/substrait/SubstraitToVeloxExpr.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxExpr.cc
@@ -146,8 +146,8 @@ TypePtr getScalarType(const 
::substrait::Expression::Literal& literal) {
   }
 }
 
-/// Whether null will be returned on cast failure.
-bool isNullOnFailure(::substrait::Expression::Cast::FailureBehavior 
failureBehavior) {
+/// Whether is try cast.
+bool isTryCast(::substrait::Expression::Cast::FailureBehavior failureBehavior) 
{
   switch (failureBehavior) {
     case 
::substrait::Expression_Cast_FailureBehavior_FAILURE_BEHAVIOR_UNSPECIFIED:
     case 
::substrait::Expression_Cast_FailureBehavior_FAILURE_BEHAVIOR_THROW_EXCEPTION:
@@ -562,10 +562,8 @@ core::TypedExprPtr 
SubstraitVeloxExprConverter::toVeloxExpr(
     const ::substrait::Expression::Cast& castExpr,
     const RowTypePtr& inputType) {
   auto type = SubstraitParser::parseType(castExpr.type());
-  bool nullOnFailure = isNullOnFailure(castExpr.failure_behavior());
-
   std::vector<core::TypedExprPtr> inputs{toVeloxExpr(castExpr.input(), 
inputType)};
-  return std::make_shared<core::CastTypedExpr>(type, inputs, nullOnFailure);
+  return std::make_shared<core::CastTypedExpr>(type, inputs, 
isTryCast(castExpr.failure_behavior()));
 }
 
 core::TypedExprPtr SubstraitVeloxExprConverter::toVeloxExpr(
diff --git a/cpp/velox/substrait/VeloxToSubstraitExpr.cc 
b/cpp/velox/substrait/VeloxToSubstraitExpr.cc
index f73babfdf6..e77706edc7 100644
--- a/cpp/velox/substrait/VeloxToSubstraitExpr.cc
+++ b/cpp/velox/substrait/VeloxToSubstraitExpr.cc
@@ -380,7 +380,7 @@ const ::substrait::Expression_Cast& 
VeloxToSubstraitExprConvertor::toSubstraitEx
   for (auto& arg : castExprInputs) {
     substraitCastExpr->mutable_input()->MergeFrom(toSubstraitExpr(arena, arg, 
inputType));
   }
-  if (castExpr->nullOnFailure()) {
+  if (castExpr->isTryCast()) {
     
substraitCastExpr->set_failure_behavior(::substrait::Expression_Cast_FailureBehavior_FAILURE_BEHAVIOR_RETURN_NULL);
   } else {
     substraitCastExpr->set_failure_behavior(
diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh
index a478e380c8..ae0ab2d80e 100755
--- a/ep/build-velox/src/get_velox.sh
+++ b/ep/build-velox/src/get_velox.sh
@@ -17,11 +17,11 @@
 set -exu
 
 VELOX_REPO=https://github.com/oap-project/velox.git
-VELOX_BRANCH=2025_06_23
+VELOX_BRANCH=2025_06_24
 VELOX_HOME=""
 RUN_SETUP_SCRIPT=ON
 VELOX_ENHANCED_REPO=https://github.com/oap-project/velox.git
-VELOX_ENHANCED_BRANCH=2025_06_10
+VELOX_ENHANCED_BRANCH=2025_06_24
 ENABLE_ENHANCED_FEATURES=OFF
 
 # Developer use only for testing Velox PR.
diff --git 
a/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/CastNode.java
 
b/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/CastNode.java
index 3d2def2bd1..1984c44d74 100644
--- 
a/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/CastNode.java
+++ 
b/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/CastNode.java
@@ -26,12 +26,12 @@ public class CastNode implements ExpressionNode, 
Serializable {
   private final TypeNode typeNode;
   private final ExpressionNode expressionNode;
 
-  public final boolean throwOnFailure;
+  public final boolean isTryCast;
 
-  CastNode(TypeNode typeNode, ExpressionNode expressionNode, boolean 
throwOnFailure) {
+  CastNode(TypeNode typeNode, ExpressionNode expressionNode, boolean 
isTryCast) {
     this.typeNode = typeNode;
     this.expressionNode = expressionNode;
-    this.throwOnFailure = throwOnFailure;
+    this.isTryCast = isTryCast;
   }
 
   @Override
@@ -39,7 +39,7 @@ public class CastNode implements ExpressionNode, Serializable 
{
     Expression.Cast.Builder castBuilder = Expression.Cast.newBuilder();
     castBuilder.setType(typeNode.toProtobuf());
     castBuilder.setInput(expressionNode.toProtobuf());
-    if (throwOnFailure) {
+    if (!isTryCast) {
       // Throw exception on failure.
       castBuilder.setFailureBehaviorValue(2);
     } else {
diff --git 
a/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/ExpressionBuilder.java
 
b/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/ExpressionBuilder.java
index 4c456e2142..4bdef37878 100644
--- 
a/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/ExpressionBuilder.java
+++ 
b/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/ExpressionBuilder.java
@@ -238,8 +238,8 @@ public class ExpressionBuilder {
   }
 
   public static CastNode makeCast(
-      TypeNode typeNode, ExpressionNode expressionNode, boolean 
throwOnFailure) {
-    return new CastNode(typeNode, expressionNode, throwOnFailure);
+      TypeNode typeNode, ExpressionNode expressionNode, boolean isTryCast) {
+    return new CastNode(typeNode, expressionNode, isTryCast);
   }
 
   public static StringMapNode makeStringMap(Map<String, String> values) {
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala
index f309621cce..b762ec95b6 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala
@@ -48,7 +48,7 @@ case class CastTransformer(substraitExprName: String, child: 
ExpressionTransform
     ExpressionBuilder.makeCast(
       typeNode,
       child.doTransform(context),
-      SparkShimLoader.getSparkShims.withAnsiEvalMode(original))
+      SparkShimLoader.getSparkShims.withTryEvalMode(original))
   }
 }
 
diff --git 
a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
 
b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
index 508e256106..5e08764a04 100644
--- 
a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
+++ 
b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
@@ -547,6 +547,7 @@ class Spark34Shims extends SparkShims {
       case s: Subtract => s.evalMode == EvalMode.TRY
       case d: Divide => d.evalMode == EvalMode.TRY
       case m: Multiply => m.evalMode == EvalMode.TRY
+      case c: Cast => c.evalMode == EvalMode.TRY
       case _ => false
     }
   }
diff --git 
a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
 
b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
index 7ee55675ba..f953fd90fe 100644
--- 
a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
+++ 
b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
@@ -580,6 +580,7 @@ class Spark35Shims extends SparkShims {
       case s: Subtract => s.evalMode == EvalMode.TRY
       case d: Divide => d.evalMode == EvalMode.TRY
       case m: Multiply => m.evalMode == EvalMode.TRY
+      case c: Cast => c.evalMode == EvalMode.TRY
       case _ => false
     }
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to