This is an automated email from the ASF dual-hosted git repository.
yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 9608416ef8 [GLUTEN-9291][VL] Fix Spark try_cast and cast function
error case handling (#9292)
9608416ef8 is described below
commit 9608416ef88011e701d2e2ea22fa74a704c86f59
Author: Zhen Li <[email protected]>
AuthorDate: Tue Jun 24 17:00:14 2025 +0800
[GLUTEN-9291][VL] Fix Spark try_cast and cast function error case handling
(#9292)
* Fix try_cast wrong round behavior
* fix build
* update velox branc
* update branch
* fix build
* fix build
---------
Co-authored-by: zhli <[email protected]>
---
.../backendsapi/velox/VeloxTransformerApi.scala | 2 +-
.../functions/ScalarFunctionsValidateSuite.scala | 66 ++++++++++++++++++++++
cpp/velox/substrait/SubstraitToVeloxExpr.cc | 8 +--
cpp/velox/substrait/VeloxToSubstraitExpr.cc | 2 +-
ep/build-velox/src/get_velox.sh | 4 +-
.../gluten/substrait/expression/CastNode.java | 8 +--
.../substrait/expression/ExpressionBuilder.java | 4 +-
.../expression/UnaryExpressionTransformer.scala | 2 +-
.../gluten/sql/shims/spark34/Spark34Shims.scala | 1 +
.../gluten/sql/shims/spark35/Spark35Shims.scala | 1 +
10 files changed, 82 insertions(+), 16 deletions(-)
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxTransformerApi.scala
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxTransformerApi.scala
index bfc4a27b51..76a1a0e3a7 100644
---
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxTransformerApi.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxTransformerApi.scala
@@ -85,7 +85,7 @@ class VeloxTransformerApi extends TransformerApi with Logging
{
childNode
} else {
val typeNode = ConverterUtils.getTypeNode(dataType, nullable)
- ExpressionBuilder.makeCast(typeNode, childNode, !nullOnOverflow)
+ ExpressionBuilder.makeCast(typeNode, childNode, nullOnOverflow)
}
}
diff --git
a/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala
b/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala
index 58a17bc9d9..0ce16d9bde 100644
---
a/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala
@@ -1071,6 +1071,39 @@ abstract class ScalarFunctionsValidateSuite extends
FunctionsValidateSuite {
}
}
}
+ runQueryAndCompare("select try_cast(' 123 ' AS int)") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare("select try_cast('2147483648' AS int)") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare("select try_cast('12a34' AS int)") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare("select try_cast('2023-08-21 ' AS date)") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare("select try_cast(' true' AS boolean)") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare("select try_cast('null' AS int)") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare("select try_cast('on' AS BOOLEAN)") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare("select try_cast(128 AS DECIMAL(2, 0))") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare("select try_cast(128 AS TINYINT)") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare("select try_cast(9223372036854775807 AS int)") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare("select try_cast('123.0' AS INT)") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
}
test("cast") {
@@ -1090,6 +1123,39 @@ abstract class ScalarFunctionsValidateSuite extends
FunctionsValidateSuite {
}
}
}
+ runQueryAndCompare("select cast(' 123 ' AS int)") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare("select cast('2147483648' AS int)") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare("select cast('12a34' AS int)") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare("select cast('2023-08-21 ' AS date)") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare("select cast(' true' AS boolean)") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare("select cast('null' AS int)") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare("select cast('on' AS BOOLEAN)") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare("select cast(128 AS DECIMAL(2, 0))") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare("select cast(128 AS TINYINT)") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare("select cast(9223372036854775807 AS int)") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ runQueryAndCompare("select cast('123.0' AS INT)") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
}
testWithMinSparkVersion("equal_null", "3.4") {
diff --git a/cpp/velox/substrait/SubstraitToVeloxExpr.cc
b/cpp/velox/substrait/SubstraitToVeloxExpr.cc
index eb75306ff6..798071eff3 100755
--- a/cpp/velox/substrait/SubstraitToVeloxExpr.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxExpr.cc
@@ -146,8 +146,8 @@ TypePtr getScalarType(const
::substrait::Expression::Literal& literal) {
}
}
-/// Whether null will be returned on cast failure.
-bool isNullOnFailure(::substrait::Expression::Cast::FailureBehavior
failureBehavior) {
+/// Whether is try cast.
+bool isTryCast(::substrait::Expression::Cast::FailureBehavior failureBehavior)
{
switch (failureBehavior) {
case
::substrait::Expression_Cast_FailureBehavior_FAILURE_BEHAVIOR_UNSPECIFIED:
case
::substrait::Expression_Cast_FailureBehavior_FAILURE_BEHAVIOR_THROW_EXCEPTION:
@@ -562,10 +562,8 @@ core::TypedExprPtr
SubstraitVeloxExprConverter::toVeloxExpr(
const ::substrait::Expression::Cast& castExpr,
const RowTypePtr& inputType) {
auto type = SubstraitParser::parseType(castExpr.type());
- bool nullOnFailure = isNullOnFailure(castExpr.failure_behavior());
-
std::vector<core::TypedExprPtr> inputs{toVeloxExpr(castExpr.input(),
inputType)};
- return std::make_shared<core::CastTypedExpr>(type, inputs, nullOnFailure);
+ return std::make_shared<core::CastTypedExpr>(type, inputs,
isTryCast(castExpr.failure_behavior()));
}
core::TypedExprPtr SubstraitVeloxExprConverter::toVeloxExpr(
diff --git a/cpp/velox/substrait/VeloxToSubstraitExpr.cc
b/cpp/velox/substrait/VeloxToSubstraitExpr.cc
index f73babfdf6..e77706edc7 100644
--- a/cpp/velox/substrait/VeloxToSubstraitExpr.cc
+++ b/cpp/velox/substrait/VeloxToSubstraitExpr.cc
@@ -380,7 +380,7 @@ const ::substrait::Expression_Cast&
VeloxToSubstraitExprConvertor::toSubstraitEx
for (auto& arg : castExprInputs) {
substraitCastExpr->mutable_input()->MergeFrom(toSubstraitExpr(arena, arg,
inputType));
}
- if (castExpr->nullOnFailure()) {
+ if (castExpr->isTryCast()) {
substraitCastExpr->set_failure_behavior(::substrait::Expression_Cast_FailureBehavior_FAILURE_BEHAVIOR_RETURN_NULL);
} else {
substraitCastExpr->set_failure_behavior(
diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh
index a478e380c8..ae0ab2d80e 100755
--- a/ep/build-velox/src/get_velox.sh
+++ b/ep/build-velox/src/get_velox.sh
@@ -17,11 +17,11 @@
set -exu
VELOX_REPO=https://github.com/oap-project/velox.git
-VELOX_BRANCH=2025_06_23
+VELOX_BRANCH=2025_06_24
VELOX_HOME=""
RUN_SETUP_SCRIPT=ON
VELOX_ENHANCED_REPO=https://github.com/oap-project/velox.git
-VELOX_ENHANCED_BRANCH=2025_06_10
+VELOX_ENHANCED_BRANCH=2025_06_24
ENABLE_ENHANCED_FEATURES=OFF
# Developer use only for testing Velox PR.
diff --git
a/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/CastNode.java
b/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/CastNode.java
index 3d2def2bd1..1984c44d74 100644
---
a/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/CastNode.java
+++
b/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/CastNode.java
@@ -26,12 +26,12 @@ public class CastNode implements ExpressionNode,
Serializable {
private final TypeNode typeNode;
private final ExpressionNode expressionNode;
- public final boolean throwOnFailure;
+ public final boolean isTryCast;
- CastNode(TypeNode typeNode, ExpressionNode expressionNode, boolean
throwOnFailure) {
+ CastNode(TypeNode typeNode, ExpressionNode expressionNode, boolean
isTryCast) {
this.typeNode = typeNode;
this.expressionNode = expressionNode;
- this.throwOnFailure = throwOnFailure;
+ this.isTryCast = isTryCast;
}
@Override
@@ -39,7 +39,7 @@ public class CastNode implements ExpressionNode, Serializable
{
Expression.Cast.Builder castBuilder = Expression.Cast.newBuilder();
castBuilder.setType(typeNode.toProtobuf());
castBuilder.setInput(expressionNode.toProtobuf());
- if (throwOnFailure) {
+ if (!isTryCast) {
// Throw exception on failure.
castBuilder.setFailureBehaviorValue(2);
} else {
diff --git
a/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/ExpressionBuilder.java
b/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/ExpressionBuilder.java
index 4c456e2142..4bdef37878 100644
---
a/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/ExpressionBuilder.java
+++
b/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/ExpressionBuilder.java
@@ -238,8 +238,8 @@ public class ExpressionBuilder {
}
public static CastNode makeCast(
- TypeNode typeNode, ExpressionNode expressionNode, boolean
throwOnFailure) {
- return new CastNode(typeNode, expressionNode, throwOnFailure);
+ TypeNode typeNode, ExpressionNode expressionNode, boolean isTryCast) {
+ return new CastNode(typeNode, expressionNode, isTryCast);
}
public static StringMapNode makeStringMap(Map<String, String> values) {
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala
index f309621cce..b762ec95b6 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala
@@ -48,7 +48,7 @@ case class CastTransformer(substraitExprName: String, child:
ExpressionTransform
ExpressionBuilder.makeCast(
typeNode,
child.doTransform(context),
- SparkShimLoader.getSparkShims.withAnsiEvalMode(original))
+ SparkShimLoader.getSparkShims.withTryEvalMode(original))
}
}
diff --git
a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
index 508e256106..5e08764a04 100644
---
a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
+++
b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
@@ -547,6 +547,7 @@ class Spark34Shims extends SparkShims {
case s: Subtract => s.evalMode == EvalMode.TRY
case d: Divide => d.evalMode == EvalMode.TRY
case m: Multiply => m.evalMode == EvalMode.TRY
+ case c: Cast => c.evalMode == EvalMode.TRY
case _ => false
}
}
diff --git
a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
index 7ee55675ba..f953fd90fe 100644
---
a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
+++
b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
@@ -580,6 +580,7 @@ class Spark35Shims extends SparkShims {
case s: Subtract => s.evalMode == EvalMode.TRY
case d: Divide => d.evalMode == EvalMode.TRY
case m: Multiply => m.evalMode == EvalMode.TRY
+ case c: Cast => c.evalMode == EvalMode.TRY
case _ => false
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]