This is an automated email from the ASF dual-hosted git repository.

gengliangwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 634cda09a95c [SPARK-56912][SQL] Simplify Cast to boolean codegen under ANSI mode
634cda09a95c is described below

commit 634cda09a95c96920645aeabda90276a88713eaf
Author: Gengliang Wang <[email protected]>
AuthorDate: Fri May 29 15:17:12 2026 -0700

    [SPARK-56912][SQL] Simplify Cast to boolean codegen under ANSI mode
    
    ### What changes were proposed in this pull request?
    
    Extend `UTF8StringUtils.scala` with `toBooleanExact(UTF8String, QueryContext)`
    and use it from `Cast.scala` for the ANSI `String -> Boolean` cast path (both
    eval and codegen), placing it alongside the existing ANSI string helpers
    (`toByteExact` / `toShortExact` / `toIntExact` / `toLongExact`). The non-ANSI
    path keeps the inline `if/else if/else evNull = true` form because it has no
    error to throw.
    
    ### Why are the changes needed?
    
    Part of SPARK-56908 (umbrella). The ANSI String->Boolean cast emits an 8-line
    `if (isTrueString) … else if (isFalseString) … else throw` block in codegen.
    This PR collapses it to a one-line `UTF8StringUtils.toBooleanExact(...)` call.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    ```
    build/sbt "catalyst/testOnly *CastSuite *CastWithAnsiOnSuite *CastWithAnsiOffSuite *AnsiCastSuite *TryCastSuite"
    ```
    
    307/307 pass.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Generated-by: Cursor 1.x
    
    Closes #55937 from gengliangwang/SPARK-56912-cast-boolean.
    
    Authored-by: Gengliang Wang <[email protected]>
    Signed-off-by: Gengliang Wang <[email protected]>
---
 .../apache/spark/sql/catalyst/expressions/Cast.scala | 20 ++++++++------------
 .../spark/sql/catalyst/util/UTF8StringUtils.scala    |  8 +++++++-
 2 files changed, 15 insertions(+), 13 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 66501ebe7d5c..f190f8ca5055 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -695,6 +695,8 @@ case class Cast(
 
   // UDFToBoolean
   private[this] def castToBoolean(from: DataType): Any => Any = from match {
+    case _: StringType if ansiEnabled =>
+      buildCast[UTF8String](_, s => UTF8StringUtils.toBooleanExact(s, 
getContextOrNull()))
     case _: StringType =>
       buildCast[UTF8String](_, s => {
         if (StringUtils.isTrueString(s)) {
@@ -702,11 +704,7 @@ case class Cast(
         } else if (StringUtils.isFalseString(s)) {
           false
         } else {
-          if (ansiEnabled) {
-            throw QueryExecutionErrors.invalidInputSyntaxForBooleanError(s, 
getContextOrNull())
-          } else {
-            null
-          }
+          null
         }
       })
     case TimestampType =>
@@ -1881,22 +1879,20 @@ case class Cast(
   private[this] def castToBooleanCode(
       from: DataType,
       ctx: CodegenContext): CastFunction = from match {
+    case _: StringType if ansiEnabled =>
+      val stringUtils = 
UTF8StringUtils.getClass.getCanonicalName.stripSuffix("$")
+      val errorContext = getContextOrNullCode(ctx)
+      (c, evPrim, _) => code"$evPrim = $stringUtils.toBooleanExact($c, 
$errorContext);"
     case _: StringType =>
       val stringUtils = 
inline"${StringUtils.getClass.getName.stripSuffix("$")}"
       (c, evPrim, evNull) =>
-        val castFailureCode = if (ansiEnabled) {
-          val errorContext = getContextOrNullCode(ctx)
-          s"throw QueryExecutionErrors.invalidInputSyntaxForBooleanError($c, 
$errorContext);"
-        } else {
-          s"$evNull = true;"
-        }
         code"""
           if ($stringUtils.isTrueString($c)) {
             $evPrim = true;
           } else if ($stringUtils.isFalseString($c)) {
             $evPrim = false;
           } else {
-            $castFailureCode
+            $evNull = true;
           }
         """
     case TimestampType =>
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UTF8StringUtils.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UTF8StringUtils.scala
index 1c3a5075dab2..5f9aa4695f50 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UTF8StringUtils.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UTF8StringUtils.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql.types.{ByteType, DataType, 
IntegerType, LongType, Sh
 import org.apache.spark.unsafe.types.UTF8String
 
 /**
- * Helper functions for casting string to numeric values.
+ * Helper functions for casting string to primitive values under ANSI mode.
  */
 object UTF8StringUtils {
 
@@ -39,6 +39,12 @@ object UTF8StringUtils {
   def toByteExact(s: UTF8String, context: QueryContext): Byte =
     withException(s.toByteExact, context, ByteType, s)
 
+  def toBooleanExact(s: UTF8String, context: QueryContext): Boolean = {
+    if (StringUtils.isTrueString(s)) true
+    else if (StringUtils.isFalseString(s)) false
+    else throw QueryExecutionErrors.invalidInputSyntaxForBooleanError(s, 
context)
+  }
+
   private def withException[A](
       f: => A,
       context: QueryContext,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to