This is an automated email from the ASF dual-hosted git repository.
gengliangwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 634cda09a95c [SPARK-56912][SQL] Simplify Cast to boolean codegen under
ANSI mode
634cda09a95c is described below
commit 634cda09a95c96920645aeabda90276a88713eaf
Author: Gengliang Wang <[email protected]>
AuthorDate: Fri May 29 15:17:12 2026 -0700
[SPARK-56912][SQL] Simplify Cast to boolean codegen under ANSI mode
### What changes were proposed in this pull request?
Add `toBooleanExact(UTF8String, QueryContext)` to `UTF8StringUtils` (in
`UTF8StringUtils.scala`) and call it from `Cast.scala` on the ANSI
`String -> Boolean` cast path (both eval and codegen). The new method sits
alongside the peer ANSI string helpers (`toByteExact` / `toShortExact` /
`toIntExact` / `toLongExact`). The non-ANSI path keeps the inline
`if/else if/else evNull = true` form because it has no error to throw.
### Why are the changes needed?
Part of SPARK-56908 (umbrella). In codegen, the ANSI `String -> Boolean` cast
emits an 8-line `if (isTrueString) … else if (isFalseString) … else throw`
block. This PR collapses it to a one-line
`UTF8StringUtils.toBooleanExact(...)` call.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
```
build/sbt "catalyst/testOnly *CastSuite *CastWithAnsiOnSuite
*CastWithAnsiOffSuite *AnsiCastSuite *TryCastSuite"
```
307/307 pass.
### Was this patch authored or co-authored using generative AI tooling?
Generated-by: Cursor 1.x
Closes #55937 from gengliangwang/SPARK-56912-cast-boolean.
Authored-by: Gengliang Wang <[email protected]>
Signed-off-by: Gengliang Wang <[email protected]>
---
.../apache/spark/sql/catalyst/expressions/Cast.scala | 20 ++++++++------------
.../spark/sql/catalyst/util/UTF8StringUtils.scala | 8 +++++++-
2 files changed, 15 insertions(+), 13 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 66501ebe7d5c..f190f8ca5055 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -695,6 +695,8 @@ case class Cast(
// UDFToBoolean
private[this] def castToBoolean(from: DataType): Any => Any = from match {
+ case _: StringType if ansiEnabled =>
+ buildCast[UTF8String](_, s => UTF8StringUtils.toBooleanExact(s,
getContextOrNull()))
case _: StringType =>
buildCast[UTF8String](_, s => {
if (StringUtils.isTrueString(s)) {
@@ -702,11 +704,7 @@ case class Cast(
} else if (StringUtils.isFalseString(s)) {
false
} else {
- if (ansiEnabled) {
- throw QueryExecutionErrors.invalidInputSyntaxForBooleanError(s,
getContextOrNull())
- } else {
- null
- }
+ null
}
})
case TimestampType =>
@@ -1881,22 +1879,20 @@ case class Cast(
private[this] def castToBooleanCode(
from: DataType,
ctx: CodegenContext): CastFunction = from match {
+ case _: StringType if ansiEnabled =>
+ val stringUtils =
UTF8StringUtils.getClass.getCanonicalName.stripSuffix("$")
+ val errorContext = getContextOrNullCode(ctx)
+ (c, evPrim, _) => code"$evPrim = $stringUtils.toBooleanExact($c,
$errorContext);"
case _: StringType =>
val stringUtils =
inline"${StringUtils.getClass.getName.stripSuffix("$")}"
(c, evPrim, evNull) =>
- val castFailureCode = if (ansiEnabled) {
- val errorContext = getContextOrNullCode(ctx)
- s"throw QueryExecutionErrors.invalidInputSyntaxForBooleanError($c,
$errorContext);"
- } else {
- s"$evNull = true;"
- }
code"""
if ($stringUtils.isTrueString($c)) {
$evPrim = true;
} else if ($stringUtils.isFalseString($c)) {
$evPrim = false;
} else {
- $castFailureCode
+ $evNull = true;
}
"""
case TimestampType =>
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UTF8StringUtils.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UTF8StringUtils.scala
index 1c3a5075dab2..5f9aa4695f50 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UTF8StringUtils.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UTF8StringUtils.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql.types.{ByteType, DataType,
IntegerType, LongType, Sh
import org.apache.spark.unsafe.types.UTF8String
/**
- * Helper functions for casting string to numeric values.
+ * Helper functions for casting string to primitive values under ANSI mode.
*/
object UTF8StringUtils {
@@ -39,6 +39,12 @@ object UTF8StringUtils {
def toByteExact(s: UTF8String, context: QueryContext): Byte =
withException(s.toByteExact, context, ByteType, s)
+ def toBooleanExact(s: UTF8String, context: QueryContext): Boolean = {
+ if (StringUtils.isTrueString(s)) true
+ else if (StringUtils.isFalseString(s)) false
+ else throw QueryExecutionErrors.invalidInputSyntaxForBooleanError(s,
context)
+ }
+
private def withException[A](
f: => A,
context: QueryContext,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]