attilapiros commented on code in PR #53458:
URL: https://github.com/apache/spark/pull/53458#discussion_r2756312800
##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastWithAnsiOffSuite.scala:
##########
@@ -930,4 +931,139 @@ class CastWithAnsiOffSuite extends CastSuiteBase {
checkEvaluation(cast(largeTime1, ShortType), null)
checkEvaluation(cast(largeTime1, ByteType), null)
}
+
+ test("LEGACY mode: cast invalid UTF-8 binary to string should return null") {
+ withSQLConf(
+ SQLConf.VALIDATE_BINARY_TO_STRING_CAST.key -> "true",
+ SQLConf.ANSI_ENABLED.key -> "false") {
+ // Create Cast expressions inside withSQLConf so they pick up the
correct config
+ // In LEGACY mode with validation enabled, invalid UTF-8 returns null
+ checkEvaluation(cast(invalidUtf8Literal, StringType), null)
+
+ // Valid UTF-8 should work
+ checkEvaluation(cast(validUtf8Literal, StringType),
UTF8String.fromString("Hello"))
+
+ // Empty binary should work
+ checkEvaluation(cast(emptyBinaryLiteral, StringType),
UTF8String.fromString(""))
+ }
+ }
+
+ test("LEGACY mode: cast invalid UTF-8 with validation disabled (old
behavior)") {
+ withSQLConf(SQLConf.VALIDATE_BINARY_TO_STRING_CAST.key -> "false") {
Review Comment:
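`ANSI_ENABLED` is true by default!
So this test actually exercises ANSI mode with `VALIDATE_BINARY_TO_STRING_CAST=false`, not the LEGACY mode its name claims. To cover LEGACY mode, also set `SQLConf.ANSI_ENABLED.key -> "false"` here, as the preceding test does.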
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]