xkrogen commented on a change in pull request #35332:
URL: https://github.com/apache/spark/pull/35332#discussion_r792961446
##########
File path:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala
##########
@@ -177,4 +177,18 @@ class CanonicalizeSuite extends SparkFunSuite {
assert(expr.semanticEquals(attr))
assert(attr.semanticEquals(expr))
}
+
+ test("SPARK-38030: Canonicalize Cast should remove nullability of target
dataType") {
+ val structType = StructType(Seq(StructField("name", StringType, nullable =
false)))
+ val attr = AttributeReference("col", structType)()
+ for (cast <- Seq(
+ Cast(attr, structType),
+ AnsiCast(attr, structType),
+ TryCast(attr, structType))) {
+ assert(cast.resolved)
+ // canonicalization should not convert a resolved cast to an unresolved one
+ assert(cast.canonicalized.resolved)
Review comment:
Should we be asserting on `cast.canonicalized.output` as well? This is
how the original issue was detected, right?
##########
File path:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala
##########
@@ -177,4 +177,18 @@ class CanonicalizeSuite extends SparkFunSuite {
assert(expr.semanticEquals(attr))
assert(attr.semanticEquals(expr))
}
+
+ test("SPARK-38030: Canonicalize Cast should remove nullability of target
dataType") {
+ val structType = StructType(Seq(StructField("name", StringType, nullable =
false)))
+ val attr = AttributeReference("col", structType)()
+ for (cast <- Seq(
+ Cast(attr, structType),
+ AnsiCast(attr, structType),
+ TryCast(attr, structType))) {
+ assert(cast.resolved)
+ // canonicalization should not convert a resolved cast to an unresolved one
+ assert(cast.canonicalized.resolved)
+ assert(cast.canonicalized.dataType == structType.asNullable)
Review comment:
Use triple-equals (`===`) here instead of `==`.
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
##########
@@ -310,13 +310,16 @@ abstract class CastBase extends UnaryExpression with
TimeZoneAwareExpression wit
protected def ansiEnabled: Boolean
+ protected def withDataType(dataType: DataType): CastBase
+
Review comment:
It looks like the implementation is the same across all subclasses; why
can't we implement it here?
##########
File path:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala
##########
@@ -177,4 +177,18 @@ class CanonicalizeSuite extends SparkFunSuite {
assert(expr.semanticEquals(attr))
assert(attr.semanticEquals(expr))
}
+
+ test("SPARK-38030: Canonicalize Cast should remove nullability of target
dataType") {
+ val structType = StructType(Seq(StructField("name", StringType, nullable =
false)))
+ val attr = AttributeReference("col", structType)()
+ for (cast <- Seq(
+ Cast(attr, structType),
+ AnsiCast(attr, structType),
+ TryCast(attr, structType))) {
Review comment:
```suggestion
Seq(Cast.apply _, AnsiCast.apply _, TryCast.apply _)
.map(_.apply(attr, structType, None))
.foreach { cast =>
```
Not sure if this is better, but it does deduplicate the args. A bit messy
either way.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]