xkrogen commented on a change in pull request #35332:
URL: https://github.com/apache/spark/pull/35332#discussion_r792961446



##########
File path: 
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala
##########
@@ -177,4 +177,18 @@ class CanonicalizeSuite extends SparkFunSuite {
     assert(expr.semanticEquals(attr))
     assert(attr.semanticEquals(expr))
   }
+
+  test("SPARK-38030: Canonicalize Cast should remove nullability of target 
dataType") {
+    val structType = StructType(Seq(StructField("name", StringType, nullable = 
false)))
+    val attr = AttributeReference("col", structType)()
+    for (cast <- Seq(
+        Cast(attr, structType),
+        AnsiCast(attr, structType),
+        TryCast(attr, structType))) {
+      assert(cast.resolved)
+      // canonicalization should not convert a resolved cast to an unresolved one
+      assert(cast.canonicalized.resolved)

Review comment:
       Should we be asserting on `cast.canonicalized.output` as well? This is 
how the original issue was detected, right?

##########
File path: 
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala
##########
@@ -177,4 +177,18 @@ class CanonicalizeSuite extends SparkFunSuite {
     assert(expr.semanticEquals(attr))
     assert(attr.semanticEquals(expr))
   }
+
+  test("SPARK-38030: Canonicalize Cast should remove nullability of target 
dataType") {
+    val structType = StructType(Seq(StructField("name", StringType, nullable = 
false)))
+    val attr = AttributeReference("col", structType)()
+    for (cast <- Seq(
+        Cast(attr, structType),
+        AnsiCast(attr, structType),
+        TryCast(attr, structType))) {
+      assert(cast.resolved)
+      // canonicalization should not convert a resolved cast to an unresolved one
+      assert(cast.canonicalized.resolved)
+      assert(cast.canonicalized.dataType == structType.asNullable)

Review comment:
       Use triple-equals (`===`) here.

##########
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
##########
@@ -310,13 +310,16 @@ abstract class CastBase extends UnaryExpression with 
TimeZoneAwareExpression wit
 
   protected def ansiEnabled: Boolean
 
+  protected def withDataType(dataType: DataType): CastBase
+

Review comment:
       It looks like the implementation is the same in all subclasses. Why 
can't we implement it here?

##########
File path: 
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala
##########
@@ -177,4 +177,18 @@ class CanonicalizeSuite extends SparkFunSuite {
     assert(expr.semanticEquals(attr))
     assert(attr.semanticEquals(expr))
   }
+
+  test("SPARK-38030: Canonicalize Cast should remove nullability of target 
dataType") {
+    val structType = StructType(Seq(StructField("name", StringType, nullable = 
false)))
+    val attr = AttributeReference("col", structType)()
+    for (cast <- Seq(
+        Cast(attr, structType),
+        AnsiCast(attr, structType),
+        TryCast(attr, structType))) {

Review comment:
       ```suggestion
       Seq(Cast.apply _, AnsiCast.apply _, TryCast.apply _)
           .map(_.apply(attr, structType, None))
           .foreach { cast =>
       ```
       Not sure if this is better, but it does deduplicate the args. It is a 
bit messy either way.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to