cloud-fan commented on code in PR #45383:
URL: https://github.com/apache/spark/pull/45383#discussion_r1530367223


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala:
##########
@@ -764,6 +773,184 @@ abstract class TypeCoercionBase {
     }
   }
 
+  object CollationTypeCasts extends TypeCoercionRule {
+    override val transform: PartialFunction[Expression, Expression] = {
+      case e if !e.childrenResolved => e
+
+      case checkCastWithIndeterminate: Concat
+        if shouldCast(checkCastWithIndeterminate.children) =>
+        val newChildren =
+          collateToSingleType(checkCastWithIndeterminate.children, 
failOnIndeterminate = false)
+        checkCastWithIndeterminate.withNewChildren(newChildren)
+
+      case checkCastWithoutIndeterminate@(_: BinaryExpression | _: In | _: 
SortOrder)
+        if shouldCast(checkCastWithoutIndeterminate.children) =>
+        val newChildren = 
collateToSingleType(checkCastWithoutIndeterminate.children)
+        checkCastWithoutIndeterminate.withNewChildren(newChildren)
+
+      case checkIndeterminate@(_: BinaryExpression | _: In | _: SortOrder)
+        if hasIndeterminate(checkIndeterminate.children
+          .filter(e => hasStringType(e.dataType))
+          .map(e => extractStringType(e.dataType))) =>
+        throw QueryCompilationErrors.indeterminateCollationError()
+
+      case checkImplicitCastInputTypes: ImplicitCastInputTypes
+        if checkImplicitCastInputTypes.children.exists(e => 
hasStringType(e.dataType))
+          && checkImplicitCastInputTypes.inputTypes.nonEmpty =>
+        val collationId: Int =
+          getOutputCollation(checkImplicitCastInputTypes
+            .children.filter { e => hasStringType(e.dataType) })
+        val children: Seq[Expression] = checkImplicitCastInputTypes
+          .children.zip(checkImplicitCastInputTypes.inputTypes).map {
+            case (e, st) if hasStringType(st) =>
+              castStringType(e, collationId, Some(st)).getOrElse(e)
+            case (e, TypeCollection(types)) if types.exists(hasStringType) =>
+              types.flatMap{ dt =>
+                if (hasStringType(dt)) {
+                  castStringType(e, collationId, Some(dt))
+                } else {
+                  implicitCast(e, dt)
+                }
+              }.headOption.getOrElse(e)
+            case (in, expected) => implicitCast(in, expected).getOrElse(in)
+          }
+        checkImplicitCastInputTypes.withNewChildren(children)
+
+      case checkExpectsInputType: ExpectsInputTypes

Review Comment:
   Unfortunately, we do not have a trait to indicate that the inputs of an 
expression should all have the same data type. Instead, we have various type 
coercion rules that do this for specific expressions: `InConversion`, 
`ConcatCoercion`, `IfCoercion`, etc.
   
   This is probably not a big problem, as not many functions support more than 
one string input, and we can add a new rule to merge string collations.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to