mihailom-db commented on code in PR #45383:
URL: https://github.com/apache/spark/pull/45383#discussion_r1530323337
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala:
##########
@@ -764,6 +773,184 @@ abstract class TypeCoercionBase {
}
}
+ object CollationTypeCasts extends TypeCoercionRule {
+ override val transform: PartialFunction[Expression, Expression] = {
+ case e if !e.childrenResolved => e
+
+ case checkCastWithIndeterminate: Concat
+ if shouldCast(checkCastWithIndeterminate.children) =>
+ val newChildren =
+ collateToSingleType(checkCastWithIndeterminate.children,
failOnIndeterminate = false)
+ checkCastWithIndeterminate.withNewChildren(newChildren)
+
+ case checkCastWithoutIndeterminate@(_: BinaryExpression | _: In | _:
SortOrder)
+ if shouldCast(checkCastWithoutIndeterminate.children) =>
+ val newChildren =
collateToSingleType(checkCastWithoutIndeterminate.children)
+ checkCastWithoutIndeterminate.withNewChildren(newChildren)
+
+ case checkIndeterminate@(_: BinaryExpression | _: In | _: SortOrder)
+ if hasIndeterminate(checkIndeterminate.children
+ .filter(e => hasStringType(e.dataType))
+ .map(e => extractStringType(e.dataType))) =>
+ throw QueryCompilationErrors.indeterminateCollationError()
+
+ case checkImplicitCastInputTypes: ImplicitCastInputTypes
+ if checkImplicitCastInputTypes.children.exists(e =>
hasStringType(e.dataType))
+ && checkImplicitCastInputTypes.inputTypes.nonEmpty =>
+ val collationId: Int =
+ getOutputCollation(checkImplicitCastInputTypes
+ .children.filter { e => hasStringType(e.dataType) })
+ val children: Seq[Expression] = checkImplicitCastInputTypes
+ .children.zip(checkImplicitCastInputTypes.inputTypes).map {
+ case (e, st) if hasStringType(st) =>
+ castStringType(e, collationId, Some(st)).getOrElse(e)
+ case (e, TypeCollection(types)) if types.exists(hasStringType) =>
+ types.flatMap{ dt =>
+ if (hasStringType(dt)) {
+ castStringType(e, collationId, Some(dt))
+ } else {
+ implicitCast(e, dt)
+ }
+ }.headOption.getOrElse(e)
+ case (in, expected) => implicitCast(in, expected).getOrElse(in)
+ }
+ checkImplicitCastInputTypes.withNewChildren(children)
+
+ case checkExpectsInputType: ExpectsInputTypes
Review Comment:
Also, I was a bit sceptical about changing acceptsType in StringType. If I
understood correctly, this function should tell us which types can be used for
this expression without implicit casting. Unfortunately, even after Uros's PR
https://github.com/apache/spark/pull/45422 we will still have this problem. We
do not want a function to receive inputs with two different binary collations,
such as UNICODE and UTF8_BINARY, and still say that it is fine to call the
function. We specifically need one collation. One workaround would be to
include the collation ID in the check for all the special cases. This would
also change the class hierarchy, as the additional objects that Uros added
would need to extend StringType.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]