stefankandic commented on code in PR #49103:
URL: https://github.com/apache/spark/pull/49103#discussion_r1899531278
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCoercion.scala:
##########
@@ -461,6 +421,66 @@ object CollationTypeCoercion {
else right
}
}
+
+ /**
+ * Throws an analysis exception if the new data type has indeterminate collation,
+ * and the expression is not allowed to have inputs with indeterminate collations.
+ */
+ private def checkIndeterminateCollation(expression: Expression, newDataType: DataType): Unit = {
+ if (shouldFailWithIndeterminateCollation(expression, newDataType)) {
+ expression.failAnalysis(
+ errorClass = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+ messageParameters = Map("expr" -> toSQLExpr(expression)))
+ }
+ }
+
+ /**
+ * Returns whether the given expression which isn't allowed to have inputs with indeterminate
+ * collations has indeterminate collation.
+ */
+ private def shouldFailWithIndeterminateCollation(expression: Expression): Boolean = {
+ def getDataTypeSafe(e: Expression): DataType = try {
+ e.dataType
+ } catch {
+ case _: Throwable => NullType
+ }
+
+ expression.children.exists(child => expression.resolved &&
+ shouldFailWithIndeterminateCollation(expression, getDataTypeSafe(child)))
+ }
+
+ /**
+ * Returns whether the given expression should fail with indeterminate collation if it is cast
+ * to the given data type.
+ */
+ private def shouldFailWithIndeterminateCollation(
+ expression: Expression,
+ dataType: DataType): Boolean = {
+ !canContainIndeterminateCollation(expression) && hasIndeterminateCollation(dataType)
+ }
+
+ /**
+ * Returns whether the given data type has indeterminate collation.
+ */
+ private def hasIndeterminateCollation(dataType: DataType): Boolean = {
+ dataType.existsRecursively {
+ case IndeterminateStringType | StringTypeWithContext(_, Indeterminate) => true
+ case _ => false
+ }
+ }
+
+ /**
+ * Returns whether the given expression can contain indeterminate collation.
+ */
+ private def canContainIndeterminateCollation(expr: Expression): Boolean = expr match {
+ // This is not an exhaustive list, and it's fine to miss some expressions. The only difference
Review Comment:
The code comment already explains that, so I am not sure what change you are
proposing. How would you like it to be modified?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]