dejankrak-db commented on code in PR #49103:
URL: https://github.com/apache/spark/pull/49103#discussion_r1899564426
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCoercion.scala:
##########
@@ -461,6 +421,66 @@ object CollationTypeCoercion {
else right
}
}
+
+ /**
+ * Throws an analysis exception if the new data type has indeterminate
collation,
+ * and the expression is not allowed to have inputs with indeterminate
collations.
+ */
+ private def checkIndeterminateCollation(expression: Expression, newDataType:
DataType): Unit = {
+ if (shouldFailWithIndeterminateCollation(expression, newDataType)) {
+ expression.failAnalysis(
+ errorClass = "INDETERMINATE_COLLATION_IN_EXPRESSION",
+ messageParameters = Map("expr" -> toSQLExpr(expression)))
+ }
+ }
+
+ /**
+ * Returns whether the given expression which isn't allowed to have inputs
with indeterminate
+ * collations has indeterminate collation.
+ */
+ private def shouldFailWithIndeterminateCollation(expression: Expression):
Boolean = {
+ def getDataTypeSafe(e: Expression): DataType = try {
+ e.dataType
+ } catch {
+ case _: Throwable => NullType
+ }
+
+ expression.children.exists(child => expression.resolved &&
+ shouldFailWithIndeterminateCollation(expression, getDataTypeSafe(child)))
+ }
+
+ /**
+ * Returns whether the given expression should fail with indeterminate
collation if it is cast
+ * to the given data type.
+ */
+ private def shouldFailWithIndeterminateCollation(
+ expression: Expression,
+ dataType: DataType): Boolean = {
+ !canContainIndeterminateCollation(expression) &&
hasIndeterminateCollation(dataType)
+ }
+
+ /**
+ * Returns whether the given data type has indeterminate collation.
+ */
+ private def hasIndeterminateCollation(dataType: DataType): Boolean = {
+ dataType.existsRecursively {
+ case IndeterminateStringType | StringTypeWithContext(_, Indeterminate)
=> true
+ case _ => false
+ }
+ }
+
+ /**
+ * Returns whether the given expression can contain indeterminate collation.
+ */
+ private def canContainIndeterminateCollation(expr: Expression): Boolean =
expr match {
+ // This is not an exhaustive list, and it's fine to miss some expressions.
The only difference
Review Comment:
Well, I wouldn't use the wording 'it's fine to miss some expressions',
as I assume that we want to encourage an engineer who adds a new expression that
cannot contain indeterminate collation to also add it to the list below. An
additional explanation can be provided: if such an expression is not added to
the list even though it cannot contain indeterminate collation, it will still
fail at runtime, as already pointed out.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]