gengliangwang commented on code in PR #37486:
URL: https://github.com/apache/spark/pull/37486#discussion_r944181199
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala:
##########
@@ -460,29 +500,57 @@ case class Cast(
final override def nodePatternsInternal(): Seq[TreePattern] = Seq(CAST)
- private def typeCheckFailureMessage: String = if (ansiEnabled) {
- if (getTagValue(Cast.BY_TABLE_INSERTION).isDefined) {
- Cast.typeCheckFailureMessage(child.dataType, dataType,
- Some(SQLConf.STORE_ASSIGNMENT_POLICY.key ->
-   SQLConf.StoreAssignmentPolicy.LEGACY.toString))
- } else {
- Cast.typeCheckFailureMessage(child.dataType, dataType,
- Some(SQLConf.ANSI_ENABLED.key -> "false"))
- }
- } else {
- s"cannot cast ${child.dataType.catalogString} to ${dataType.catalogString}"
+ def ansiEnabled: Boolean = {
+ evalMode == EvalMode.ANSI || evalMode == EvalMode.TRY
+ }
+
+ // Whether this expression is used for `try_cast()`.
+ def isTryCast: Boolean = {
+ evalMode == EvalMode.TRY
+ }
+
+ private def typeCheckFailureMessage: String = evalMode match {
+ case EvalMode.ANSI =>
+ if (getTagValue(Cast.BY_TABLE_INSERTION).isDefined) {
+ Cast.typeCheckFailureMessage(child.dataType, dataType,
+ Some(SQLConf.STORE_ASSIGNMENT_POLICY.key ->
+ SQLConf.StoreAssignmentPolicy.LEGACY.toString))
+ } else {
+ Cast.typeCheckFailureMessage(child.dataType, dataType,
+ Some(SQLConf.ANSI_ENABLED.key -> "false"))
+ }
+ case EvalMode.TRY =>
+ Cast.typeCheckFailureMessage(child.dataType, dataType, None)
+ case _ =>
+   s"cannot cast ${child.dataType.catalogString} to ${dataType.catalogString}"
}
override def checkInputDataTypes(): TypeCheckResult = {
- if (ansiEnabled && Cast.canAnsiCast(child.dataType, dataType)) {
- TypeCheckResult.TypeCheckSuccess
- } else if (!ansiEnabled && Cast.canCast(child.dataType, dataType)) {
+ val canCast = evalMode match {
+ case EvalMode.LEGACY => Cast.canCast(child.dataType, dataType)
+ case EvalMode.ANSI => Cast.canAnsiCast(child.dataType, dataType)
+ case EvalMode.TRY => Cast.canTryCast(child.dataType, dataType)
+ case other => throw new IllegalArgumentException(s"Unknown EvalMode value: $other")
+ }
+ if (canCast) {
TypeCheckResult.TypeCheckSuccess
} else {
TypeCheckResult.TypeCheckFailure(typeCheckFailureMessage)
}
}
- override def nullable: Boolean = child.nullable || Cast.forceNullable(child.dataType, dataType)
+ override def nullable: Boolean = if (!isTryCast) {
+ child.nullable || Cast.forceNullable(child.dataType, dataType)
+ } else {
+ (child.dataType, dataType) match {
+ case (StringType, BinaryType) => child.nullable
+ // TODO: Implement a more accurate method for checking whether a decimal value can be cast
+ //       as integral types without overflow. Currently, the cast can overflow even if
+ //       "Cast.canUpCast" method returns true.
+ case (_: DecimalType, _: IntegralType) => true
Review Comment:
A Decimal(10, 0) is considered safe to "canUpCast" to Int. However, `Int.MaxValue +
1` also fits in a Decimal(10, 0), so the cast can still overflow.
This behavior has been in place for a long time, and the strict table-insertion
policy relies on it.
I would consider fixing it in another PR. For this one I will leave it as it
is now.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]