dbatomic commented on code in PR #45383:
URL: https://github.com/apache/spark/pull/45383#discussion_r1519730371
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala:
##########
@@ -138,21 +140,31 @@ object AnsiTypeCoercion extends TypeCoercionBase {
@scala.annotation.tailrec
private def findWiderTypeForString(dt1: DataType, dt2: DataType):
Option[DataType] = {
(dt1, dt2) match {
- case (StringType, _: IntegralType) => Some(LongType)
- case (StringType, _: FractionalType) => Some(DoubleType)
- case (StringType, NullType) => Some(StringType)
+ case (_: StringType, _: IntegralType) => Some(LongType)
+ case (_: StringType, _: FractionalType) => Some(DoubleType)
+ case (st: StringType, NullType) => Some(st)
// If a binary operation contains interval type and string, we can't
decide which
// interval type the string should be promoted as. There are many
possible interval
// types, such as year interval, month interval, day interval, hour
interval, etc.
- case (StringType, _: AnsiIntervalType) => None
- case (StringType, a: AtomicType) => Some(a)
- case (other, StringType) if other != StringType =>
findWiderTypeForString(StringType, other)
+ case (_: StringType, _: AnsiIntervalType) => None
+ case (_: StringType, a: AtomicType) => Some(a)
+ case (other, st: StringType) if !other.isInstanceOf[StringType] =>
+ findWiderTypeForString(st, other)
case _ => None
}
}
- override def findWiderCommonType(types: Seq[DataType]): Option[DataType] = {
- types.foldLeft[Option[DataType]](Some(NullType))((r, c) =>
+ override def findWiderCommonType(exprs: Seq[Expression],
+ failOnIndeterminate: Boolean = false):
Option[DataType] = {
+ (if (exprs.map(_.dataType).partition(hasStringType)._1.distinct.size > 1) {
Review Comment:
I see. Still, I think partition + select first in tuple is pretty weird way
to filter?
Shouldn't `exprs.map(_.dataType).filter(hasStringType).distinct.size ` be
more performant and readable?
All of that + comment that you shouldn't be using `distinct` against
`StringType` for checking collation equivalence since `StringType` may in
future hold other properties.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]