Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/19389#discussion_r150528361
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
---
@@ -468,14 +460,16 @@ object PartitioningUtils {
}
/**
- * Given a collection of [[Literal]]s, resolves possible type conflicts
by up-casting "lower"
- * types.
+ * Given a collection of [[Literal]]s, resolves possible type conflicts
by
+ * [[TypeCoercion.findWiderCommonType]]. See
[[TypeCoercion.findWiderTypeForTwo]].
*/
private def resolveTypeConflicts(literals: Seq[Literal], timeZone:
TimeZone): Seq[Literal] = {
- val desiredType = {
- val topType =
literals.map(_.dataType).maxBy(upCastingOrder.indexOf(_))
--- End diff --
So, for the types:
```scala
Seq(
NullType, IntegerType, LongType, DoubleType,
*DecimalType(...), DateType, TimestampType, StringType)
```
I produced a chart as below by this codes:
```scala
test("Print out chart") {
val supportedTypes: Seq[DataType] = Seq(
NullType, IntegerType, LongType, DoubleType,
DecimalType(38, 0), DateType, TimestampType, StringType)
val combinations = for {
t1 <- supportedTypes
t2 <- supportedTypes
} yield Seq(t1,t2)
// Old type conflict resolution:
val upCastingOrder: Seq[DataType] =
Seq(NullType, IntegerType, LongType, FloatType, DoubleType, StringType)
def oldResolveTypeConflicts(dataTypes: Seq[DataType]): DataType = {
val topType = dataTypes.maxBy(upCastingOrder.indexOf(_))
if (topType == NullType) StringType else topType
}
// New type conflict resolution:
def newResolveTypeConflicts(dataTypes: Seq[DataType]): DataType = {
TypeCoercion.findWiderCommonType(dataTypes) match {
case Some(NullType) => StringType
case Some(dt: DataType) => dt
case _ => StringType
}
}
println("|Input types|Old output type|New output type|")
println("|-----------|---------------|---------------|")
combinations.foreach { pair =>
val oldType = oldResolveTypeConflicts(pair)
val newType = newResolveTypeConflicts(pair)
if (oldType != newType) {
println(s"|[`${pair(0)}`, `${pair(1)}`]|`$oldType`|`$newType`|")
}
}
}
```
So, looks this PR makes the changes in type resolution as below:
|Input types|Old output type|New output type|
|-----------|---------------|---------------|
|[`NullType`, `DecimalType(38,0)`]|`StringType`|`DecimalType(38,0)`|
|[`NullType`, `DateType`]|`StringType`|`DateType`|
|[`NullType`, `TimestampType`]|`StringType`|`TimestampType`|
|[`IntegerType`, `DecimalType(38,0)`]|`IntegerType`|`DecimalType(38,0)`|
|[`IntegerType`, `DateType`]|`IntegerType`|`StringType`|
|[`IntegerType`, `TimestampType`]|`IntegerType`|`StringType`|
|[`LongType`, `DecimalType(38,0)`]|`LongType`|`DecimalType(38,0)`|
|[`LongType`, `DateType`]|`LongType`|`StringType`|
|[`LongType`, `TimestampType`]|`LongType`|`StringType`|
|[`DoubleType`, `DateType`]|`DoubleType`|`StringType`|
|[`DoubleType`, `TimestampType`]|`DoubleType`|`StringType`|
|[`DecimalType(38,0)`, `NullType`]|`StringType`|`DecimalType(38,0)`|
|[`DecimalType(38,0)`, `IntegerType`]|`IntegerType`|`DecimalType(38,0)`|
|[`DecimalType(38,0)`, `LongType`]|`LongType`|`DecimalType(38,0)`|
|[`DecimalType(38,0)`, `DateType`]|`DecimalType(38,0)`|`StringType`|
|[`DecimalType(38,0)`, `TimestampType`]|`DecimalType(38,0)`|`StringType`|
|[`DateType`, `NullType`]|`StringType`|`DateType`|
|[`DateType`, `IntegerType`]|`IntegerType`|`StringType`|
|[`DateType`, `LongType`]|`LongType`|`StringType`|
|[`DateType`, `DoubleType`]|`DoubleType`|`StringType`|
|[`DateType`, `DecimalType(38,0)`]|`DateType`|`StringType`|
|[`DateType`, `TimestampType`]|`DateType`|`TimestampType`|
|[`TimestampType`, `NullType`]|`StringType`|`TimestampType`|
|[`TimestampType`, `IntegerType`]|`IntegerType`|`StringType`|
|[`TimestampType`, `LongType`]|`LongType`|`StringType`|
|[`TimestampType`, `DoubleType`]|`DoubleType`|`StringType`|
|[`TimestampType`, `DecimalType(38,0)`]|`TimestampType`|`StringType`|
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]