Github user HyukjinKwon commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19389#discussion_r150528361
  
    --- Diff: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
 ---
    @@ -468,14 +460,16 @@ object PartitioningUtils {
       }
     
       /**
    -   * Given a collection of [[Literal]]s, resolves possible type conflicts 
by up-casting "lower"
    -   * types.
    +   * Given a collection of [[Literal]]s, resolves possible type conflicts 
by
    +   * [[TypeCoercion.findWiderCommonType]]. See 
[[TypeCoercion.findWiderTypeForTwo]].
        */
       private def resolveTypeConflicts(literals: Seq[Literal], timeZone: 
TimeZone): Seq[Literal] = {
    -    val desiredType = {
    -      val topType = 
literals.map(_.dataType).maxBy(upCastingOrder.indexOf(_))
    --- End diff --
    
    So, for the types:
    
    ```scala
    Seq(
        NullType, IntegerType, LongType, DoubleType,
        *DecimalType(...), DateType, TimestampType, StringType)
    ```
    
    
    I produced a chart as below by this codes:
    
    ```scala
    test("Print out chart") {
      val supportedTypes: Seq[DataType] = Seq(
        NullType, IntegerType, LongType, DoubleType,
        DecimalType(38, 0), DateType, TimestampType, StringType)
    
      val combinations = for {
        t1 <- supportedTypes
        t2 <- supportedTypes
      } yield Seq(t1,t2)
    
      // Old type conflict resolution:
      val upCastingOrder: Seq[DataType] =
        Seq(NullType, IntegerType, LongType, FloatType, DoubleType, StringType)
    
      def oldResolveTypeConflicts(dataTypes: Seq[DataType]): DataType = {
        val topType = dataTypes.maxBy(upCastingOrder.indexOf(_))
        if (topType == NullType) StringType else topType
      }
    
      // New type conflict resolution:
      def newResolveTypeConflicts(dataTypes: Seq[DataType]): DataType = {
        TypeCoercion.findWiderCommonType(dataTypes) match {
          case Some(NullType) => StringType
          case Some(dt: DataType) => dt
          case _ => StringType
        }
      }
    
      println("|Input types|Old output type|New output type|")
      println("|-----------|---------------|---------------|")
      combinations.foreach { pair =>
        val oldType = oldResolveTypeConflicts(pair)
        val newType = newResolveTypeConflicts(pair)
    
        if (oldType != newType) {
          println(s"|[`${pair(0)}`, `${pair(1)}`]|`$oldType`|`$newType`|")
        }
      }
    }
    ```
    
    So, looks this PR makes the changes in type resolution as below:
    
    |Input types|Old output type|New output type|
    |-----------|---------------|---------------|
    |[`NullType`, `DecimalType(38,0)`]|`StringType`|`DecimalType(38,0)`|
    |[`NullType`, `DateType`]|`StringType`|`DateType`|
    |[`NullType`, `TimestampType`]|`StringType`|`TimestampType`|
    |[`IntegerType`, `DecimalType(38,0)`]|`IntegerType`|`DecimalType(38,0)`|
    |[`IntegerType`, `DateType`]|`IntegerType`|`StringType`|
    |[`IntegerType`, `TimestampType`]|`IntegerType`|`StringType`|
    |[`LongType`, `DecimalType(38,0)`]|`LongType`|`DecimalType(38,0)`|
    |[`LongType`, `DateType`]|`LongType`|`StringType`|
    |[`LongType`, `TimestampType`]|`LongType`|`StringType`|
    |[`DoubleType`, `DateType`]|`DoubleType`|`StringType`|
    |[`DoubleType`, `TimestampType`]|`DoubleType`|`StringType`|
    |[`DecimalType(38,0)`, `NullType`]|`StringType`|`DecimalType(38,0)`|
    |[`DecimalType(38,0)`, `IntegerType`]|`IntegerType`|`DecimalType(38,0)`|
    |[`DecimalType(38,0)`, `LongType`]|`LongType`|`DecimalType(38,0)`|
    |[`DecimalType(38,0)`, `DateType`]|`DecimalType(38,0)`|`StringType`|
    |[`DecimalType(38,0)`, `TimestampType`]|`DecimalType(38,0)`|`StringType`|
    |[`DateType`, `NullType`]|`StringType`|`DateType`|
    |[`DateType`, `IntegerType`]|`IntegerType`|`StringType`|
    |[`DateType`, `LongType`]|`LongType`|`StringType`|
    |[`DateType`, `DoubleType`]|`DoubleType`|`StringType`|
    |[`DateType`, `DecimalType(38,0)`]|`DateType`|`StringType`|
    |[`DateType`, `TimestampType`]|`DateType`|`TimestampType`|
    |[`TimestampType`, `NullType`]|`StringType`|`TimestampType`|
    |[`TimestampType`, `IntegerType`]|`IntegerType`|`StringType`|
    |[`TimestampType`, `LongType`]|`LongType`|`StringType`|
    |[`TimestampType`, `DoubleType`]|`DoubleType`|`StringType`|
    |[`TimestampType`, `DecimalType(38,0)`]|`TimestampType`|`StringType`|


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to