Github user mgaido91 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20023#discussion_r161716242

    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala ---
    @@ -243,17 +279,43 @@ object DecimalPrecision extends TypeCoercionRule {
         // Promote integers inside a binary expression with fixed-precision decimals to decimals,
         // and fixed-precision decimals in an expression with floats / doubles to doubles
         case b @ BinaryOperator(left, right) if left.dataType != right.dataType =>
    -      (left.dataType, right.dataType) match {
    -        case (t: IntegralType, DecimalType.Fixed(p, s)) =>
    -          b.makeCopy(Array(Cast(left, DecimalType.forType(t)), right))
    -        case (DecimalType.Fixed(p, s), t: IntegralType) =>
    -          b.makeCopy(Array(left, Cast(right, DecimalType.forType(t))))
    -        case (t, DecimalType.Fixed(p, s)) if isFloat(t) =>
    -          b.makeCopy(Array(left, Cast(right, DoubleType)))
    -        case (DecimalType.Fixed(p, s), t) if isFloat(t) =>
    -          b.makeCopy(Array(Cast(left, DoubleType), right))
    -        case _ =>
    -          b
    -      }
    +      nondecimalLiteralAndDecimal(b).lift((left, right)).getOrElse(
    +        nondecimalNonliteralAndDecimal(b).applyOrElse((left.dataType, right.dataType),
    +          (_: (DataType, DataType)) => b))
        }
    +
    +  /**
    +   * Type coercion for BinaryOperator in which one side is a non-decimal literal numeric, and the
    +   * other side is a decimal.
    +   */
    +  private def nondecimalLiteralAndDecimal(
    +      b: BinaryOperator): PartialFunction[(Expression, Expression), Expression] = {
    +    // Promote literal integers inside a binary expression with fixed-precision decimals to
    +    // decimals. The precision and scale are the ones needed by the integer value.
    +    case (l: Literal, r) if r.dataType.isInstanceOf[DecimalType]
    +      && l.dataType.isInstanceOf[IntegralType] =>
    +      b.makeCopy(Array(Cast(l, DecimalType.forLiteral(l)), r))
    --- End diff --

    If we don't do this, we get many test failures in spark-hive, because spark-hive does so. Moreover, requiring more precision than needed is not OK, since it leads to a useless loss of precision. Think of this example: you multiply a column which is DECIMAL(38, 18) by `2`. Without this change, `2` is considered a DECIMAL(10, 0). According to the rules, the result should be DECIMAL(38 + 10 + 1, 18), which is out of range; the rules then adjust it to DECIMAL(38, 7), potentially losing 11 digits of the fractional part. With this change, instead, the result is DECIMAL(38 + 1 + 1, 18), which becomes DECIMAL(38, 16), with a much smaller precision loss.
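    To make the arithmetic above concrete, here is a minimal, self-contained Scala sketch of the multiplication rule and the overflow adjustment the numbers imply (Hive/SQL Server style: cap precision at 38 and keep at least min(scale, 6) fractional digits). This is an illustration, not Spark's actual implementation; all names in it are made up for this sketch.

    ```scala
    object DecimalMultiplySketch {
      val MaxPrecision = 38
      val MinAdjustedScale = 6

      // DECIMAL(p1, s1) * DECIMAL(p2, s2) => DECIMAL(p1 + p2 + 1, s1 + s2),
      // adjusted when the precision exceeds the 38-digit maximum.
      def multiplyResult(p1: Int, s1: Int, p2: Int, s2: Int): (Int, Int) = {
        val precision = p1 + p2 + 1
        val scale = s1 + s2
        if (precision <= MaxPrecision) {
          (precision, scale)
        } else {
          // Preserve the integral digits and shrink the scale, but never
          // below min(scale, 6) fractional digits.
          val intDigits = precision - scale
          val adjustedScale =
            math.max(MaxPrecision - intDigits, math.min(scale, MinAdjustedScale))
          (MaxPrecision, adjustedScale)
        }
      }

      def main(args: Array[String]): Unit = {
        // Literal 2 typed as DECIMAL(10, 0): DECIMAL(49, 18) overflows and is
        // adjusted to DECIMAL(38, 7) -- 11 fractional digits lost.
        println(multiplyResult(38, 18, 10, 0)) // (38,7)
        // Literal 2 typed by its value as DECIMAL(1, 0): DECIMAL(40, 18) overflows
        // and is adjusted to DECIMAL(38, 16) -- only 2 fractional digits lost.
        println(multiplyResult(38, 18, 1, 0))  // (38,16)
      }
    }
    ```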