Repository: spark Updated Branches: refs/heads/master 9ecc40d3a -> 3ce81494c
[SPARK-1947] [SQL] Child of SumDistinct or Average should be widened to prevent overflows the same as Sum. Child of `SumDistinct` or `Average` should be widened to prevent overflows the same as `Sum`. Author: Takuya UESHIN <[email protected]> Closes #902 from ueshin/issues/SPARK-1947 and squashes the following commits: 99c3dcb [Takuya UESHIN] Insert Cast for SumDistinct and Average. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3ce81494 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3ce81494 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3ce81494 Branch: refs/heads/master Commit: 3ce81494c512bc97979a743ea77ef913315f7fb6 Parents: 9ecc40d Author: Takuya UESHIN <[email protected]> Authored: Sat May 31 11:30:03 2014 -0700 Committer: Michael Armbrust <[email protected]> Committed: Sat May 31 11:30:03 2014 -0700 ---------------------------------------------------------------------- .../sql/catalyst/analysis/HiveTypeCoercion.scala | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/3ce81494/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index 4557d77..326feea 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -264,10 +264,22 @@ trait HiveTypeCoercion { // Skip nodes who's children have not been resolved yet. case e if !e.childrenResolved => e - // Promote SUM to largest types to prevent overflows. + // Promote SUM, SUM DISTINCT and AVERAGE to largest types to prevent overflows. case s @ Sum(e @ DecimalType()) => s // Decimal is already the biggest. case Sum(e @ IntegralType()) if e.dataType != LongType => Sum(Cast(e, LongType)) case Sum(e @ FractionalType()) if e.dataType != DoubleType => Sum(Cast(e, DoubleType)) + + case s @ SumDistinct(e @ DecimalType()) => s // Decimal is already the biggest. + case SumDistinct(e @ IntegralType()) if e.dataType != LongType => + SumDistinct(Cast(e, LongType)) + case SumDistinct(e @ FractionalType()) if e.dataType != DoubleType => + SumDistinct(Cast(e, DoubleType)) + + case s @ Average(e @ DecimalType()) => s // Decimal is already the biggest. + case Average(e @ IntegralType()) if e.dataType != LongType => + Average(Cast(e, LongType)) + case Average(e @ FractionalType()) if e.dataType != DoubleType => + Average(Cast(e, DoubleType)) } } }
