GitHub user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/20062#discussion_r158949891
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala ---
@@ -225,17 +224,17 @@ case class FilterEstimation(plan: Filter) extends Logging {
   def evaluateNullCheck(
       attr: Attribute,
       isNull: Boolean,
-      update: Boolean): Option[BigDecimal] = {
+      update: Boolean): Option[Double] = {
     if (!colStatsMap.contains(attr)) {
       logDebug("[CBO] No statistics for " + attr)
       return None
     }
     val colStat = colStatsMap(attr)
     val rowCountValue = childStats.rowCount.get
-    val nullPercent: BigDecimal = if (rowCountValue == 0) {
+    val nullPercent: Double = if (rowCountValue == 0) {
       0
     } else {
-      BigDecimal(colStat.nullCount) / BigDecimal(rowCountValue)
+      (BigDecimal(colStat.nullCount) / BigDecimal(rowCountValue)).toDouble
--- End diff ---
what's the difference between this and `colStat.nullCount.toDouble / rowCountValue`?
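
For readers comparing the two forms: they differ only in where rounding happens. The suggested variant rounds the BigInt counts to Double before dividing, while the patched line divides exactly as BigDecimal (to 34 decimal digits under Scala's default MathContext) and rounds once at the final `.toDouble`. A minimal, self-contained sketch of that difference follows; the object name and the specific counts are illustrative, not from the PR, and the divisor gets an explicit `.toDouble` so the convert-first form compiles standalone:

```scala
// A minimal sketch, not from the PR: hypothetical counts chosen so the
// two expressions round differently. 2^53 + 1 is the smallest positive
// integer that is not exactly representable as a Double.
object NullPercentDemo extends App {
  val nullCount: BigInt     = BigInt(3)
  val rowCountValue: BigInt = (BigInt(1) << 53) + 1

  // Convert first, then divide: rowCountValue rounds to 2^53 before
  // the division happens, so the result is exactly 3 / 2^53.
  val convertFirst: Double = nullCount.toDouble / rowCountValue.toDouble

  // Divide first (exact to 34 decimal digits under Scala's default
  // MathContext.DECIMAL128), then round once at the final toDouble.
  val divideFirst: Double =
    (BigDecimal(nullCount) / BigDecimal(rowCountValue)).toDouble

  println(convertFirst)                // 3.3306690738754696E-16
  println(divideFirst)                 // one ULP smaller
  println(convertFirst == divideFirst) // false
}
```

With realistic row counts both values sit far below 2^53, so `toDouble` is exact and the two forms almost always produce the same Double; the difference is largely theoretical.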
---