Github user wzhfy commented on a diff in the pull request:
https://github.com/apache/spark/pull/19594#discussion_r157696227
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala
---
@@ -225,6 +236,43 @@ case class JoinEstimation(join: Join) extends Logging {
(ceil(card), newStats)
}
+ /** Compute join cardinality using equi-height histograms. */
+ private def computeByEquiHeightHistogram(
+ leftKey: AttributeReference,
+ rightKey: AttributeReference,
+ leftHistogram: Histogram,
+ rightHistogram: Histogram,
+ newMin: Option[Any],
+ newMax: Option[Any]): (BigInt, ColumnStat) = {
+ val overlappedRanges = getOverlappedRanges(
+ leftHistogram = leftHistogram,
+ rightHistogram = rightHistogram,
+ // Only numeric values have equi-height histograms.
+ lowerBound = newMin.get.toString.toDouble,
+ upperBound = newMax.get.toString.toDouble)
--- End diff --
That's because we need to update the column stats' min and max at the end
of the method.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]