Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19594#discussion_r156388437

--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/EstimationUtils.scala ---
@@ -114,4 +115,183 @@ object EstimationUtils {
     }
   }
+  /**
+   * Returns overlapped ranges between two histograms, in the given value range [newMin, newMax].
+   */
+  def getOverlappedRanges(
+      leftHistogram: Histogram,
+      rightHistogram: Histogram,
+      newMin: Double,
+      newMax: Double): Seq[OverlappedRange] = {
+    val overlappedRanges = new ArrayBuffer[OverlappedRange]()
+    // Only bins whose range intersect [newMin, newMax] have join possibility.
+    val leftBins = leftHistogram.bins
+      .filter(b => b.lo <= newMax && b.hi >= newMin)
+    val rightBins = rightHistogram.bins
+      .filter(b => b.lo <= newMax && b.hi >= newMin)
+
+    leftBins.foreach { lb =>
+      rightBins.foreach { rb =>
--- End diff --

nit:
```
for {
  leftBin <- leftBins
  rightBin <- rightBins
} yield {
  ...
  OverlappedRange
  ...
}
```
Then we can omit `val overlappedRanges = new ArrayBuffer[OverlappedRange]()`
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org