srowen commented on a change in pull request #26029: [SPARK-29336][SQL] Fix the
implementation of QuantileSummaries.merge (guarantee that the relativeError
will be respected)
URL: https://github.com/apache/spark/pull/26029#discussion_r331713951
##########
File path:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/QuantileSummariesSuite.scala
##########
@@ -169,5 +170,22 @@ class QuantileSummariesSuite extends SparkFunSuite {
checkQuantile(0.1, data, s)
checkQuantile(0.001, data, s)
}
+
+ // length of data21 is 4 * length of data22
+ val data21 = data.zipWithIndex.filter(_._2 % 5 != 0).map(_._1).toSeq
+ val data22 = data.zipWithIndex.filter(_._2 % 5 == 0).map(_._1).toSeq
+
+ test(
+ s"Merging unbalanced interleaved lists with epsi=$epsi and seq=$seq_name, " +
+ s"compression=$compression") {
+ val s1 = buildSummary(data21, epsi, compression)
+ val s2 = buildSummary(data22, epsi, compression)
+ val s = s1.merge(s2)
+ // Check all quantiles
+ for (query_rank <- 1 to n) {
+ val query_quantile = query_rank.toDouble / n.toDouble
Review comment:
Nit: rename `query_quantile` to `queryQuantile` (Scala style uses lowerCamelCase for local vals).
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]