This is an automated email from the ASF dual-hosted git repository. mneumann pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push: new df5a09c75b Unimplement PartialOrd for TDigest's Centroid (#17440) df5a09c75b is described below commit df5a09c75b00e4396370fd9f4421b6975c66c7bf Author: Piotr Findeisen <piotr.findei...@gmail.com> AuthorDate: Fri Sep 5 04:39:04 2025 -0700 Unimplement PartialOrd for TDigest's Centroid (#17440) The implementation was not consistent with PartialEq, violating the PartialOrd contract. --- .../functions-aggregate-common/src/merge_arrays.rs | 1 + .../functions-aggregate-common/src/tdigest.rs | 24 +++++++--------------- datafusion/physical-plan/src/topk/mod.rs | 1 + 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/datafusion/functions-aggregate-common/src/merge_arrays.rs b/datafusion/functions-aggregate-common/src/merge_arrays.rs index 0cfea66249..c6989bc010 100644 --- a/datafusion/functions-aggregate-common/src/merge_arrays.rs +++ b/datafusion/functions-aggregate-common/src/merge_arrays.rs @@ -67,6 +67,7 @@ impl<'a> CustomElement<'a> { // - When used inside `BinaryHeap` it is a min-heap. impl Ord for CustomElement<'_> { fn cmp(&self, other: &Self) -> Ordering { + // TODO Ord/PartialOrd is not consistent with PartialEq; PartialOrd contract is violated // Compares according to custom ordering self.ordering(&self.ordering, &other.ordering) // Convert max heap to min heap diff --git a/datafusion/functions-aggregate-common/src/tdigest.rs b/datafusion/functions-aggregate-common/src/tdigest.rs index 378fc8c42b..38a9292cea 100644 --- a/datafusion/functions-aggregate-common/src/tdigest.rs +++ b/datafusion/functions-aggregate-common/src/tdigest.rs @@ -103,20 +103,6 @@ pub struct Centroid { weight: f64, } -impl PartialOrd for Centroid { - fn partial_cmp(&self, other: &Centroid) -> Option<Ordering> { - Some(self.cmp(other)) - } -} - -impl Eq for Centroid {} - -impl Ord for Centroid { - fn cmp(&self, other: &Centroid) -> Ordering { - self.mean.total_cmp(&other.mean) - } -} - impl Centroid { pub fn new(mean: f64, weight: f64) -> Self { Centroid { mean, weight } @@ -139,6 +125,10 @@ impl Centroid { self.mean = new_sum / new_weight; new_sum } + + pub fn cmp_mean(&self, other: &Self) -> Ordering { + self.mean.total_cmp(&other.mean) + } } impl Default for Centroid { @@ -331,7 +321,7 @@ impl TDigest { result.sum += curr.add(sums_to_merge, weights_to_merge); compressed.push(curr); compressed.shrink_to_fit(); - compressed.sort(); + compressed.sort_by(|a, b| a.cmp_mean(b)); result.centroids = compressed; result @@ -349,7 +339,7 @@ impl TDigest { let mut j = middle; while i < middle && j < last { - match centroids[i].cmp(¢roids[j]) { + match centroids[i].cmp_mean(¢roids[j]) { Ordering::Less => { result.push(centroids[i].clone()); i += 1; @@ -466,7 +456,7 @@ impl TDigest { result.sum += curr.add(sums_to_merge, weights_to_merge); compressed.push(curr.clone()); compressed.shrink_to_fit(); - compressed.sort(); + compressed.sort_by(|a, b| a.cmp_mean(b)); result.count = count; result.min = min; diff --git a/datafusion/physical-plan/src/topk/mod.rs b/datafusion/physical-plan/src/topk/mod.rs index 6c0479a108..ce977b0706 100644 --- a/datafusion/physical-plan/src/topk/mod.rs +++ b/datafusion/physical-plan/src/topk/mod.rs @@ -940,6 +940,7 @@ impl Eq for TopKRow {} impl PartialOrd for TopKRow { fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + // TODO PartialOrd is not consistent with PartialEq; PartialOrd contract is violated Some(self.cmp(other)) } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org For additional commands, e-mail: commits-h...@datafusion.apache.org