This is an automated email from the ASF dual-hosted git repository.

mneumann pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new df5a09c75b Unimplement PartialOrd for TDigest's Centroid (#17440)
df5a09c75b is described below

commit df5a09c75b00e4396370fd9f4421b6975c66c7bf
Author: Piotr Findeisen <piotr.findei...@gmail.com>
AuthorDate: Fri Sep 5 04:39:04 2025 -0700

    Unimplement PartialOrd for TDigest's Centroid (#17440)
    
    The implementation was not consistent with PartialEq, violating the
    PartialOrd contract.
---
 .../functions-aggregate-common/src/merge_arrays.rs |  1 +
 .../functions-aggregate-common/src/tdigest.rs      | 24 +++++++---------------
 datafusion/physical-plan/src/topk/mod.rs           |  1 +
 3 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/datafusion/functions-aggregate-common/src/merge_arrays.rs 
b/datafusion/functions-aggregate-common/src/merge_arrays.rs
index 0cfea66249..c6989bc010 100644
--- a/datafusion/functions-aggregate-common/src/merge_arrays.rs
+++ b/datafusion/functions-aggregate-common/src/merge_arrays.rs
@@ -67,6 +67,7 @@ impl<'a> CustomElement<'a> {
 // - When used inside `BinaryHeap` it is a min-heap.
 impl Ord for CustomElement<'_> {
     fn cmp(&self, other: &Self) -> Ordering {
+        // TODO Ord/PartialOrd is not consistent with PartialEq; PartialOrd 
contract is violated
         // Compares according to custom ordering
         self.ordering(&self.ordering, &other.ordering)
             // Convert max heap to min heap
diff --git a/datafusion/functions-aggregate-common/src/tdigest.rs 
b/datafusion/functions-aggregate-common/src/tdigest.rs
index 378fc8c42b..38a9292cea 100644
--- a/datafusion/functions-aggregate-common/src/tdigest.rs
+++ b/datafusion/functions-aggregate-common/src/tdigest.rs
@@ -103,20 +103,6 @@ pub struct Centroid {
     weight: f64,
 }
 
-impl PartialOrd for Centroid {
-    fn partial_cmp(&self, other: &Centroid) -> Option<Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl Eq for Centroid {}
-
-impl Ord for Centroid {
-    fn cmp(&self, other: &Centroid) -> Ordering {
-        self.mean.total_cmp(&other.mean)
-    }
-}
-
 impl Centroid {
     pub fn new(mean: f64, weight: f64) -> Self {
         Centroid { mean, weight }
@@ -139,6 +125,10 @@ impl Centroid {
         self.mean = new_sum / new_weight;
         new_sum
     }
+
+    pub fn cmp_mean(&self, other: &Self) -> Ordering {
+        self.mean.total_cmp(&other.mean)
+    }
 }
 
 impl Default for Centroid {
@@ -331,7 +321,7 @@ impl TDigest {
         result.sum += curr.add(sums_to_merge, weights_to_merge);
         compressed.push(curr);
         compressed.shrink_to_fit();
-        compressed.sort();
+        compressed.sort_by(|a, b| a.cmp_mean(b));
 
         result.centroids = compressed;
         result
@@ -349,7 +339,7 @@ impl TDigest {
         let mut j = middle;
 
         while i < middle && j < last {
-            match centroids[i].cmp(&centroids[j]) {
+            match centroids[i].cmp_mean(&centroids[j]) {
                 Ordering::Less => {
                     result.push(centroids[i].clone());
                     i += 1;
@@ -466,7 +456,7 @@ impl TDigest {
         result.sum += curr.add(sums_to_merge, weights_to_merge);
         compressed.push(curr.clone());
         compressed.shrink_to_fit();
-        compressed.sort();
+        compressed.sort_by(|a, b| a.cmp_mean(b));
 
         result.count = count;
         result.min = min;
diff --git a/datafusion/physical-plan/src/topk/mod.rs 
b/datafusion/physical-plan/src/topk/mod.rs
index 6c0479a108..ce977b0706 100644
--- a/datafusion/physical-plan/src/topk/mod.rs
+++ b/datafusion/physical-plan/src/topk/mod.rs
@@ -940,6 +940,7 @@ impl Eq for TopKRow {}
 
 impl PartialOrd for TopKRow {
     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        // TODO PartialOrd is not consistent with PartialEq; PartialOrd 
contract is violated
         Some(self.cmp(other))
     }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to