This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new f99a6cf6ab Improve `PartialEq`, `Eq` speed for `LexOrdering`, make 
`PartialEq` and `PartialOrd` consistent (#17442)
f99a6cf6ab is described below

commit f99a6cf6ab1be38a70c63c319678144d9dfe5c0c
Author: Piotr Findeisen <piotr.findei...@gmail.com>
AuthorDate: Tue Sep 9 06:37:03 2025 -0700

    Improve `PartialEq`, `Eq` speed for `LexOrdering`, make `PartialEq` and 
`PartialOrd` consistent (#17442)
    
    * Improve `PartialEq`, `Eq` speed for `LexOrdering`
    
    When eq is derived, it unnecessarily invokes eq on the `set` field. It's
    more expensive that simple `Vec` eq done on the `expr` field.
    Fortunately it's redundant, the `set` field is logically derived from
    `expr`.
    
    This also updates `PartialOrd` implementation to emphasize the fact that
    `PartialEq` and `PartialOrd` need to be consistent. They already were
    consistent, it's a stylistic change.
    
    * Remove duplicate logic for `LexOrdering` bookkeeping
    
    Remove duplicate logic for keeping `expr` and `set` fields in sync
    between `LexOrdering::new` / `LexOrdering::construct` and
    `LexOrdering::push`. This is done by reusing `push` during construction.
    
    This also changes internal construction API, enforcing that degenerate
    instance is never returned to the caller.
    
    * Drop Vec::with_capacity pre-sizing
    
    It's not really needed. Was there only for behavior consistency with
    collect into Vec, but it is not necessarily strictly better than just
    Vec:new due to filtering.
---
 datafusion/functions-aggregate/src/array_agg.rs  |  2 +-
 datafusion/physical-expr-common/src/sort_expr.rs | 57 ++++++++++++++----------
 2 files changed, 35 insertions(+), 24 deletions(-)

diff --git a/datafusion/functions-aggregate/src/array_agg.rs 
b/datafusion/functions-aggregate/src/array_agg.rs
index 321deb0d21..268349ecf1 100644
--- a/datafusion/functions-aggregate/src/array_agg.rs
+++ b/datafusion/functions-aggregate/src/array_agg.rs
@@ -1105,7 +1105,7 @@ mod tests {
         ])])?;
 
         // without compaction, the size is 17112
-        assert_eq!(acc.size(), 2112);
+        assert_eq!(acc.size(), 2184);
 
         Ok(())
     }
diff --git a/datafusion/physical-expr-common/src/sort_expr.rs 
b/datafusion/physical-expr-common/src/sort_expr.rs
index 07edfb70f4..d19d7024a5 100644
--- a/datafusion/physical-expr-common/src/sort_expr.rs
+++ b/datafusion/physical-expr-common/src/sort_expr.rs
@@ -353,7 +353,7 @@ impl From<PhysicalSortRequirement> for PhysicalSortExpr {
 /// 1. It is non-degenerate, meaning it contains at least one element.
 /// 2. It is duplicate-free, meaning it does not contain multiple entries for
 ///    the same column.
-#[derive(Debug, Clone, PartialEq, Eq)]
+#[derive(Debug, Clone)]
 pub struct LexOrdering {
     /// Vector of sort expressions representing the lexicographical ordering.
     exprs: Vec<PhysicalSortExpr>,
@@ -367,8 +367,20 @@ impl LexOrdering {
     /// Creates a new [`LexOrdering`] from the given vector of sort 
expressions.
     /// If the vector is empty, returns `None`.
     pub fn new(exprs: impl IntoIterator<Item = PhysicalSortExpr>) -> 
Option<Self> {
-        let (non_empty, ordering) = Self::construct(exprs);
-        non_empty.then_some(ordering)
+        let exprs = exprs.into_iter();
+        let mut candidate = Self {
+            // not valid yet; valid publicly-returned instance must be 
non-empty
+            exprs: Vec::new(),
+            set: HashSet::new(),
+        };
+        for expr in exprs {
+            candidate.push(expr);
+        }
+        if candidate.exprs.is_empty() {
+            None
+        } else {
+            Some(candidate)
+        }
     }
 
     /// Appends an element to the back of the `LexOrdering`.
@@ -414,27 +426,28 @@ impl LexOrdering {
         self.exprs.truncate(len);
         true
     }
+}
 
-    /// Constructs a new `LexOrdering` from the given sort requirements w/o
-    /// enforcing non-degeneracy. This function is used internally and is not
-    /// meant (or safe) for external use.
-    fn construct(exprs: impl IntoIterator<Item = PhysicalSortExpr>) -> (bool, 
Self) {
-        let mut set = HashSet::new();
-        let exprs = exprs
-            .into_iter()
-            .filter_map(|s| set.insert(Arc::clone(&s.expr)).then_some(s))
-            .collect();
-        (!set.is_empty(), Self { exprs, set })
+impl PartialEq for LexOrdering {
+    fn eq(&self, other: &Self) -> bool {
+        let Self {
+            exprs,
+            set: _, // derived from `exprs`
+        } = self;
+        // PartialEq must be consistent with PartialOrd
+        exprs == &other.exprs
     }
 }
-
+impl Eq for LexOrdering {}
 impl PartialOrd for LexOrdering {
     /// There is a partial ordering among `LexOrdering` objects. For example, 
the
     /// ordering `[a ASC]` is coarser (less) than ordering `[a ASC, b ASC]`.
     /// If two orderings do not share a prefix, they are incomparable.
     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-        self.iter()
-            .zip(other.iter())
+        // PartialEq must be consistent with PartialOrd
+        self.exprs
+            .iter()
+            .zip(other.exprs.iter())
             .all(|(lhs, rhs)| lhs == rhs)
             .then(|| self.len().cmp(&other.len()))
     }
@@ -445,9 +458,8 @@ impl<const N: usize> From<[PhysicalSortExpr; N]> for 
LexOrdering {
         // TODO: Replace this assertion with a condition on the generic 
parameter
         //       when Rust supports it.
         assert!(N > 0);
-        let (non_empty, ordering) = Self::construct(value);
-        debug_assert!(non_empty);
-        ordering
+        Self::new(value)
+            .expect("A LexOrdering from non-empty array must be 
non-degenerate")
     }
 }
 
@@ -604,10 +616,9 @@ impl From<LexOrdering> for LexRequirement {
 
 impl From<LexRequirement> for LexOrdering {
     fn from(value: LexRequirement) -> Self {
-        // Can construct directly as `value` is non-degenerate:
-        let (non_empty, ordering) = 
Self::construct(value.into_iter().map(Into::into));
-        debug_assert!(non_empty);
-        ordering
+        // Can construct directly as `value` is non-degenerate
+        Self::new(value.into_iter().map(Into::into))
+            .expect("A LexOrdering from LexRequirement must be non-degenerate")
     }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to