This is an automated email from the ASF dual-hosted git repository. alamb pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push: new f99a6cf6ab Improve `PartialEq`, `Eq` speed for `LexOrdering`, make `PartialEq` and `PartialOrd` consistent (#17442) f99a6cf6ab is described below commit f99a6cf6ab1be38a70c63c319678144d9dfe5c0c Author: Piotr Findeisen <piotr.findei...@gmail.com> AuthorDate: Tue Sep 9 06:37:03 2025 -0700 Improve `PartialEq`, `Eq` speed for `LexOrdering`, make `PartialEq` and `PartialOrd` consistent (#17442) * Improve `PartialEq`, `Eq` speed for `LexOrdering` When eq is derived, it unnecessarily invokes eq on the `set` field. It's more expensive that simple `Vec` eq done on the `expr` field. Fortunately it's redundant, the `set` field is logically derived from `expr`. This also updates `PartialOrd` implementation to emphasize the fact that `PartialEq` and `PartialOrd` need to be consistent. They already were consistent, it's a stylistic change. * Remove duplicate logic for `LexOrdering` bookkeeping Remove duplicate logic for keeping `expr` and `set` fields in sync between `LexOrdering::new` / `LexOrdering::construct` and `LexOrdering::push`. This is done by reusing `push` during construction. This also changes internal construction API, enforcing that degenerate instance is never returned to the caller. * Drop Vec::with_capacity pre-sizing It's not really needed. Was there only for behavior consistency with collect into Vec, but it is not necessarily strictly better than just Vec:new due to filtering. --- datafusion/functions-aggregate/src/array_agg.rs | 2 +- datafusion/physical-expr-common/src/sort_expr.rs | 57 ++++++++++++++---------- 2 files changed, 35 insertions(+), 24 deletions(-) diff --git a/datafusion/functions-aggregate/src/array_agg.rs b/datafusion/functions-aggregate/src/array_agg.rs index 321deb0d21..268349ecf1 100644 --- a/datafusion/functions-aggregate/src/array_agg.rs +++ b/datafusion/functions-aggregate/src/array_agg.rs @@ -1105,7 +1105,7 @@ mod tests { ])])?; // without compaction, the size is 17112 - assert_eq!(acc.size(), 2112); + assert_eq!(acc.size(), 2184); Ok(()) } diff --git a/datafusion/physical-expr-common/src/sort_expr.rs b/datafusion/physical-expr-common/src/sort_expr.rs index 07edfb70f4..d19d7024a5 100644 --- a/datafusion/physical-expr-common/src/sort_expr.rs +++ b/datafusion/physical-expr-common/src/sort_expr.rs @@ -353,7 +353,7 @@ impl From<PhysicalSortRequirement> for PhysicalSortExpr { /// 1. It is non-degenerate, meaning it contains at least one element. /// 2. It is duplicate-free, meaning it does not contain multiple entries for /// the same column. -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone)] pub struct LexOrdering { /// Vector of sort expressions representing the lexicographical ordering. exprs: Vec<PhysicalSortExpr>, @@ -367,8 +367,20 @@ impl LexOrdering { /// Creates a new [`LexOrdering`] from the given vector of sort expressions. /// If the vector is empty, returns `None`. pub fn new(exprs: impl IntoIterator<Item = PhysicalSortExpr>) -> Option<Self> { - let (non_empty, ordering) = Self::construct(exprs); - non_empty.then_some(ordering) + let exprs = exprs.into_iter(); + let mut candidate = Self { + // not valid yet; valid publicly-returned instance must be non-empty + exprs: Vec::new(), + set: HashSet::new(), + }; + for expr in exprs { + candidate.push(expr); + } + if candidate.exprs.is_empty() { + None + } else { + Some(candidate) + } } /// Appends an element to the back of the `LexOrdering`. @@ -414,27 +426,28 @@ impl LexOrdering { self.exprs.truncate(len); true } +} - /// Constructs a new `LexOrdering` from the given sort requirements w/o - /// enforcing non-degeneracy. This function is used internally and is not - /// meant (or safe) for external use. - fn construct(exprs: impl IntoIterator<Item = PhysicalSortExpr>) -> (bool, Self) { - let mut set = HashSet::new(); - let exprs = exprs - .into_iter() - .filter_map(|s| set.insert(Arc::clone(&s.expr)).then_some(s)) - .collect(); - (!set.is_empty(), Self { exprs, set }) +impl PartialEq for LexOrdering { + fn eq(&self, other: &Self) -> bool { + let Self { + exprs, + set: _, // derived from `exprs` + } = self; + // PartialEq must be consistent with PartialOrd + exprs == &other.exprs } } - +impl Eq for LexOrdering {} impl PartialOrd for LexOrdering { /// There is a partial ordering among `LexOrdering` objects. For example, the /// ordering `[a ASC]` is coarser (less) than ordering `[a ASC, b ASC]`. /// If two orderings do not share a prefix, they are incomparable. fn partial_cmp(&self, other: &Self) -> Option<Ordering> { - self.iter() - .zip(other.iter()) + // PartialEq must be consistent with PartialOrd + self.exprs + .iter() + .zip(other.exprs.iter()) .all(|(lhs, rhs)| lhs == rhs) .then(|| self.len().cmp(&other.len())) } @@ -445,9 +458,8 @@ impl<const N: usize> From<[PhysicalSortExpr; N]> for LexOrdering { // TODO: Replace this assertion with a condition on the generic parameter // when Rust supports it. assert!(N > 0); - let (non_empty, ordering) = Self::construct(value); - debug_assert!(non_empty); - ordering + Self::new(value) + .expect("A LexOrdering from non-empty array must be non-degenerate") } } @@ -604,10 +616,9 @@ impl From<LexOrdering> for LexRequirement { impl From<LexRequirement> for LexOrdering { fn from(value: LexRequirement) -> Self { - // Can construct directly as `value` is non-degenerate: - let (non_empty, ordering) = Self::construct(value.into_iter().map(Into::into)); - debug_assert!(non_empty); - ordering + // Can construct directly as `value` is non-degenerate + Self::new(value.into_iter().map(Into::into)) + .expect("A LexOrdering from LexRequirement must be non-degenerate") } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org For additional commands, e-mail: commits-h...@datafusion.apache.org