This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 9a8f8b7188 fix: Fix eq properties regression from #10434 (#11363)
9a8f8b7188 is described below

commit 9a8f8b7188fba8dcba52028443b90556aeff7f5f
Author: Matthew Cramerus <[email protected]>
AuthorDate: Wed Jul 10 08:14:28 2024 -0500

    fix: Fix eq properties regression from #10434 (#11363)
    
    * discover new orderings when constants are added
    
    * more comments
    
    * reduce nesting + describe argument
    
    * lint?
---
 .../physical-expr/src/equivalence/properties.rs    | 183 +++++++++++++--------
 1 file changed, 113 insertions(+), 70 deletions(-)

diff --git a/datafusion/physical-expr/src/equivalence/properties.rs 
b/datafusion/physical-expr/src/equivalence/properties.rs
index d9d19c0bcf..8c327fbaf4 100644
--- a/datafusion/physical-expr/src/equivalence/properties.rs
+++ b/datafusion/physical-expr/src/equivalence/properties.rs
@@ -223,56 +223,11 @@ impl EquivalenceProperties {
             }
         }
 
-        // Discover new valid orderings in light of the new equality. For a 
discussion, see:
-        // https://github.com/apache/datafusion/issues/9812
-        let mut new_orderings = vec![];
-        for ordering in self.normalized_oeq_class().iter() {
-            let expressions = if left.eq(&ordering[0].expr) {
-                // Left expression is leading ordering
-                Some((ordering[0].options, right))
-            } else if right.eq(&ordering[0].expr) {
-                // Right expression is leading ordering
-                Some((ordering[0].options, left))
-            } else {
-                None
-            };
-            if let Some((leading_ordering, other_expr)) = expressions {
-                // Currently, we only handle expressions with a single child.
-                // TODO: It should be possible to handle expressions orderings 
like
-                //       f(a, b, c), a, b, c if f is monotonic in all 
arguments.
-                // First expression after leading ordering
-                if let Some(next_expr) = ordering.get(1) {
-                    let children = other_expr.children();
-                    if children.len() == 1
-                        && children[0].eq(&next_expr.expr)
-                        && SortProperties::Ordered(leading_ordering)
-                            == other_expr
-                                .get_properties(&[ExprProperties {
-                                    sort_properties: SortProperties::Ordered(
-                                        leading_ordering,
-                                    ),
-                                    range: Interval::make_unbounded(
-                                        &other_expr.data_type(&self.schema)?,
-                                    )?,
-                                }])?
-                                .sort_properties
-                    {
-                        // Assume existing ordering is [a ASC, b ASC]
-                        // When equality a = f(b) is given, If we know that 
given ordering `[b ASC]`, ordering `[f(b) ASC]` is valid,
-                        // then we can deduce that ordering `[b ASC]` is also 
valid.
-                        // Hence, ordering `[b ASC]` can be added to the state 
as valid ordering.
-                        // (e.g. existing ordering where leading ordering is 
removed)
-                        new_orderings.push(ordering[1..].to_vec());
-                    }
-                }
-            }
-        }
-        if !new_orderings.is_empty() {
-            self.oeq_class.add_new_orderings(new_orderings);
-        }
-
         // Add equal expressions to the state
         self.eq_group.add_equal_conditions(left, right);
+
+        // Discover any new orderings
+        self.discover_new_orderings(left)?;
         Ok(())
     }
 
@@ -304,9 +259,78 @@ impl EquivalenceProperties {
                 self.constants.push(const_expr);
             }
         }
+
+        for ordering in self.normalized_oeq_class().iter() {
+            if let Err(e) = self.discover_new_orderings(&ordering[0].expr) {
+                log::debug!("error discovering new orderings: {e}");
+            }
+        }
+
         self
     }
 
+    // Discover new valid orderings in light of a new equality.
+    // Accepts a single argument (`expr`) which is used to determine
+    // which orderings should be updated.
+    // When constants or equivalence classes are changed, there may be new 
orderings
+    // that can be discovered with the new equivalence properties.
+    // For a discussion, see: https://github.com/apache/datafusion/issues/9812
+    fn discover_new_orderings(&mut self, expr: &Arc<dyn PhysicalExpr>) -> 
Result<()> {
+        let normalized_expr = self.eq_group().normalize_expr(Arc::clone(expr));
+        let eq_class = self
+            .eq_group
+            .classes
+            .iter()
+            .find_map(|class| {
+                class
+                    .contains(&normalized_expr)
+                    .then(|| class.clone().into_vec())
+            })
+            .unwrap_or_else(|| vec![Arc::clone(&normalized_expr)]);
+
+        let mut new_orderings: Vec<LexOrdering> = vec![];
+        for (ordering, next_expr) in self
+            .normalized_oeq_class()
+            .iter()
+            .filter(|ordering| ordering[0].expr.eq(&normalized_expr))
+            // First expression after leading ordering
+            .filter_map(|ordering| Some(ordering).zip(ordering.get(1)))
+        {
+            let leading_ordering = ordering[0].options;
+            // Currently, we only handle expressions with a single child.
+            // TODO: It should be possible to handle expressions orderings like
+            //       f(a, b, c), a, b, c if f is monotonic in all arguments.
+            for equivalent_expr in &eq_class {
+                let children = equivalent_expr.children();
+                if children.len() == 1
+                    && children[0].eq(&next_expr.expr)
+                    && SortProperties::Ordered(leading_ordering)
+                        == equivalent_expr
+                            .get_properties(&[ExprProperties {
+                                sort_properties: SortProperties::Ordered(
+                                    leading_ordering,
+                                ),
+                                range: Interval::make_unbounded(
+                                    &equivalent_expr.data_type(&self.schema)?,
+                                )?,
+                            }])?
+                            .sort_properties
+                {
+                    // Assume existing ordering is [a ASC, b ASC]
+                    // When equality a = f(b) is given, If we know that given 
ordering `[b ASC]`, ordering `[f(b) ASC]` is valid,
+                    // then we can deduce that ordering `[b ASC]` is also 
valid.
+                    // Hence, ordering `[b ASC]` can be added to the state as 
valid ordering.
+                    // (e.g. existing ordering where leading ordering is 
removed)
+                    new_orderings.push(ordering[1..].to_vec());
+                    break;
+                }
+            }
+        }
+
+        self.oeq_class.add_new_orderings(new_orderings);
+        Ok(())
+    }
+
     /// Updates the ordering equivalence group within assuming that the table
     /// is re-sorted according to the argument `sort_exprs`. Note that 
constants
     /// and equivalence classes are unchanged as they are unaffected by a 
re-sort.
@@ -2454,30 +2478,49 @@ mod tests {
         ];
 
         for case in cases {
-            let mut properties = base_properties
-                .clone()
-                
.add_constants(case.constants.into_iter().map(ConstExpr::from));
-            for [left, right] in &case.equal_conditions {
-                properties.add_equal_conditions(left, right)?
-            }
-
-            let sort = case
-                .sort_columns
-                .iter()
-                .map(|&name| {
-                    col(name, &schema).map(|col| PhysicalSortExpr {
-                        expr: col,
-                        options: SortOptions::default(),
+            // Construct the equivalence properties in different orders
+            // to exercise different code paths
+            // (The resulting properties _should_ be the same)
+            for properties in [
+                // Equal conditions before constants
+                {
+                    let mut properties = base_properties.clone();
+                    for [left, right] in &case.equal_conditions {
+                        properties.add_equal_conditions(left, right)?
+                    }
+                    properties.add_constants(
+                        case.constants.iter().cloned().map(ConstExpr::from),
+                    )
+                },
+                // Constants before equal conditions
+                {
+                    let mut properties = base_properties.clone().add_constants(
+                        case.constants.iter().cloned().map(ConstExpr::from),
+                    );
+                    for [left, right] in &case.equal_conditions {
+                        properties.add_equal_conditions(left, right)?
+                    }
+                    properties
+                },
+            ] {
+                let sort = case
+                    .sort_columns
+                    .iter()
+                    .map(|&name| {
+                        col(name, &schema).map(|col| PhysicalSortExpr {
+                            expr: col,
+                            options: SortOptions::default(),
+                        })
                     })
-                })
-                .collect::<Result<Vec<_>>>()?;
+                    .collect::<Result<Vec<_>>>()?;
 
-            assert_eq!(
-                properties.ordering_satisfy(&sort),
-                case.should_satisfy_ordering,
-                "failed test '{}'",
-                case.name
-            );
+                assert_eq!(
+                    properties.ordering_satisfy(&sort),
+                    case.should_satisfy_ordering,
+                    "failed test '{}'",
+                    case.name
+                );
+            }
         }
 
         Ok(())


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to