alamb commented on code in PR #6501:
URL: https://github.com/apache/arrow-datafusion/pull/6501#discussion_r1214775206


##########
datafusion/core/tests/sqllogictests/test_files/window.slt:
##########
@@ -2405,6 +2405,30 @@ GlobalLimitExec: skip=0, fetch=5
 ------SortExec: expr=[c9@0 DESC]
 --------CsvExec: file_groups={1 group: 
[[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], 
has_header=true
 
+# This test shows that ordering equivalence can keep track of complex 
expressions (not just Column expressions)

Review Comment:
   ❤️ 



##########
datafusion/physical-expr/src/equivalence.rs:
##########
@@ -34,14 +32,14 @@ use std::sync::Arc;
 /// This is used to represent both:
 ///
 /// 1. Equality conditions (like `A=B`), when `T` = [`Column`]
-/// 2. Ordering (like `A ASC = B ASC`), when `T` = [`OrderedColumn`]
+/// 2. Ordering (like `A ASC = B ASC`), when `T` = [`PhysicalSortExpr`]
 #[derive(Debug, Clone)]
 pub struct EquivalenceProperties<T = Column> {

Review Comment:
   🤔  maybe eventually we will do the same thing for `equality analysis as well`



##########
datafusion/physical-expr/src/equivalence.rs:
##########
@@ -115,6 +113,53 @@ impl<T: Eq + Hash + Clone> EquivalenceProperties<T> {
     }
 }
 
+/// Remove duplicates inside the `in_data` vector, returned vector would 
consist of unique entries
+fn deduplicate_vector<T: PartialEq>(in_data: Vec<T>) -> Vec<T> {
+    let mut result = vec![];
+    for elem in in_data {
+        if !result.contains(&elem) {
+            result.push(elem);
+        }
+    }
+    result
+}
+
+/// Find the position of `entry` inside `in_data`, if `entry` is not found 
return `None`.
+fn get_entry_position<T: PartialEq>(in_data: &[T], entry: &T) -> Option<usize> 
{
+    in_data.iter().position(|item| item.eq(entry))
+}
+
+/// Remove `entry` for the `in_data`, returns `true` if removal is successful 
(e.g `entry` is indeed in the `in_data`)
+/// Otherwise return `false`
+fn remove_from_vec<T: PartialEq>(in_data: &mut Vec<T>, entry: &T) -> bool {

Review Comment:
   Perhaps a more idiomatic way would be for this function to return 
`Option<T>` (which is what `Some(in_data.remove())` returns )
   
   That might allow you to avoid some of the other changes to `remove` later



##########
datafusion/physical-expr/src/equivalence.rs:
##########
@@ -551,4 +555,52 @@ mod tests {
 
         Ok(())
     }
+
+    #[test]

Review Comment:
   👍 



##########
datafusion/physical-expr/src/equivalence.rs:
##########
@@ -115,6 +113,53 @@ impl<T: Eq + Hash + Clone> EquivalenceProperties<T> {
     }
 }
 
+/// Remove duplicates inside the `in_data` vector, returned vector would 
consist of unique entries
+fn deduplicate_vector<T: PartialEq>(in_data: Vec<T>) -> Vec<T> {
+    let mut result = vec![];
+    for elem in in_data {
+        if !result.contains(&elem) {
+            result.push(elem);
+        }
+    }
+    result
+}
+
+/// Find the position of `entry` inside `in_data`, if `entry` is not found 
return `None`.
+fn get_entry_position<T: PartialEq>(in_data: &[T], entry: &T) -> Option<usize> 
{
+    in_data.iter().position(|item| item.eq(entry))
+}
+
+/// Remove `entry` for the `in_data`, returns `true` if removal is successful 
(e.g `entry` is indeed in the `in_data`)
+/// Otherwise return `false`
+fn remove_from_vec<T: PartialEq>(in_data: &mut Vec<T>, entry: &T) -> bool {
+    if let Some(idx) = get_entry_position(in_data, entry) {
+        in_data.remove(idx);
+        true
+    } else {
+        false
+    }
+}
+
+// Helper function to calculate column info recursively
+fn get_column_infos_helper(
+    indices: &mut Vec<(usize, String)>,
+    expr: &Arc<dyn PhysicalExpr>,
+) {
+    if let Some(col) = expr.as_any().downcast_ref::<Column>() {
+        indices.push((col.index(), col.name().to_string()))
+    } else if let Some(binary_expr) = 
expr.as_any().downcast_ref::<BinaryExpr>() {
+        get_column_infos_helper(indices, binary_expr.left());
+        get_column_infos_helper(indices, binary_expr.right());
+    };
+}
+
+/// Get index and name of each column that is in the expression (Can return 
multiple entries for `BinaryExpr`s)
+fn get_column_infos(expr: &Arc<dyn PhysicalExpr>) -> Vec<(usize, String)> {

Review Comment:
   Maybe 'get_column_index` would better explain what this function is doing



##########
datafusion/physical-expr/src/equivalence.rs:
##########
@@ -198,58 +247,7 @@ impl<T: Eq + Hash + Clone> EquivalentClass<T> {
     }
 }
 
-/// This object represents a [`Column`] with a definite ordering, for

Review Comment:
   🎉 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to