alamb commented on code in PR #10218:
URL: https://github.com/apache/datafusion/pull/10218#discussion_r1578667639


##########
datafusion/optimizer/src/eliminate_duplicated_expr.rs:
##########
@@ -35,78 +35,107 @@ impl EliminateDuplicatedExpr {
         Self {}
     }
 }
-
+// use this structure to avoid initial clone

Review Comment:
   ```suggestion
   /// Wrap the Expr in a Wrapper to support specialized comparison.
   ///
   /// Ignores the sort options for `SortExpr` because if the expression is the 
same
   /// the subsequent exprs are never matched
   /// 
   /// For example,  `ORDER BY a ASC a DESC` is the same
   // as `ORDER BY a ASC` (the second `a DESC` is never compared)
   ```



##########
datafusion/optimizer/src/eliminate_duplicated_expr.rs:
##########
@@ -35,78 +36,111 @@ impl EliminateDuplicatedExpr {
         Self {}
     }
 }
-
+// use this structure to avoid initial clone
+#[derive(Eq, Clone, Debug)]
+struct SortExprWrapper {

Review Comment:
   This is very clever 👏 



##########
datafusion/optimizer/src/eliminate_duplicated_expr.rs:
##########
@@ -35,78 +36,111 @@ impl EliminateDuplicatedExpr {
         Self {}
     }
 }
-
+// use this structure to avoid initial clone
+#[derive(Eq, Clone, Debug)]
+struct SortExprWrapper {
+    expr: Expr,
+}
+impl PartialEq for SortExprWrapper {
+    fn eq(&self, other: &Self) -> bool {
+        match (&self.expr, &other.expr) {
+            (Expr::Sort(own_sort), Expr::Sort(other_sort)) => {
+                own_sort.expr == other_sort.expr
+            }
+            _ => self.expr == other.expr,
+        }
+    }
+}
+impl Hash for SortExprWrapper {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        match &self.expr {
+            Expr::Sort(sort) => {
+                sort.expr.hash(state);
+            }
+            _ => {
+                self.expr.hash(state);
+            }
+        }
+    }
+}
 impl OptimizerRule for EliminateDuplicatedExpr {
     fn try_optimize(
         &self,
-        plan: &LogicalPlan,
+        _plan: &LogicalPlan,
         _config: &dyn OptimizerConfig,
     ) -> Result<Option<LogicalPlan>> {
+        internal_err!("Should have called EliminateDuplicatedExpr::rewrite")
+    }
+
+    fn apply_order(&self) -> Option<ApplyOrder> {
+        Some(ApplyOrder::TopDown)
+    }
+
+    fn supports_rewrite(&self) -> bool {
+        true
+    }
+
+    fn rewrite(
+        &self,
+        plan: LogicalPlan,
+        _config: &dyn OptimizerConfig,
+    ) -> Result<Transformed<LogicalPlan>> {
         match plan {
             LogicalPlan::Sort(sort) => {
-                let normalized_sort_keys = sort
+                let len = sort.expr.len();
+                let normalized_sort_keys: Vec<_> = sort
                     .expr
-                    .iter()
-                    .map(|e| match e {
-                        Expr::Sort(ExprSort { expr, .. }) => {
-                            Expr::Sort(ExprSort::new(expr.clone(), true, 
false))
-                        }
-                        _ => e.clone(),
-                    })
-                    .collect::<Vec<_>>();
+                    .into_iter()
+                    .map(|e| SortExprWrapper { expr: e })
+                    .collect();
 
-                // dedup sort.expr and keep order
-                let mut dedup_expr = Vec::new();
-                let mut dedup_set = HashSet::new();
-                sort.expr.iter().zip(normalized_sort_keys.iter()).for_each(
-                    |(expr, normalized_expr)| {
-                        if !dedup_set.contains(normalized_expr) {
-                            dedup_expr.push(expr);
-                            dedup_set.insert(normalized_expr);
-                        }
-                    },
-                );
-                if dedup_expr.len() == sort.expr.len() {
-                    Ok(None)
-                } else {
-                    Ok(Some(LogicalPlan::Sort(Sort {
-                        expr: 
dedup_expr.into_iter().cloned().collect::<Vec<_>>(),
-                        input: sort.input.clone(),
-                        fetch: sort.fetch,
-                    })))
+                let mut index_set = IndexSet::new(); // use index_set instead 
of Hashset to preserve order

Review Comment:
   This is also quite clever
   
   I think you can avoid a Vec here if you skip `normalized_sort_keys` and 
create `unique_exprs` directly, as you did below.
   
   Something like
   
   ```rust
   
   let unique_exprs: Vec<Expr> = sort
                       .expr
                       .into_iter()
                       // use SortExpr wrapper to ignore sort options
                       .map(|e| SortExprWrapper { expr: e })
                       .collect::<IndexSet<_>>()
                       .into_iter()
                       .map(|wrapper| wrapper.expr)
                       .collect();
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to