alamb commented on code in PR #10939:
URL: https://github.com/apache/datafusion/pull/10939#discussion_r1642598704


##########
datafusion/optimizer/src/common_subexpr_eliminate.rs:
##########
@@ -1113,18 +1080,18 @@ mod test {
             )?
             .build()?;
 
-        let expected = "Projection: UInt32(1) + test.a, UInt32(1) + 
{AVG({UInt32(1) + test.a|{test.a}|{UInt32(1)}})|{{UInt32(1) + 
test.a|{test.a}|{UInt32(1)}}}} AS col1, UInt32(1) - {AVG({UInt32(1) + 
test.a|{test.a}|{UInt32(1)}})|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}}}} AS 
col2, {AVG({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a)} AS 
AVG(UInt32(1) + test.a), UInt32(1) + {my_agg({UInt32(1) + 
test.a|{test.a}|{UInt32(1)}})|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}}}} AS 
col3, UInt32(1) - {my_agg({UInt32(1) + 
test.a|{test.a}|{UInt32(1)}})|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}}}} AS 
col4, {my_agg({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a)} 
AS my_agg(UInt32(1) + test.a)\
-        \n  Aggregate: groupBy=[[{UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS 
UInt32(1) + test.a]], aggr=[[AVG({UInt32(1) + test.a|{test.a}|{UInt32(1)}}) AS 
{AVG({UInt32(1) + test.a|{test.a}|{UInt32(1)}})|{{UInt32(1) + 
test.a|{test.a}|{UInt32(1)}}}}, my_agg({UInt32(1) + 
test.a|{test.a}|{UInt32(1)}}) AS {my_agg({UInt32(1) + 
test.a|{test.a}|{UInt32(1)}})|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}}}}, 
AVG({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a) AS 
{AVG({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a)}, 
my_agg({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a) AS 
{my_agg({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a)}]]\
-        \n    Projection: UInt32(1) + test.a AS {UInt32(1) + 
test.a|{test.a}|{UInt32(1)}}, test.a, test.b, test.c\
+        let expected = "Projection: UInt32(1) + test.a, UInt32(1) + __cse_2 AS 
col1, UInt32(1) - __cse_2 AS col2, __cse_4 AS AVG(UInt32(1) + test.a), 
UInt32(1) + __cse_3 AS col3, UInt32(1) - __cse_3 AS col4, __cse_5 AS 
my_agg(UInt32(1) + test.a)\

Review Comment:
   That certainly looks much nicer



##########
datafusion/optimizer/src/common_subexpr_eliminate.rs:
##########
@@ -84,11 +84,11 @@ type IdArray = Vec<(usize, Identifier)>;
 /// - The number of occurrences and
 /// - The DataType
 /// of an expression.
-type ExprStats = HashMap<Identifier, (usize, DataType)>;
+type ExprStats = HashMap<Identifier, usize>;
 
 /// A map that contains the common expressions extracted during the second, 
rewriting
 /// traversal.

Review Comment:
   perhaps we can also update the docstring here to mention that the map 
contains the alias
   
   ```suggestion
   /// A map that contains the common expressions and their alias extracted 
during the second, rewriting
   /// traversal.
   ```



##########
datafusion/optimizer/src/common_subexpr_eliminate.rs:
##########
@@ -714,14 +689,7 @@ impl<'n> TreeNodeVisitor<'n> for ExprIdentifierVisitor<'_> 
{
         self.id_array[down_index].0 = self.up_index;
         if !self.expr_mask.ignores(expr) {
             self.id_array[down_index].1.clone_from(&expr_id);
-
-            // TODO: can we capture the data type in the second traversal only 
for
-            //  replaced expressions?
-            let data_type = expr.get_type(&self.input_schema)?;
-            let (count, _) = self
-                .expr_stats
-                .entry(expr_id.clone())
-                .or_insert((0, data_type));
+            let count = self.expr_stats.entry(expr_id.clone()).or_insert(0);

Review Comment:
   it would be great to avoid this clone (which I think is a String) by using a 
slightly different API. Maybe something like
   
   ```rust
   let count = if let Some(count) = self.expr_stats.get(expr_id) {
     count
   } else {
     self.expr_stats.insert(expr_id.clone(), 0);
     0
   };
   ```
   
   Which while less elegant and requires hashing twice doens't clone the string 
🤔 
   
   There may be a more clever way to avoid cloning the id
   
   Also, I see the current code clones `expr_id` as well, so this change is no 
worse.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to