alamb commented on code in PR #10939: URL: https://github.com/apache/datafusion/pull/10939#discussion_r1642598704
########## datafusion/optimizer/src/common_subexpr_eliminate.rs: ########## @@ -1113,18 +1080,18 @@ mod test { )? .build()?; - let expected = "Projection: UInt32(1) + test.a, UInt32(1) + {AVG({UInt32(1) + test.a|{test.a}|{UInt32(1)}})|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}}}} AS col1, UInt32(1) - {AVG({UInt32(1) + test.a|{test.a}|{UInt32(1)}})|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}}}} AS col2, {AVG({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a)} AS AVG(UInt32(1) + test.a), UInt32(1) + {my_agg({UInt32(1) + test.a|{test.a}|{UInt32(1)}})|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}}}} AS col3, UInt32(1) - {my_agg({UInt32(1) + test.a|{test.a}|{UInt32(1)}})|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}}}} AS col4, {my_agg({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a)} AS my_agg(UInt32(1) + test.a)\ - \n Aggregate: groupBy=[[{UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a]], aggr=[[AVG({UInt32(1) + test.a|{test.a}|{UInt32(1)}}) AS {AVG({UInt32(1) + test.a|{test.a}|{UInt32(1)}})|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}}}}, my_agg({UInt32(1) + test.a|{test.a}|{UInt32(1)}}) AS {my_agg({UInt32(1) + test.a|{test.a}|{UInt32(1)}})|{{UInt32(1) + test.a|{test.a}|{UInt32(1)}}}}, AVG({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a) AS {AVG({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a)}, my_agg({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a) AS {my_agg({UInt32(1) + test.a|{test.a}|{UInt32(1)}} AS UInt32(1) + test.a)}]]\ - \n Projection: UInt32(1) + test.a AS {UInt32(1) + test.a|{test.a}|{UInt32(1)}}, test.a, test.b, test.c\ + let expected = "Projection: UInt32(1) + test.a, UInt32(1) + __cse_2 AS col1, UInt32(1) - __cse_2 AS col2, __cse_4 AS AVG(UInt32(1) + test.a), UInt32(1) + __cse_3 AS col3, UInt32(1) - __cse_3 AS col4, __cse_5 AS my_agg(UInt32(1) + test.a)\ Review Comment: That certainly looks much nicer ########## datafusion/optimizer/src/common_subexpr_eliminate.rs: ########## @@ -84,11 +84,11 @@ type IdArray = Vec<(usize, Identifier)>; /// - The number of occurrences and /// - The DataType /// of an expression. -type ExprStats = HashMap<Identifier, (usize, DataType)>; +type ExprStats = HashMap<Identifier, usize>; /// A map that contains the common expressions extracted during the second, rewriting /// traversal. Review Comment: perhaps we can also update the docstring here to mention that the map contains the alias ```suggestion /// A map that contains the common expressions and their alias extracted during the second, rewriting /// traversal. ``` ########## datafusion/optimizer/src/common_subexpr_eliminate.rs: ########## @@ -714,14 +689,7 @@ impl<'n> TreeNodeVisitor<'n> for ExprIdentifierVisitor<'_> { self.id_array[down_index].0 = self.up_index; if !self.expr_mask.ignores(expr) { self.id_array[down_index].1.clone_from(&expr_id); - - // TODO: can we capture the data type in the second traversal only for - // replaced expressions? - let data_type = expr.get_type(&self.input_schema)?; - let (count, _) = self - .expr_stats - .entry(expr_id.clone()) - .or_insert((0, data_type)); + let count = self.expr_stats.entry(expr_id.clone()).or_insert(0); Review Comment: it would be great to avoid this clone (which I think is a String) by using a slightly different API. Maybe something like ```rust let count = if let Some(count) = self.expr_stats.get(expr_id) { count } else { self.expr_stats.insert(expr_id.clone(), 0); 0 }; ``` Which while less elegant and requires hashing twice doens't clone the string 🤔 There may be a more clever way to avoid cloning the id Also, I see the current code clones `expr_id` as well, so this change is no worse. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org