peter-toth commented on code in PR #10396:
URL: https://github.com/apache/datafusion/pull/10396#discussion_r1592914848
##########
datafusion/optimizer/src/common_subexpr_eliminate.rs:
##########
@@ -34,90 +33,63 @@ use datafusion_common::{
use datafusion_expr::expr::Alias;
use datafusion_expr::logical_plan::{Aggregate, LogicalPlan, Projection,
Window};
use datafusion_expr::{col, Expr, ExprSchemable};
+use indexmap::IndexMap;
-/// Set of expressions generated by the [`ExprIdentifierVisitor`]
-/// and consumed by the [`CommonSubexprRewriter`].
-#[derive(Default)]
-struct ExprSet {
- /// A map from expression's identifier (stringified expr) to tuple
including:
- /// - the expression itself (cloned)
- /// - counter
- /// - DataType of this expression.
- /// - symbol used as the identifier in the alias.
- map: HashMap<Identifier, (Expr, usize, DataType, Identifier)>,
-}
-
-impl ExprSet {
- fn expr_identifier(expr: &Expr) -> Identifier {
- format!("{expr}")
- }
-
- fn get(&self, key: &Identifier) -> Option<&(Expr, usize, DataType,
Identifier)> {
- self.map.get(key)
- }
-
- fn entry(
- &mut self,
- key: Identifier,
- ) -> Entry<'_, Identifier, (Expr, usize, DataType, Identifier)> {
- self.map.entry(key)
- }
-
- fn populate_expr_set(
- &mut self,
- expr: &[Expr],
- input_schema: DFSchemaRef,
- expr_mask: ExprMask,
- ) -> Result<()> {
- expr.iter().try_for_each(|e| {
- self.expr_to_identifier(e, Arc::clone(&input_schema), expr_mask)?;
-
- Ok(())
- })
- }
-
- /// Go through an expression tree and generate identifier for every node
in this tree.
- fn expr_to_identifier(
- &mut self,
- expr: &Expr,
- input_schema: DFSchemaRef,
- expr_mask: ExprMask,
- ) -> Result<()> {
- expr.visit(&mut ExprIdentifierVisitor {
- expr_set: self,
- input_schema,
- visit_stack: vec![],
- node_count: 0,
- expr_mask,
- })?;
-
- Ok(())
- }
-}
-
-impl From<Vec<(Identifier, (Expr, usize, DataType, Identifier))>> for ExprSet {
- fn from(entries: Vec<(Identifier, (Expr, usize, DataType, Identifier))>)
-> Self {
- let mut expr_set = Self::default();
- entries.into_iter().for_each(|(k, v)| {
- expr_set.map.insert(k, v);
- });
- expr_set
- }
-}
-
-/// Identifier for each subexpression.
+/// Identifier that represents a subexpression tree.
///
-/// Note that the current implementation uses the `Display` of an expression
-/// (a `String`) as `Identifier`.
+/// Note that the current implementation contains:
+/// - the `Display` of an expression (a `String`) and
+/// - the identifiers of the childrens of the expression
Review Comment:
I added a comment here:
https://github.com/apache/datafusion/pull/10396#issuecomment-2099072200 that
identifiers as keys of the `ExprStats` map is a must have but identifiers as
aliases of exracted common expressions is not.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]