(datafusion) branch main updated: Improve `CommonSubexprEliminate` rule with surely and conditionally evaluated stats (#11357)

alamb Fri, 12 Jul 2024 03:55:29 -0700

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git



The following commit(s) were added to refs/heads/main by this push:
     new d542cbda8f Improve `CommonSubexprEliminate` rule with surely and 
conditionally evaluated stats (#11357)
d542cbda8f is described below

commit d542cbda8f17ba004de18bb107ecf1c8ec3266f6
Author: Peter Toth <[email protected]>
AuthorDate: Fri Jul 12 12:53:05 2024 +0200

    Improve `CommonSubexprEliminate` rule with surely and conditionally 
evaluated stats (#11357)
    
    * Improve `CommonSubexprEliminate` rule with surely and conditionally 
evaluated stats
    
    * remove expression tree hashing as no longer needed
    
    * address review comments
    
    * add negative tests
---
 datafusion/expr/src/expr.rs                        |  39 +++-
 .../optimizer/src/common_subexpr_eliminate.rs      | 256 +++++++++++++--------
 .../optimizer/src/optimize_projections/mod.rs      |  10 +-
 datafusion/sqllogictest/test_files/cse.slt         |  88 +++++--
 datafusion/sqllogictest/test_files/select.slt      |  20 +-
 .../sqllogictest/test_files/tpch/q14.slt.part      |  33 +--
 6 files changed, 298 insertions(+), 148 deletions(-)

diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index ecece6dbfc..a344e621dd 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -17,7 +17,7 @@
 
 //! Logical Expressions: [`Expr`]
 
-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet};
 use std::fmt::{self, Display, Formatter, Write};
 use std::hash::{Hash, Hasher};
 use std::mem;
@@ -1380,7 +1380,7 @@ impl Expr {
     /// // refs contains "a" and "b"
     /// assert_eq!(refs.len(), 2);
     /// assert!(refs.contains(&Column::new_unqualified("a")));
-    ///  assert!(refs.contains(&Column::new_unqualified("b")));
+    /// assert!(refs.contains(&Column::new_unqualified("b")));
     /// ```
     pub fn column_refs(&self) -> HashSet<&Column> {
         let mut using_columns = HashSet::new();
@@ -1401,6 +1401,41 @@ impl Expr {
         .expect("traversal is infallable");
     }
 
+    /// Return all references to columns and their occurrence counts in the 
expression.
+    ///
+    /// # Example
+    /// ```
+    /// # use std::collections::HashMap;
+    /// # use datafusion_common::Column;
+    /// # use datafusion_expr::col;
+    /// // For an expression `a + (b * a)`
+    /// let expr = col("a") + (col("b") * col("a"));
+    /// let mut refs = expr.column_refs_counts();
+    /// // refs contains "a" and "b"
+    /// assert_eq!(refs.len(), 2);
+    /// assert_eq!(*refs.get(&Column::new_unqualified("a")).unwrap(), 2);
+    /// assert_eq!(*refs.get(&Column::new_unqualified("b")).unwrap(), 1);
+    /// ```
+    pub fn column_refs_counts(&self) -> HashMap<&Column, usize> {
+        let mut map = HashMap::new();
+        self.add_column_ref_counts(&mut map);
+        map
+    }
+
+    /// Adds references to all columns and their occurrence counts in the 
expression to
+    /// the map.
+    ///
+    /// See [`Self::column_refs_counts`] for details
+    pub fn add_column_ref_counts<'a>(&'a self, map: &mut HashMap<&'a Column, 
usize>) {
+        self.apply(|expr| {
+            if let Expr::Column(col) = expr {
+                *map.entry(col).or_default() += 1;
+            }
+            Ok(TreeNodeRecursion::Continue)
+        })
+        .expect("traversal is infallable");
+    }
+
     /// Returns true if there are any column references in this Expr
     pub fn any_column_refs(&self) -> bool {
         self.exists(|expr| Ok(matches!(expr, Expr::Column(_))))
diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs 
b/datafusion/optimizer/src/common_subexpr_eliminate.rs
index 721987b917..e4b3665297 100644
--- a/datafusion/optimizer/src/common_subexpr_eliminate.rs
+++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs
@@ -33,12 +33,12 @@ use datafusion_common::tree_node::{
 use datafusion_common::{
     internal_datafusion_err, qualified_name, Column, DFSchema, DFSchemaRef, 
Result,
 };
-use datafusion_expr::expr::Alias;
+use datafusion_expr::expr::{Alias, ScalarFunction};
 use datafusion_expr::logical_plan::tree_node::unwrap_arc;
 use datafusion_expr::logical_plan::{
     Aggregate, Filter, LogicalPlan, Projection, Sort, Window,
 };
-use datafusion_expr::{col, Expr, ExprSchemable};
+use datafusion_expr::{col, BinaryExpr, Case, Expr, ExprSchemable, Operator};
 use indexmap::IndexMap;
 
 const CSE_PREFIX: &str = "__common_expr";
@@ -56,13 +56,9 @@ struct Identifier<'n> {
 }
 
 impl<'n> Identifier<'n> {
-    fn new(expr: &'n Expr, is_tree: bool, random_state: &RandomState) -> Self {
+    fn new(expr: &'n Expr, random_state: &RandomState) -> Self {
         let mut hasher = random_state.build_hasher();
-        if is_tree {
-            expr.hash(&mut hasher);
-        } else {
-            expr.hash_node(&mut hasher);
-        }
+        expr.hash_node(&mut hasher);
         let hash = hasher.finish();
         Self { hash, expr }
     }
@@ -110,8 +106,9 @@ impl Hash for Identifier<'_> {
 /// ```
 type IdArray<'n> = Vec<(usize, Option<Identifier<'n>>)>;
 
-/// A map that contains the number of occurrences of expressions by their 
identifiers.
-type ExprStats<'n> = HashMap<Identifier<'n>, usize>;
+/// A map that contains the number of normal and conditional occurrences of 
expressions by
+/// their identifiers.
+type ExprStats<'n> = HashMap<Identifier<'n>, (usize, usize)>;
 
 /// A map that contains the common expressions and their alias extracted 
during the
 /// second, rewriting traversal.
@@ -200,6 +197,7 @@ impl CommonSubexprEliminate {
             expr_mask,
             random_state: &self.random_state,
             found_common: false,
+            conditional: false,
         };
         expr.visit(&mut visitor)?;
 
@@ -901,15 +899,17 @@ struct ExprIdentifierVisitor<'a, 'n> {
     random_state: &'a RandomState,
     // a flag to indicate that common expression found
     found_common: bool,
+    // if we are in a conditional branch. A conditional branch means that the 
expression
+    // might not be executed depending on the runtime values of other 
expressions, and
+    // thus can not be extracted as a common expression.
+    conditional: bool,
 }
 
 /// Record item that used when traversing an expression tree.
 enum VisitRecord<'n> {
     /// Marks the beginning of expression. It contains:
     /// - The post-order index assigned during the first, visiting traversal.
-    /// - A boolean flag if the record marks an expression subtree (not just a 
single
-    ///   node).
-    EnterMark(usize, bool),
+    EnterMark(usize),
 
     /// Marks an accumulated subexpression tree. It contains:
     /// - The accumulated identifier of a subexpression.
@@ -924,10 +924,6 @@ impl<'n> ExprIdentifierVisitor<'_, 'n> {
     /// Find the first `EnterMark` in the stack, and accumulates every 
`ExprItem` before
     /// it. Returns a tuple that contains:
     /// - The pre-order index of the expression we marked.
-    /// - A boolean flag if we marked an expression subtree (not just a single 
node).
-    ///   If true we didn't recurse into the node's children, so we need to 
calculate the
-    ///   hash of the marked expression tree (not just the node) and we need 
to validate
-    ///   the expression tree (not just the node).
     /// - The accumulated identifier of the children of the marked expression.
     /// - An accumulated boolean flag from the children of the marked 
expression if all
     ///   children are valid for subexpression elimination (i.e. it is safe to 
extract the
@@ -937,14 +933,14 @@ impl<'n> ExprIdentifierVisitor<'_, 'n> {
     ///   information up from children to parents via `visit_stack` during the 
first,
     ///   visiting traversal and no need to test the expression's validity 
beforehand with
     ///   an extra traversal).
-    fn pop_enter_mark(&mut self) -> (usize, bool, Option<Identifier<'n>>, 
bool) {
+    fn pop_enter_mark(&mut self) -> (usize, Option<Identifier<'n>>, bool) {
         let mut expr_id = None;
         let mut is_valid = true;
 
         while let Some(item) = self.visit_stack.pop() {
             match item {
-                VisitRecord::EnterMark(down_index, is_tree) => {
-                    return (down_index, is_tree, expr_id, is_valid);
+                VisitRecord::EnterMark(down_index) => {
+                    return (down_index, expr_id, is_valid);
                 }
                 VisitRecord::ExprItem(sub_expr_id, sub_expr_is_valid) => {
                     expr_id = Some(sub_expr_id.combine(expr_id));
@@ -954,53 +950,112 @@ impl<'n> ExprIdentifierVisitor<'_, 'n> {
         }
         unreachable!("Enter mark should paired with node number");
     }
+
+    /// Save the current `conditional` status and run `f` with `conditional` 
set to true.
+    fn conditionally<F: FnMut(&mut Self) -> Result<()>>(
+        &mut self,
+        mut f: F,
+    ) -> Result<()> {
+        let conditional = self.conditional;
+        self.conditional = true;
+        f(self)?;
+        self.conditional = conditional;
+
+        Ok(())
+    }
 }
 
 impl<'n> TreeNodeVisitor<'n> for ExprIdentifierVisitor<'_, 'n> {
     type Node = Expr;
 
     fn f_down(&mut self, expr: &'n Expr) -> Result<TreeNodeRecursion> {
-        // If an expression can short circuit its children then don't consider 
its
-        // children for CSE 
(https://github.com/apache/arrow-datafusion/issues/8814).
-        // This means that we don't recurse into its children, but handle the 
expression
-        // as a subtree when we calculate its identifier.
-        // TODO: consider surely executed children of "short circuited"s for 
CSE
-        let is_tree = expr.short_circuits();
-        let tnr = if is_tree {
-            TreeNodeRecursion::Jump
-        } else {
-            TreeNodeRecursion::Continue
-        };
-
         self.id_array.push((0, None));
         self.visit_stack
-            .push(VisitRecord::EnterMark(self.down_index, is_tree));
+            .push(VisitRecord::EnterMark(self.down_index));
         self.down_index += 1;
 
-        Ok(tnr)
+        // If an expression can short-circuit then some of its children might 
not be
+        // executed so count the occurrence of subexpressions as conditional 
in all
+        // children.
+        Ok(match expr {
+            // If we are already in a conditionally evaluated subtree then 
continue
+            // traversal.
+            _ if self.conditional => TreeNodeRecursion::Continue,
+
+            // In case of `ScalarFunction`s we don't know which children are 
surely
+            // executed so start visiting all children conditionally and stop 
the
+            // recursion with `TreeNodeRecursion::Jump`.
+            Expr::ScalarFunction(ScalarFunction { func, args })
+                if func.short_circuits() =>
+            {
+                self.conditionally(|visitor| {
+                    args.iter().try_for_each(|e| e.visit(visitor).map(|_| ()))
+                })?;
+
+                TreeNodeRecursion::Jump
+            }
+
+            // In case of `And` and `Or` the first child is surely executed, 
but we
+            // account subexpressions as conditional in the second.
+            Expr::BinaryExpr(BinaryExpr {
+                left,
+                op: Operator::And | Operator::Or,
+                right,
+            }) => {
+                left.visit(self)?;
+                self.conditionally(|visitor| right.visit(visitor).map(|_| 
()))?;
+
+                TreeNodeRecursion::Jump
+            }
+
+            // In case of `Case` the optional base expression and the first 
when
+            // expressions are surely executed, but we account subexpressions 
as
+            // conditional in the others.
+            Expr::Case(Case {
+                expr,
+                when_then_expr,
+                else_expr,
+            }) => {
+                expr.iter().try_for_each(|e| e.visit(self).map(|_| ()))?;
+                when_then_expr.iter().take(1).try_for_each(|(when, then)| {
+                    when.visit(self)?;
+                    self.conditionally(|visitor| then.visit(visitor).map(|_| 
()))
+                })?;
+                self.conditionally(|visitor| {
+                    when_then_expr.iter().skip(1).try_for_each(|(when, then)| {
+                        when.visit(visitor)?;
+                        then.visit(visitor).map(|_| ())
+                    })?;
+                    else_expr
+                        .iter()
+                        .try_for_each(|e| e.visit(visitor).map(|_| ()))
+                })?;
+
+                TreeNodeRecursion::Jump
+            }
+
+            // In case of non-short-circuit expressions continue the traversal.
+            _ => TreeNodeRecursion::Continue,
+        })
     }
 
     fn f_up(&mut self, expr: &'n Expr) -> Result<TreeNodeRecursion> {
-        let (down_index, is_tree, sub_expr_id, sub_expr_is_valid) = 
self.pop_enter_mark();
+        let (down_index, sub_expr_id, sub_expr_is_valid) = 
self.pop_enter_mark();
 
-        let (expr_id, is_valid) = if is_tree {
-            (
-                Identifier::new(expr, true, self.random_state),
-                !expr.is_volatile()?,
-            )
-        } else {
-            (
-                Identifier::new(expr, false, 
self.random_state).combine(sub_expr_id),
-                !expr.is_volatile_node() && sub_expr_is_valid,
-            )
-        };
+        let expr_id = Identifier::new(expr, 
self.random_state).combine(sub_expr_id);
+        let is_valid = !expr.is_volatile_node() && sub_expr_is_valid;
 
         self.id_array[down_index].0 = self.up_index;
         if is_valid && !self.expr_mask.ignores(expr) {
             self.id_array[down_index].1 = Some(expr_id);
-            let count = self.expr_stats.entry(expr_id).or_insert(0);
-            *count += 1;
-            if *count > 1 {
+            let (count, conditional_count) =
+                self.expr_stats.entry(expr_id).or_insert((0, 0));
+            if self.conditional {
+                *conditional_count += 1;
+            } else {
+                *count += 1;
+            }
+            if *count > 1 || (*count == 1 && *conditional_count > 0) {
                 self.found_common = true;
             }
         }
@@ -1039,51 +1094,40 @@ impl TreeNodeRewriter for CommonSubexprRewriter<'_, '_> 
{
             self.alias_counter += 1;
         }
 
-        // The `CommonSubexprRewriter` relies on `ExprIdentifierVisitor` to 
generate the
-        // `id_array`, which records the expr's identifier used to rewrite 
expr. So if we
-        // skip an expr in `ExprIdentifierVisitor`, we should skip it here, 
too.
-        let is_tree = expr.short_circuits();
-        let tnr = if is_tree {
-            TreeNodeRecursion::Jump
-        } else {
-            TreeNodeRecursion::Continue
-        };
-
         let (up_index, expr_id) = self.id_array[self.down_index];
         self.down_index += 1;
 
-        // skip `Expr`s without identifier (empty identifier).
-        let Some(expr_id) = expr_id else {
-            return Ok(Transformed::new(expr, false, tnr));
-        };
-
-        let count = self.expr_stats.get(&expr_id).unwrap();
-        if *count > 1 {
-            // step index to skip all sub-node (which has smaller series 
number).
-            while self.down_index < self.id_array.len()
-                && self.id_array[self.down_index].0 < up_index
-            {
-                self.down_index += 1;
-            }
+        // Handle `Expr`s with identifiers only
+        if let Some(expr_id) = expr_id {
+            let (count, conditional_count) = 
self.expr_stats.get(&expr_id).unwrap();
+            if *count > 1 || *count == 1 && *conditional_count > 0 {
+                // step index to skip all sub-node (which has smaller series 
number).
+                while self.down_index < self.id_array.len()
+                    && self.id_array[self.down_index].0 < up_index
+                {
+                    self.down_index += 1;
+                }
 
-            let expr_name = expr.display_name()?;
-            let (_, expr_alias) = 
self.common_exprs.entry(expr_id).or_insert_with(|| {
-                let expr_alias = self.alias_generator.next(CSE_PREFIX);
-                (expr, expr_alias)
-            });
+                let expr_name = expr.display_name()?;
+                let (_, expr_alias) =
+                    self.common_exprs.entry(expr_id).or_insert_with(|| {
+                        let expr_alias = self.alias_generator.next(CSE_PREFIX);
+                        (expr, expr_alias)
+                    });
 
-            // alias the expressions without an `Alias` ancestor node
-            let rewritten = if self.alias_counter > 0 {
-                col(expr_alias.clone())
-            } else {
-                self.alias_counter += 1;
-                col(expr_alias.clone()).alias(expr_name)
-            };
+                // alias the expressions without an `Alias` ancestor node
+                let rewritten = if self.alias_counter > 0 {
+                    col(expr_alias.clone())
+                } else {
+                    self.alias_counter += 1;
+                    col(expr_alias.clone()).alias(expr_name)
+                };
 
-            Ok(Transformed::new(rewritten, true, TreeNodeRecursion::Jump))
-        } else {
-            Ok(Transformed::new(expr, false, tnr))
+                return Ok(Transformed::new(rewritten, true, 
TreeNodeRecursion::Jump));
+            }
         }
+
+        Ok(Transformed::no(expr))
     }
 
     fn f_up(&mut self, expr: Expr) -> Result<Transformed<Self::Node>> {
@@ -1685,7 +1729,7 @@ mod test {
             .unwrap();
         let rule = CommonSubexprEliminate::new();
         let optimized_plan = rule.rewrite(plan, 
&OptimizerContext::new()).unwrap();
-        assert!(!optimized_plan.transformed);
+        assert!(optimized_plan.transformed);
         let optimized_plan = optimized_plan.data;
 
         let schema = optimized_plan.schema();
@@ -1837,22 +1881,29 @@ mod test {
         let table_scan = test_table_scan()?;
 
         let extracted_short_circuit = 
col("a").eq(lit(0)).or(col("b").eq(lit(0)));
-        let not_extracted_short_circuit_leg_1 = (col("a") + 
col("b")).eq(lit(0));
+        let extracted_short_circuit_leg_1 = (col("a") + col("b")).eq(lit(0));
         let not_extracted_short_circuit_leg_2 = (col("a") - 
col("b")).eq(lit(0));
+        let extracted_short_circuit_leg_3 = (col("a") * col("b")).eq(lit(0));
         let plan = LogicalPlanBuilder::from(table_scan.clone())
             .project(vec![
                 extracted_short_circuit.clone().alias("c1"),
                 extracted_short_circuit.alias("c2"),
-                not_extracted_short_circuit_leg_1.clone().alias("c3"),
-                not_extracted_short_circuit_leg_2.clone().alias("c4"),
-                not_extracted_short_circuit_leg_1
-                    .or(not_extracted_short_circuit_leg_2)
+                extracted_short_circuit_leg_1
+                    .clone()
+                    .or(not_extracted_short_circuit_leg_2.clone())
+                    .alias("c3"),
+                extracted_short_circuit_leg_1
+                    .and(not_extracted_short_circuit_leg_2)
+                    .alias("c4"),
+                extracted_short_circuit_leg_3
+                    .clone()
+                    .or(extracted_short_circuit_leg_3.clone())
                     .alias("c5"),
             ])?
             .build()?;
 
-        let expected = "Projection: __common_expr_1 AS c1, __common_expr_1 AS 
c2, test.a + test.b = Int32(0) AS c3, test.a - test.b = Int32(0) AS c4, test.a 
+ test.b = Int32(0) OR test.a - test.b = Int32(0) AS c5\
-        \n  Projection: test.a = Int32(0) OR test.b = Int32(0) AS 
__common_expr_1, test.a, test.b, test.c\
+        let expected = "Projection: __common_expr_1 AS c1, __common_expr_1 AS 
c2, __common_expr_2 OR test.a - test.b = Int32(0) AS c3, __common_expr_2 AND 
test.a - test.b = Int32(0) AS c4, __common_expr_3 OR __common_expr_3 AS c5\
+        \n  Projection: test.a = Int32(0) OR test.b = Int32(0) AS 
__common_expr_1, test.a + test.b = Int32(0) AS __common_expr_2, test.a * test.b 
= Int32(0) AS __common_expr_3, test.a, test.b, test.c\
         \n    TableScan: test";
 
         assert_optimized_plan_eq(expected, plan, None);
@@ -1888,10 +1939,12 @@ mod test {
         let table_scan = test_table_scan()?;
 
         let rand = rand_func().call(vec![]);
-        let not_extracted_volatile_short_circuit_2 =
-            rand.clone().eq(lit(0)).or(col("b").eq(lit(0)));
+        let extracted_short_circuit_leg_1 = col("a").eq(lit(0));
         let not_extracted_volatile_short_circuit_1 =
-            col("a").eq(lit(0)).or(rand.eq(lit(0)));
+            extracted_short_circuit_leg_1.or(rand.clone().eq(lit(0)));
+        let not_extracted_short_circuit_leg_2 = col("b").eq(lit(0));
+        let not_extracted_volatile_short_circuit_2 =
+            rand.eq(lit(0)).or(not_extracted_short_circuit_leg_2);
         let plan = LogicalPlanBuilder::from(table_scan.clone())
             .project(vec![
                 not_extracted_volatile_short_circuit_1.clone().alias("c1"),
@@ -1901,10 +1954,11 @@ mod test {
             ])?
             .build()?;
 
-        let expected = "Projection: test.a = Int32(0) OR random() = Int32(0) 
AS c1, test.a = Int32(0) OR random() = Int32(0) AS c2, random() = Int32(0) OR 
test.b = Int32(0) AS c3, random() = Int32(0) OR test.b = Int32(0) AS c4\
-        \n  TableScan: test";
+        let expected = "Projection: __common_expr_1 OR random() = Int32(0) AS 
c1, __common_expr_1 OR random() = Int32(0) AS c2, random() = Int32(0) OR test.b 
= Int32(0) AS c3, random() = Int32(0) OR test.b = Int32(0) AS c4\
+        \n  Projection: test.a = Int32(0) AS __common_expr_1, test.a, test.b, 
test.c\
+        \n    TableScan: test";
 
-        assert_non_optimized_plan_eq(expected, plan, None);
+        assert_optimized_plan_eq(expected, plan, None);
 
         Ok(())
     }
diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs 
b/datafusion/optimizer/src/optimize_projections/mod.rs
index cae2a7b2ca..58c1ae297b 100644
--- a/datafusion/optimizer/src/optimize_projections/mod.rs
+++ b/datafusion/optimizer/src/optimize_projections/mod.rs
@@ -19,7 +19,7 @@
 
 mod required_indices;
 
-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet};
 use std::sync::Arc;
 
 use crate::optimizer::ApplyOrder;
@@ -42,7 +42,6 @@ use datafusion_common::tree_node::{
     Transformed, TreeNode, TreeNodeIterator, TreeNodeRecursion,
 };
 use datafusion_expr::logical_plan::tree_node::unwrap_arc;
-use hashbrown::HashMap;
 
 /// Optimizer rule to prune unnecessary columns from intermediate schemas
 /// inside the [`LogicalPlan`]. This rule:
@@ -472,11 +471,8 @@ fn merge_consecutive_projections(proj: Projection) -> 
Result<Transformed<Project
 
     // Count usages (referrals) of each projection expression in its input 
fields:
     let mut column_referral_map = HashMap::<&Column, usize>::new();
-    for columns in expr.iter().map(|expr| expr.column_refs()) {
-        for col in columns.into_iter() {
-            *column_referral_map.entry(col).or_default() += 1;
-        }
-    }
+    expr.iter()
+        .for_each(|expr| expr.add_column_ref_counts(&mut column_referral_map));
 
     // If an expression is non-trivial and appears more than once, do not merge
     // them as consecutive projections will benefit from a compute-once 
approach.
diff --git a/datafusion/sqllogictest/test_files/cse.slt 
b/datafusion/sqllogictest/test_files/cse.slt
index 3579c1c163..19b47fa50e 100644
--- a/datafusion/sqllogictest/test_files/cse.slt
+++ b/datafusion/sqllogictest/test_files/cse.slt
@@ -93,15 +93,16 @@ FROM t1
 ----
 logical_plan
 01)Projection: __common_expr_1 AS c1, __common_expr_1 AS c2, __common_expr_2 
AS c3, __common_expr_2 AS c4, __common_expr_3 AS c5, __common_expr_3 AS c6
-02)--Projection: t1.a = Float64(0) AND t1.b = Float64(0) AS __common_expr_1, 
t1.a = Float64(0) OR t1.b = Float64(0) AS __common_expr_2, CASE WHEN t1.a = 
Float64(0) THEN Int64(0) ELSE Int64(1) END AS __common_expr_3
-03)----TableScan: t1 projection=[a, b]
+02)--Projection: __common_expr_4 AND t1.b = Float64(0) AS __common_expr_1, 
__common_expr_4 OR t1.b = Float64(0) AS __common_expr_2, CASE WHEN 
__common_expr_4 THEN Int64(0) ELSE Int64(1) END AS __common_expr_3
+03)----Projection: t1.a = Float64(0) AS __common_expr_4, t1.b
+04)------TableScan: t1 projection=[a, b]
 physical_plan
 01)ProjectionExec: expr=[__common_expr_1@0 as c1, __common_expr_1@0 as c2, 
__common_expr_2@1 as c3, __common_expr_2@1 as c4, __common_expr_3@2 as c5, 
__common_expr_3@2 as c6]
-02)--ProjectionExec: expr=[a@0 = 0 AND b@1 = 0 as __common_expr_1, a@0 = 0 OR 
b@1 = 0 as __common_expr_2, CASE WHEN a@0 = 0 THEN 0 ELSE 1 END as 
__common_expr_3]
-03)----MemoryExec: partitions=1, partition_sizes=[0]
+02)--ProjectionExec: expr=[__common_expr_4@0 AND b@1 = 0 as __common_expr_1, 
__common_expr_4@0 OR b@1 = 0 as __common_expr_2, CASE WHEN __common_expr_4@0 
THEN 0 ELSE 1 END as __common_expr_3]
+03)----ProjectionExec: expr=[a@0 = 0 as __common_expr_4, b@1 as b]
+04)------MemoryExec: partitions=1, partition_sizes=[0]
 
 # Common children of short-circuit expression
-# TODO: consider surely executed children of "short circuited"s for CSE. i.e. 
`a = 0`, `a = 2`, `a = 4` should be extracted
 query TT
 EXPLAIN SELECT
     a = 0 AND b = 0 AS c1,
@@ -121,14 +122,15 @@ EXPLAIN SELECT
 FROM t1
 ----
 logical_plan
-01)Projection: t1.a = Float64(0) AND t1.b = Float64(0) AS c1, t1.a = 
Float64(0) AND t1.b = Float64(1) AS c2, t1.b = Float64(2) AND t1.a = Float64(1) 
AS c3, t1.b = Float64(3) AND t1.a = Float64(1) AS c4, t1.a = Float64(2) OR t1.b 
= Float64(4) AS c5, t1.a = Float64(2) OR t1.b = Float64(5) AS c6, t1.b = 
Float64(6) OR t1.a = Float64(3) AS c7, t1.b = Float64(7) OR t1.a = Float64(3) 
AS c8, CASE WHEN t1.a = Float64(4) THEN Int64(0) ELSE Int64(1) END AS c9, CASE 
WHEN t1.a = Float64(4) THEN Int64 [...]
-02)--TableScan: t1 projection=[a, b]
+01)Projection: __common_expr_1 AND t1.b = Float64(0) AS c1, __common_expr_1 
AND t1.b = Float64(1) AS c2, t1.b = Float64(2) AND t1.a = Float64(1) AS c3, 
t1.b = Float64(3) AND t1.a = Float64(1) AS c4, __common_expr_2 OR t1.b = 
Float64(4) AS c5, __common_expr_2 OR t1.b = Float64(5) AS c6, t1.b = Float64(6) 
OR t1.a = Float64(3) AS c7, t1.b = Float64(7) OR t1.a = Float64(3) AS c8, CASE 
WHEN __common_expr_3 THEN Int64(0) ELSE Int64(1) END AS c9, CASE WHEN 
__common_expr_3 THEN Int64(0) ELSE Int [...]
+02)--Projection: t1.a = Float64(0) AS __common_expr_1, t1.a = Float64(2) AS 
__common_expr_2, t1.a = Float64(4) AS __common_expr_3, t1.a, t1.b
+03)----TableScan: t1 projection=[a, b]
 physical_plan
-01)ProjectionExec: expr=[a@0 = 0 AND b@1 = 0 as c1, a@0 = 0 AND b@1 = 1 as c2, 
b@1 = 2 AND a@0 = 1 as c3, b@1 = 3 AND a@0 = 1 as c4, a@0 = 2 OR b@1 = 4 as c5, 
a@0 = 2 OR b@1 = 5 as c6, b@1 = 6 OR a@0 = 3 as c7, b@1 = 7 OR a@0 = 3 as c8, 
CASE WHEN a@0 = 4 THEN 0 ELSE 1 END as c9, CASE WHEN a@0 = 4 THEN 0 ELSE 2 END 
as c10, CASE WHEN b@1 = 8 THEN a@0 + 1 ELSE 0 END as c11, CASE WHEN b@1 = 9 
THEN a@0 + 1 ELSE 0 END as c12, CASE WHEN b@1 = 10 THEN 0 ELSE a@0 + 2 END as 
c13, CASE WHEN b@1 = 1 [...]
-02)--MemoryExec: partitions=1, partition_sizes=[0]
+01)ProjectionExec: expr=[__common_expr_1@0 AND b@4 = 0 as c1, 
__common_expr_1@0 AND b@4 = 1 as c2, b@4 = 2 AND a@3 = 1 as c3, b@4 = 3 AND a@3 
= 1 as c4, __common_expr_2@1 OR b@4 = 4 as c5, __common_expr_2@1 OR b@4 = 5 as 
c6, b@4 = 6 OR a@3 = 3 as c7, b@4 = 7 OR a@3 = 3 as c8, CASE WHEN 
__common_expr_3@2 THEN 0 ELSE 1 END as c9, CASE WHEN __common_expr_3@2 THEN 0 
ELSE 2 END as c10, CASE WHEN b@4 = 8 THEN a@3 + 1 ELSE 0 END as c11, CASE WHEN 
b@4 = 9 THEN a@3 + 1 ELSE 0 END as c12, CASE WHE [...]
+02)--ProjectionExec: expr=[a@0 = 0 as __common_expr_1, a@0 = 2 as 
__common_expr_2, a@0 = 4 as __common_expr_3, a@0 as a, b@1 as b]
+03)----MemoryExec: partitions=1, partition_sizes=[0]
 
 # Common children of volatile, short-circuit expression
-# TODO: consider surely executed children of "short circuited"s for CSE. i.e. 
`a = 0`, `a = 2`, `a = 4` should be extracted
 query TT
 EXPLAIN SELECT
     a = 0 AND b = random() AS c1,
@@ -148,11 +150,13 @@ EXPLAIN SELECT
 FROM t1
 ----
 logical_plan
-01)Projection: t1.a = Float64(0) AND t1.b = random() AS c1, t1.a = Float64(0) 
AND t1.b = Float64(1) + random() AS c2, t1.b = Float64(2) + random() AND t1.a = 
Float64(1) AS c3, t1.b = Float64(3) + random() AND t1.a = Float64(1) AS c4, 
t1.a = Float64(2) OR t1.b = Float64(4) + random() AS c5, t1.a = Float64(2) OR 
t1.b = Float64(5) + random() AS c6, t1.b = Float64(6) + random() OR t1.a = 
Float64(3) AS c7, t1.b = Float64(7) + random() OR t1.a = Float64(3) AS c8, CASE 
WHEN t1.a = Float64(4) TH [...]
-02)--TableScan: t1 projection=[a, b]
+01)Projection: __common_expr_1 AND t1.b = random() AS c1, __common_expr_1 AND 
t1.b = Float64(1) + random() AS c2, t1.b = Float64(2) + random() AND t1.a = 
Float64(1) AS c3, t1.b = Float64(3) + random() AND t1.a = Float64(1) AS c4, 
__common_expr_2 OR t1.b = Float64(4) + random() AS c5, __common_expr_2 OR t1.b 
= Float64(5) + random() AS c6, t1.b = Float64(6) + random() OR t1.a = 
Float64(3) AS c7, t1.b = Float64(7) + random() OR t1.a = Float64(3) AS c8, CASE 
WHEN __common_expr_3 THEN random( [...]
+02)--Projection: t1.a = Float64(0) AS __common_expr_1, t1.a = Float64(2) AS 
__common_expr_2, t1.a = Float64(4) AS __common_expr_3, t1.a, t1.b
+03)----TableScan: t1 projection=[a, b]
 physical_plan
-01)ProjectionExec: expr=[a@0 = 0 AND b@1 = random() as c1, a@0 = 0 AND b@1 = 1 
+ random() as c2, b@1 = 2 + random() AND a@0 = 1 as c3, b@1 = 3 + random() AND 
a@0 = 1 as c4, a@0 = 2 OR b@1 = 4 + random() as c5, a@0 = 2 OR b@1 = 5 + 
random() as c6, b@1 = 6 + random() OR a@0 = 3 as c7, b@1 = 7 + random() OR a@0 
= 3 as c8, CASE WHEN a@0 = 4 THEN random() ELSE 1 END as c9, CASE WHEN a@0 = 4 
THEN random() ELSE 2 END as c10, CASE WHEN b@1 = 8 + random() THEN a@0 + 1 ELSE 
0 END as c11, CASE WHEN [...]
-02)--MemoryExec: partitions=1, partition_sizes=[0]
+01)ProjectionExec: expr=[__common_expr_1@0 AND b@4 = random() as c1, 
__common_expr_1@0 AND b@4 = 1 + random() as c2, b@4 = 2 + random() AND a@3 = 1 
as c3, b@4 = 3 + random() AND a@3 = 1 as c4, __common_expr_2@1 OR b@4 = 4 + 
random() as c5, __common_expr_2@1 OR b@4 = 5 + random() as c6, b@4 = 6 + 
random() OR a@3 = 3 as c7, b@4 = 7 + random() OR a@3 = 3 as c8, CASE WHEN 
__common_expr_3@2 THEN random() ELSE 1 END as c9, CASE WHEN __common_expr_3@2 
THEN random() ELSE 2 END as c10, CASE WHEN  [...]
+02)--ProjectionExec: expr=[a@0 = 0 as __common_expr_1, a@0 = 2 as 
__common_expr_2, a@0 = 4 as __common_expr_3, a@0 as a, b@1 as b]
+03)----MemoryExec: partitions=1, partition_sizes=[0]
 
 # Common volatile children of short-circuit expression
 query TT
@@ -171,3 +175,59 @@ logical_plan
 physical_plan
 01)ProjectionExec: expr=[a@0 = random() AND b@1 = 0 as c1, a@0 = random() AND 
b@1 = 1 as c2, a@0 = 2 + random() OR b@1 = 4 as c3, a@0 = 2 + random() OR b@1 = 
5 as c4, CASE WHEN a@0 = 4 + random() THEN 0 ELSE 1 END as c5, CASE WHEN a@0 = 
4 + random() THEN 0 ELSE 2 END as c6]
 02)--MemoryExec: partitions=1, partition_sizes=[0]
+
+# Surely only once but also conditionally evaluated expressions
+query TT
+EXPLAIN SELECT
+    (a = 1 OR random() = 0) AND a = 1 AS c1,
+    (a = 2 AND random() = 0) OR a = 2 AS c2,
+    CASE WHEN a + 3 = 0 THEN a + 3 ELSE 0 END AS c3,
+    CASE WHEN a + 4 = 0 THEN 0 WHEN a + 4 THEN 0 ELSE 0 END AS c4,
+    CASE WHEN a + 5 = 0 THEN 0 WHEN random() = 0 THEN a + 5 ELSE 0 END AS c5,
+    CASE WHEN a + 6 = 0 THEN 0 ELSE a + 6 END AS c6
+FROM t1
+----
+logical_plan
+01)Projection: (__common_expr_1 OR random() = Float64(0)) AND __common_expr_1 
AS c1, __common_expr_2 AND random() = Float64(0) OR __common_expr_2 AS c2, CASE 
WHEN __common_expr_3 = Float64(0) THEN __common_expr_3 ELSE Float64(0) END AS 
c3, CASE WHEN __common_expr_4 = Float64(0) THEN Int64(0) WHEN 
CAST(__common_expr_4 AS Boolean) THEN Int64(0) ELSE Int64(0) END AS c4, CASE 
WHEN __common_expr_5 = Float64(0) THEN Float64(0) WHEN random() = Float64(0) 
THEN __common_expr_5 ELSE Float64(0) END [...]
+02)--Projection: t1.a = Float64(1) AS __common_expr_1, t1.a = Float64(2) AS 
__common_expr_2, t1.a + Float64(3) AS __common_expr_3, t1.a + Float64(4) AS 
__common_expr_4, t1.a + Float64(5) AS __common_expr_5, t1.a + Float64(6) AS 
__common_expr_6
+03)----TableScan: t1 projection=[a]
+physical_plan
+01)ProjectionExec: expr=[(__common_expr_1@0 OR random() = 0) AND 
__common_expr_1@0 as c1, __common_expr_2@1 AND random() = 0 OR 
__common_expr_2@1 as c2, CASE WHEN __common_expr_3@2 = 0 THEN __common_expr_3@2 
ELSE 0 END as c3, CASE WHEN __common_expr_4@3 = 0 THEN 0 WHEN 
CAST(__common_expr_4@3 AS Boolean) THEN 0 ELSE 0 END as c4, CASE WHEN 
__common_expr_5@4 = 0 THEN 0 WHEN random() = 0 THEN __common_expr_5@4 ELSE 0 
END as c5, CASE WHEN __common_expr_6@5 = 0 THEN 0 ELSE __common_expr_6@5 EN 
[...]
+02)--ProjectionExec: expr=[a@0 = 1 as __common_expr_1, a@0 = 2 as 
__common_expr_2, a@0 + 3 as __common_expr_3, a@0 + 4 as __common_expr_4, a@0 + 
5 as __common_expr_5, a@0 + 6 as __common_expr_6]
+03)----MemoryExec: partitions=1, partition_sizes=[0]
+
+# Surely only once but also conditionally evaluated subexpressions
+query TT
+EXPLAIN SELECT
+    (a = 1 OR random() = 0) AND (a = 1 OR random() = 1) AS c1,
+    (a = 2 AND random() = 0) OR (a = 2 AND random() = 1) AS c2,
+    CASE WHEN a + 3 = 0 THEN a + 3 + random() ELSE 0 END AS c3,
+    CASE WHEN a + 4 = 0 THEN 0 ELSE a + 4 + random() END AS c4
+FROM t1
+----
+logical_plan
+01)Projection: (__common_expr_1 OR random() = Float64(0)) AND (__common_expr_1 
OR random() = Float64(1)) AS c1, __common_expr_2 AND random() = Float64(0) OR 
__common_expr_2 AND random() = Float64(1) AS c2, CASE WHEN __common_expr_3 = 
Float64(0) THEN __common_expr_3 + random() ELSE Float64(0) END AS c3, CASE WHEN 
__common_expr_4 = Float64(0) THEN Float64(0) ELSE __common_expr_4 + random() 
END AS c4
+02)--Projection: t1.a = Float64(1) AS __common_expr_1, t1.a = Float64(2) AS 
__common_expr_2, t1.a + Float64(3) AS __common_expr_3, t1.a + Float64(4) AS 
__common_expr_4
+03)----TableScan: t1 projection=[a]
+physical_plan
+01)ProjectionExec: expr=[(__common_expr_1@0 OR random() = 0) AND 
(__common_expr_1@0 OR random() = 1) as c1, __common_expr_2@1 AND random() = 0 
OR __common_expr_2@1 AND random() = 1 as c2, CASE WHEN __common_expr_3@2 = 0 
THEN __common_expr_3@2 + random() ELSE 0 END as c3, CASE WHEN __common_expr_4@3 
= 0 THEN 0 ELSE __common_expr_4@3 + random() END as c4]
+02)--ProjectionExec: expr=[a@0 = 1 as __common_expr_1, a@0 = 2 as 
__common_expr_2, a@0 + 3 as __common_expr_3, a@0 + 4 as __common_expr_4]
+03)----MemoryExec: partitions=1, partition_sizes=[0]
+
+# Only conditionally evaluated expressions
+query TT
+EXPLAIN SELECT
+    (random() = 0 OR a = 1) AND a = 1 AS c1,
+    (random() = 0 AND a = 2) OR a = 2 AS c2,
+    CASE WHEN random() = 0 THEN a + 3 ELSE a + 3 END AS c3,
+    CASE WHEN random() = 0 THEN 0 WHEN a + 4 = 0 THEN a + 4 ELSE 0 END AS c4,
+    CASE WHEN random() = 0 THEN 0 WHEN a + 5 = 0 THEN 0 ELSE a + 5 END AS c5,
+    CASE WHEN random() = 0 THEN 0 WHEN random() = 0 THEN a + 6 ELSE a + 6 END 
AS c6
+FROM t1
+----
+logical_plan
+01)Projection: (random() = Float64(0) OR t1.a = Float64(1)) AND t1.a = 
Float64(1) AS c1, random() = Float64(0) AND t1.a = Float64(2) OR t1.a = 
Float64(2) AS c2, CASE WHEN random() = Float64(0) THEN t1.a + Float64(3) ELSE 
t1.a + Float64(3) END AS c3, CASE WHEN random() = Float64(0) THEN Float64(0) 
WHEN t1.a + Float64(4) = Float64(0) THEN t1.a + Float64(4) ELSE Float64(0) END 
AS c4, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(5) = 
Float64(0) THEN Float64(0) ELSE t1. [...]
+02)--TableScan: t1 projection=[a]
+physical_plan
+01)ProjectionExec: expr=[(random() = 0 OR a@0 = 1) AND a@0 = 1 as c1, random() 
= 0 AND a@0 = 2 OR a@0 = 2 as c2, CASE WHEN random() = 0 THEN a@0 + 3 ELSE a@0 
+ 3 END as c3, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 4 = 0 THEN a@0 + 4 ELSE 
0 END as c4, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 5 = 0 THEN 0 ELSE a@0 + 5 
END as c5, CASE WHEN random() = 0 THEN 0 WHEN random() = 0 THEN a@0 + 6 ELSE 
a@0 + 6 END as c6]
+02)--MemoryExec: partitions=1, partition_sizes=[0]
diff --git a/datafusion/sqllogictest/test_files/select.slt 
b/datafusion/sqllogictest/test_files/select.slt
index f9baf8db69..95f67245a9 100644
--- a/datafusion/sqllogictest/test_files/select.slt
+++ b/datafusion/sqllogictest/test_files/select.slt
@@ -1504,21 +1504,25 @@ query TT
 EXPLAIN SELECT y > 0 and 1 / y < 1, x > 0 and y > 0 and 1 / y < 1 / x from t;
 ----
 logical_plan
-01)Projection: t.y > Int32(0) AND Int64(1) / CAST(t.y AS Int64) < Int64(1) AS 
t.y > Int64(0) AND Int64(1) / t.y < Int64(1), t.x > Int32(0) AND t.y > Int32(0) 
AND Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS Int64) AS t.x > 
Int64(0) AND t.y > Int64(0) AND Int64(1) / t.y < Int64(1) / t.x
-02)--TableScan: t projection=[x, y]
+01)Projection: __common_expr_1 AND Int64(1) / CAST(t.y AS Int64) < Int64(1) AS 
t.y > Int64(0) AND Int64(1) / t.y < Int64(1), t.x > Int32(0) AND 
__common_expr_1 AND Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS 
Int64) AS t.x > Int64(0) AND t.y > Int64(0) AND Int64(1) / t.y < Int64(1) / t.x
+02)--Projection: t.y > Int32(0) AS __common_expr_1, t.x, t.y
+03)----TableScan: t projection=[x, y]
 physical_plan
-01)ProjectionExec: expr=[y@1 > 0 AND 1 / CAST(y@1 AS Int64) < 1 as t.y > 
Int64(0) AND Int64(1) / t.y < Int64(1), x@0 > 0 AND y@1 > 0 AND 1 / CAST(y@1 AS 
Int64) < 1 / CAST(x@0 AS Int64) as t.x > Int64(0) AND t.y > Int64(0) AND 
Int64(1) / t.y < Int64(1) / t.x]
-02)--MemoryExec: partitions=1, partition_sizes=[1]
+01)ProjectionExec: expr=[__common_expr_1@0 AND 1 / CAST(y@2 AS Int64) < 1 as 
t.y > Int64(0) AND Int64(1) / t.y < Int64(1), x@1 > 0 AND __common_expr_1@0 AND 
1 / CAST(y@2 AS Int64) < 1 / CAST(x@1 AS Int64) as t.x > Int64(0) AND t.y > 
Int64(0) AND Int64(1) / t.y < Int64(1) / t.x]
+02)--ProjectionExec: expr=[y@1 > 0 as __common_expr_1, x@0 as x, y@1 as y]
+03)----MemoryExec: partitions=1, partition_sizes=[1]
 
 query TT
 EXPLAIN SELECT y = 0 or 1 / y < 1, x = 0 or y = 0 or 1 / y < 1 / x from t;
 ----
 logical_plan
-01)Projection: t.y = Int32(0) OR Int64(1) / CAST(t.y AS Int64) < Int64(1) AS 
t.y = Int64(0) OR Int64(1) / t.y < Int64(1), t.x = Int32(0) OR t.y = Int32(0) 
OR Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS Int64) AS t.x = 
Int64(0) OR t.y = Int64(0) OR Int64(1) / t.y < Int64(1) / t.x
-02)--TableScan: t projection=[x, y]
+01)Projection: __common_expr_1 OR Int64(1) / CAST(t.y AS Int64) < Int64(1) AS 
t.y = Int64(0) OR Int64(1) / t.y < Int64(1), t.x = Int32(0) OR __common_expr_1 
OR Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS Int64) AS t.x = 
Int64(0) OR t.y = Int64(0) OR Int64(1) / t.y < Int64(1) / t.x
+02)--Projection: t.y = Int32(0) AS __common_expr_1, t.x, t.y
+03)----TableScan: t projection=[x, y]
 physical_plan
-01)ProjectionExec: expr=[y@1 = 0 OR 1 / CAST(y@1 AS Int64) < 1 as t.y = 
Int64(0) OR Int64(1) / t.y < Int64(1), x@0 = 0 OR y@1 = 0 OR 1 / CAST(y@1 AS 
Int64) < 1 / CAST(x@0 AS Int64) as t.x = Int64(0) OR t.y = Int64(0) OR Int64(1) 
/ t.y < Int64(1) / t.x]
-02)--MemoryExec: partitions=1, partition_sizes=[1]
+01)ProjectionExec: expr=[__common_expr_1@0 OR 1 / CAST(y@2 AS Int64) < 1 as 
t.y = Int64(0) OR Int64(1) / t.y < Int64(1), x@1 = 0 OR __common_expr_1@0 OR 1 
/ CAST(y@2 AS Int64) < 1 / CAST(x@1 AS Int64) as t.x = Int64(0) OR t.y = 
Int64(0) OR Int64(1) / t.y < Int64(1) / t.x]
+02)--ProjectionExec: expr=[y@1 = 0 as __common_expr_1, x@0 as x, y@1 as y]
+03)----MemoryExec: partitions=1, partition_sizes=[1]
 
 # due to the reason describe in 
https://github.com/apache/datafusion/issues/8927,
 # the following queries will fail
diff --git a/datafusion/sqllogictest/test_files/tpch/q14.slt.part 
b/datafusion/sqllogictest/test_files/tpch/q14.slt.part
index e56e463a61..3743c201ff 100644
--- a/datafusion/sqllogictest/test_files/tpch/q14.slt.part
+++ b/datafusion/sqllogictest/test_files/tpch/q14.slt.part
@@ -32,9 +32,9 @@ where
   and l_shipdate < date '1995-10-01';
 ----
 logical_plan
-01)Projection: Float64(100) * CAST(sum(CASE WHEN part.p_type LIKE 
Utf8("PROMO%")  THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount 
ELSE Int64(0) END) AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - 
lineitem.l_discount) AS Float64) AS promo_revenue
-02)--Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN part.p_type LIKE 
Utf8("PROMO%") THEN lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - 
lineitem.l_discount) ELSE Decimal128(Some(0),38,4) END) AS sum(CASE WHEN 
part.p_type LIKE Utf8("PROMO%")  THEN lineitem.l_extendedprice * Int64(1) - 
lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * 
(Decimal128(Some(1),20,0) - lineitem.l_discount)) AS 
sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]]
-03)----Projection: lineitem.l_extendedprice, lineitem.l_discount, part.p_type
+01)Projection: Float64(100) * CAST(sum(CASE WHEN part.p_type LIKE 
Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount 
ELSE Int64(0) END) AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - 
lineitem.l_discount) AS Float64) AS promo_revenue
+02)--Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN part.p_type LIKE 
Utf8("PROMO%") THEN __common_expr_1 ELSE Decimal128(Some(0),38,4) END) AS 
sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * 
Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(__common_expr_1) AS 
sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]]
+03)----Projection: lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - 
lineitem.l_discount) AS __common_expr_1, part.p_type
 04)------Inner Join: lineitem.l_partkey = part.p_partkey
 05)--------Projection: lineitem.l_partkey, lineitem.l_extendedprice, 
lineitem.l_discount
 06)----------Filter: lineitem.l_shipdate >= Date32("1995-09-01") AND 
lineitem.l_shipdate < Date32("1995-10-01")
@@ -44,19 +44,20 @@ physical_plan
 01)ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE 
Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount 
ELSE Int64(0) END)@0 AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) 
- lineitem.l_discount)@1 AS Float64) as promo_revenue]
 02)--AggregateExec: mode=Final, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE 
Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount 
ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - 
lineitem.l_discount)]
 03)----CoalescePartitionsExec
-04)------AggregateExec: mode=Partial, gby=[], aggr=[sum(CASE WHEN part.p_type 
LIKE Utf8("PROMO%")  THEN lineitem.l_extendedprice * Int64(1) - 
lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) 
- lineitem.l_discount)]
-05)--------CoalesceBatchesExec: target_batch_size=8192
-06)----------HashJoinExec: mode=Partitioned, join_type=Inner, 
on=[(l_partkey@0, p_partkey@0)], projection=[l_extendedprice@1, l_discount@2, 
p_type@4]
-07)------------CoalesceBatchesExec: target_batch_size=8192
-08)--------------RepartitionExec: partitioning=Hash([l_partkey@0], 4), 
input_partitions=4
-09)----------------ProjectionExec: expr=[l_partkey@0 as l_partkey, 
l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount]
-10)------------------CoalesceBatchesExec: target_batch_size=8192
-11)--------------------FilterExec: l_shipdate@3 >= 1995-09-01 AND l_shipdate@3 
< 1995-10-01
-12)----------------------CsvExec: file_groups={4 groups: 
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749],
 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498],
 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247],
 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]},
 projection=[l_partkey, l_extendedprice, l_discount, l_shipd [...]
-13)------------CoalesceBatchesExec: target_batch_size=8192
-14)--------------RepartitionExec: partitioning=Hash([p_partkey@0], 4), 
input_partitions=4
-15)----------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
-16)------------------CsvExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/part.tbl]]}, 
projection=[p_partkey, p_type], has_header=false
+04)------AggregateExec: mode=Partial, gby=[], aggr=[sum(CASE WHEN part.p_type 
LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - 
lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) 
- lineitem.l_discount)]
+05)--------ProjectionExec: expr=[l_extendedprice@0 * (Some(1),20,0 - 
l_discount@1) as __common_expr_1, p_type@2 as p_type]
+06)----------CoalesceBatchesExec: target_batch_size=8192
+07)------------HashJoinExec: mode=Partitioned, join_type=Inner, 
on=[(l_partkey@0, p_partkey@0)], projection=[l_extendedprice@1, l_discount@2, 
p_type@4]
+08)--------------CoalesceBatchesExec: target_batch_size=8192
+09)----------------RepartitionExec: partitioning=Hash([l_partkey@0], 4), 
input_partitions=4
+10)------------------ProjectionExec: expr=[l_partkey@0 as l_partkey, 
l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount]
+11)--------------------CoalesceBatchesExec: target_batch_size=8192
+12)----------------------FilterExec: l_shipdate@3 >= 1995-09-01 AND 
l_shipdate@3 < 1995-10-01
+13)------------------------CsvExec: file_groups={4 groups: 
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749],
 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498],
 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247],
 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]},
 projection=[l_partkey, l_extendedprice, l_discount, l_shi [...]
+14)--------------CoalesceBatchesExec: target_batch_size=8192
+15)----------------RepartitionExec: partitioning=Hash([p_partkey@0], 4), 
input_partitions=4
+16)------------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
+17)--------------------CsvExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/part.tbl]]}, 
projection=[p_partkey, p_type], has_header=false
 
 
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datafusion) branch main updated: Improve `CommonSubexprEliminate` rule with surely and conditionally evaluated stats (#11357)

Reply via email to