This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new d542cbda8f Improve `CommonSubexprEliminate` rule with surely and
conditionally evaluated stats (#11357)
d542cbda8f is described below
commit d542cbda8f17ba004de18bb107ecf1c8ec3266f6
Author: Peter Toth <[email protected]>
AuthorDate: Fri Jul 12 12:53:05 2024 +0200
Improve `CommonSubexprEliminate` rule with surely and conditionally
evaluated stats (#11357)
* Improve `CommonSubexprEliminate` rule with surely and conditionally
evaluated stats
* remove expression tree hashing as no longer needed
* address review comments
* add negative tests
---
datafusion/expr/src/expr.rs | 39 +++-
.../optimizer/src/common_subexpr_eliminate.rs | 256 +++++++++++++--------
.../optimizer/src/optimize_projections/mod.rs | 10 +-
datafusion/sqllogictest/test_files/cse.slt | 88 +++++--
datafusion/sqllogictest/test_files/select.slt | 20 +-
.../sqllogictest/test_files/tpch/q14.slt.part | 33 +--
6 files changed, 298 insertions(+), 148 deletions(-)
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index ecece6dbfc..a344e621dd 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -17,7 +17,7 @@
//! Logical Expressions: [`Expr`]
-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet};
use std::fmt::{self, Display, Formatter, Write};
use std::hash::{Hash, Hasher};
use std::mem;
@@ -1380,7 +1380,7 @@ impl Expr {
/// // refs contains "a" and "b"
/// assert_eq!(refs.len(), 2);
/// assert!(refs.contains(&Column::new_unqualified("a")));
- /// assert!(refs.contains(&Column::new_unqualified("b")));
+ /// assert!(refs.contains(&Column::new_unqualified("b")));
/// ```
pub fn column_refs(&self) -> HashSet<&Column> {
let mut using_columns = HashSet::new();
@@ -1401,6 +1401,41 @@ impl Expr {
.expect("traversal is infallable");
}
+ /// Return all references to columns and their occurrence counts in the
expression.
+ ///
+ /// # Example
+ /// ```
+ /// # use std::collections::HashMap;
+ /// # use datafusion_common::Column;
+ /// # use datafusion_expr::col;
+ /// // For an expression `a + (b * a)`
+ /// let expr = col("a") + (col("b") * col("a"));
+ /// let mut refs = expr.column_refs_counts();
+ /// // refs contains "a" and "b"
+ /// assert_eq!(refs.len(), 2);
+ /// assert_eq!(*refs.get(&Column::new_unqualified("a")).unwrap(), 2);
+ /// assert_eq!(*refs.get(&Column::new_unqualified("b")).unwrap(), 1);
+ /// ```
+ pub fn column_refs_counts(&self) -> HashMap<&Column, usize> {
+ let mut map = HashMap::new();
+ self.add_column_ref_counts(&mut map);
+ map
+ }
+
+ /// Adds references to all columns and their occurrence counts in the
expression to
+ /// the map.
+ ///
+ /// See [`Self::column_refs_counts`] for details
+ pub fn add_column_ref_counts<'a>(&'a self, map: &mut HashMap<&'a Column,
usize>) {
+ self.apply(|expr| {
+ if let Expr::Column(col) = expr {
+ *map.entry(col).or_default() += 1;
+ }
+ Ok(TreeNodeRecursion::Continue)
+ })
+ .expect("traversal is infallable");
+ }
+
/// Returns true if there are any column references in this Expr
pub fn any_column_refs(&self) -> bool {
self.exists(|expr| Ok(matches!(expr, Expr::Column(_))))
diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs
b/datafusion/optimizer/src/common_subexpr_eliminate.rs
index 721987b917..e4b3665297 100644
--- a/datafusion/optimizer/src/common_subexpr_eliminate.rs
+++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs
@@ -33,12 +33,12 @@ use datafusion_common::tree_node::{
use datafusion_common::{
internal_datafusion_err, qualified_name, Column, DFSchema, DFSchemaRef,
Result,
};
-use datafusion_expr::expr::Alias;
+use datafusion_expr::expr::{Alias, ScalarFunction};
use datafusion_expr::logical_plan::tree_node::unwrap_arc;
use datafusion_expr::logical_plan::{
Aggregate, Filter, LogicalPlan, Projection, Sort, Window,
};
-use datafusion_expr::{col, Expr, ExprSchemable};
+use datafusion_expr::{col, BinaryExpr, Case, Expr, ExprSchemable, Operator};
use indexmap::IndexMap;
const CSE_PREFIX: &str = "__common_expr";
@@ -56,13 +56,9 @@ struct Identifier<'n> {
}
impl<'n> Identifier<'n> {
- fn new(expr: &'n Expr, is_tree: bool, random_state: &RandomState) -> Self {
+ fn new(expr: &'n Expr, random_state: &RandomState) -> Self {
let mut hasher = random_state.build_hasher();
- if is_tree {
- expr.hash(&mut hasher);
- } else {
- expr.hash_node(&mut hasher);
- }
+ expr.hash_node(&mut hasher);
let hash = hasher.finish();
Self { hash, expr }
}
@@ -110,8 +106,9 @@ impl Hash for Identifier<'_> {
/// ```
type IdArray<'n> = Vec<(usize, Option<Identifier<'n>>)>;
-/// A map that contains the number of occurrences of expressions by their
identifiers.
-type ExprStats<'n> = HashMap<Identifier<'n>, usize>;
+/// A map that contains the number of normal and conditional occurrences of
expressions by
+/// their identifiers.
+type ExprStats<'n> = HashMap<Identifier<'n>, (usize, usize)>;
/// A map that contains the common expressions and their alias extracted
during the
/// second, rewriting traversal.
@@ -200,6 +197,7 @@ impl CommonSubexprEliminate {
expr_mask,
random_state: &self.random_state,
found_common: false,
+ conditional: false,
};
expr.visit(&mut visitor)?;
@@ -901,15 +899,17 @@ struct ExprIdentifierVisitor<'a, 'n> {
random_state: &'a RandomState,
// a flag to indicate that common expression found
found_common: bool,
+ // if we are in a conditional branch. A conditional branch means that the
expression
+ // might not be executed depending on the runtime values of other
expressions, and
+ // thus can not be extracted as a common expression.
+ conditional: bool,
}
/// Record item that used when traversing an expression tree.
enum VisitRecord<'n> {
/// Marks the beginning of expression. It contains:
/// - The post-order index assigned during the first, visiting traversal.
- /// - A boolean flag if the record marks an expression subtree (not just a
single
- /// node).
- EnterMark(usize, bool),
+ EnterMark(usize),
/// Marks an accumulated subexpression tree. It contains:
/// - The accumulated identifier of a subexpression.
@@ -924,10 +924,6 @@ impl<'n> ExprIdentifierVisitor<'_, 'n> {
/// Find the first `EnterMark` in the stack, and accumulates every
`ExprItem` before
/// it. Returns a tuple that contains:
/// - The pre-order index of the expression we marked.
- /// - A boolean flag if we marked an expression subtree (not just a single
node).
- /// If true we didn't recurse into the node's children, so we need to
calculate the
- /// hash of the marked expression tree (not just the node) and we need
to validate
- /// the expression tree (not just the node).
/// - The accumulated identifier of the children of the marked expression.
/// - An accumulated boolean flag from the children of the marked
expression if all
/// children are valid for subexpression elimination (i.e. it is safe to
extract the
@@ -937,14 +933,14 @@ impl<'n> ExprIdentifierVisitor<'_, 'n> {
/// information up from children to parents via `visit_stack` during the
first,
/// visiting traversal and no need to test the expression's validity
beforehand with
/// an extra traversal).
- fn pop_enter_mark(&mut self) -> (usize, bool, Option<Identifier<'n>>,
bool) {
+ fn pop_enter_mark(&mut self) -> (usize, Option<Identifier<'n>>, bool) {
let mut expr_id = None;
let mut is_valid = true;
while let Some(item) = self.visit_stack.pop() {
match item {
- VisitRecord::EnterMark(down_index, is_tree) => {
- return (down_index, is_tree, expr_id, is_valid);
+ VisitRecord::EnterMark(down_index) => {
+ return (down_index, expr_id, is_valid);
}
VisitRecord::ExprItem(sub_expr_id, sub_expr_is_valid) => {
expr_id = Some(sub_expr_id.combine(expr_id));
@@ -954,53 +950,112 @@ impl<'n> ExprIdentifierVisitor<'_, 'n> {
}
unreachable!("Enter mark should paired with node number");
}
+
+ /// Save the current `conditional` status and run `f` with `conditional`
set to true.
+ fn conditionally<F: FnMut(&mut Self) -> Result<()>>(
+ &mut self,
+ mut f: F,
+ ) -> Result<()> {
+ let conditional = self.conditional;
+ self.conditional = true;
+ f(self)?;
+ self.conditional = conditional;
+
+ Ok(())
+ }
}
impl<'n> TreeNodeVisitor<'n> for ExprIdentifierVisitor<'_, 'n> {
type Node = Expr;
fn f_down(&mut self, expr: &'n Expr) -> Result<TreeNodeRecursion> {
- // If an expression can short circuit its children then don't consider
its
- // children for CSE
(https://github.com/apache/arrow-datafusion/issues/8814).
- // This means that we don't recurse into its children, but handle the
expression
- // as a subtree when we calculate its identifier.
- // TODO: consider surely executed children of "short circuited"s for
CSE
- let is_tree = expr.short_circuits();
- let tnr = if is_tree {
- TreeNodeRecursion::Jump
- } else {
- TreeNodeRecursion::Continue
- };
-
self.id_array.push((0, None));
self.visit_stack
- .push(VisitRecord::EnterMark(self.down_index, is_tree));
+ .push(VisitRecord::EnterMark(self.down_index));
self.down_index += 1;
- Ok(tnr)
+ // If an expression can short-circuit then some of its children might
not be
+ // executed so count the occurrence of subexpressions as conditional
in all
+ // children.
+ Ok(match expr {
+ // If we are already in a conditionally evaluated subtree then
continue
+ // traversal.
+ _ if self.conditional => TreeNodeRecursion::Continue,
+
+ // In case of `ScalarFunction`s we don't know which children are
surely
+ // executed so start visiting all children conditionally and stop
the
+ // recursion with `TreeNodeRecursion::Jump`.
+ Expr::ScalarFunction(ScalarFunction { func, args })
+ if func.short_circuits() =>
+ {
+ self.conditionally(|visitor| {
+ args.iter().try_for_each(|e| e.visit(visitor).map(|_| ()))
+ })?;
+
+ TreeNodeRecursion::Jump
+ }
+
+ // In case of `And` and `Or` the first child is surely executed,
but we
+ // account subexpressions as conditional in the second.
+ Expr::BinaryExpr(BinaryExpr {
+ left,
+ op: Operator::And | Operator::Or,
+ right,
+ }) => {
+ left.visit(self)?;
+ self.conditionally(|visitor| right.visit(visitor).map(|_|
()))?;
+
+ TreeNodeRecursion::Jump
+ }
+
+ // In case of `Case` the optional base expression and the first
when
+ // expressions are surely executed, but we account subexpressions
as
+ // conditional in the others.
+ Expr::Case(Case {
+ expr,
+ when_then_expr,
+ else_expr,
+ }) => {
+ expr.iter().try_for_each(|e| e.visit(self).map(|_| ()))?;
+ when_then_expr.iter().take(1).try_for_each(|(when, then)| {
+ when.visit(self)?;
+ self.conditionally(|visitor| then.visit(visitor).map(|_|
()))
+ })?;
+ self.conditionally(|visitor| {
+ when_then_expr.iter().skip(1).try_for_each(|(when, then)| {
+ when.visit(visitor)?;
+ then.visit(visitor).map(|_| ())
+ })?;
+ else_expr
+ .iter()
+ .try_for_each(|e| e.visit(visitor).map(|_| ()))
+ })?;
+
+ TreeNodeRecursion::Jump
+ }
+
+ // In case of non-short-circuit expressions continue the traversal.
+ _ => TreeNodeRecursion::Continue,
+ })
}
fn f_up(&mut self, expr: &'n Expr) -> Result<TreeNodeRecursion> {
- let (down_index, is_tree, sub_expr_id, sub_expr_is_valid) =
self.pop_enter_mark();
+ let (down_index, sub_expr_id, sub_expr_is_valid) =
self.pop_enter_mark();
- let (expr_id, is_valid) = if is_tree {
- (
- Identifier::new(expr, true, self.random_state),
- !expr.is_volatile()?,
- )
- } else {
- (
- Identifier::new(expr, false,
self.random_state).combine(sub_expr_id),
- !expr.is_volatile_node() && sub_expr_is_valid,
- )
- };
+ let expr_id = Identifier::new(expr,
self.random_state).combine(sub_expr_id);
+ let is_valid = !expr.is_volatile_node() && sub_expr_is_valid;
self.id_array[down_index].0 = self.up_index;
if is_valid && !self.expr_mask.ignores(expr) {
self.id_array[down_index].1 = Some(expr_id);
- let count = self.expr_stats.entry(expr_id).or_insert(0);
- *count += 1;
- if *count > 1 {
+ let (count, conditional_count) =
+ self.expr_stats.entry(expr_id).or_insert((0, 0));
+ if self.conditional {
+ *conditional_count += 1;
+ } else {
+ *count += 1;
+ }
+ if *count > 1 || (*count == 1 && *conditional_count > 0) {
self.found_common = true;
}
}
@@ -1039,51 +1094,40 @@ impl TreeNodeRewriter for CommonSubexprRewriter<'_, '_>
{
self.alias_counter += 1;
}
- // The `CommonSubexprRewriter` relies on `ExprIdentifierVisitor` to
generate the
- // `id_array`, which records the expr's identifier used to rewrite
expr. So if we
- // skip an expr in `ExprIdentifierVisitor`, we should skip it here,
too.
- let is_tree = expr.short_circuits();
- let tnr = if is_tree {
- TreeNodeRecursion::Jump
- } else {
- TreeNodeRecursion::Continue
- };
-
let (up_index, expr_id) = self.id_array[self.down_index];
self.down_index += 1;
- // skip `Expr`s without identifier (empty identifier).
- let Some(expr_id) = expr_id else {
- return Ok(Transformed::new(expr, false, tnr));
- };
-
- let count = self.expr_stats.get(&expr_id).unwrap();
- if *count > 1 {
- // step index to skip all sub-node (which has smaller series
number).
- while self.down_index < self.id_array.len()
- && self.id_array[self.down_index].0 < up_index
- {
- self.down_index += 1;
- }
+ // Handle `Expr`s with identifiers only
+ if let Some(expr_id) = expr_id {
+ let (count, conditional_count) =
self.expr_stats.get(&expr_id).unwrap();
+ if *count > 1 || *count == 1 && *conditional_count > 0 {
+ // step index to skip all sub-node (which has smaller series
number).
+ while self.down_index < self.id_array.len()
+ && self.id_array[self.down_index].0 < up_index
+ {
+ self.down_index += 1;
+ }
- let expr_name = expr.display_name()?;
- let (_, expr_alias) =
self.common_exprs.entry(expr_id).or_insert_with(|| {
- let expr_alias = self.alias_generator.next(CSE_PREFIX);
- (expr, expr_alias)
- });
+ let expr_name = expr.display_name()?;
+ let (_, expr_alias) =
+ self.common_exprs.entry(expr_id).or_insert_with(|| {
+ let expr_alias = self.alias_generator.next(CSE_PREFIX);
+ (expr, expr_alias)
+ });
- // alias the expressions without an `Alias` ancestor node
- let rewritten = if self.alias_counter > 0 {
- col(expr_alias.clone())
- } else {
- self.alias_counter += 1;
- col(expr_alias.clone()).alias(expr_name)
- };
+ // alias the expressions without an `Alias` ancestor node
+ let rewritten = if self.alias_counter > 0 {
+ col(expr_alias.clone())
+ } else {
+ self.alias_counter += 1;
+ col(expr_alias.clone()).alias(expr_name)
+ };
- Ok(Transformed::new(rewritten, true, TreeNodeRecursion::Jump))
- } else {
- Ok(Transformed::new(expr, false, tnr))
+ return Ok(Transformed::new(rewritten, true,
TreeNodeRecursion::Jump));
+ }
}
+
+ Ok(Transformed::no(expr))
}
fn f_up(&mut self, expr: Expr) -> Result<Transformed<Self::Node>> {
@@ -1685,7 +1729,7 @@ mod test {
.unwrap();
let rule = CommonSubexprEliminate::new();
let optimized_plan = rule.rewrite(plan,
&OptimizerContext::new()).unwrap();
- assert!(!optimized_plan.transformed);
+ assert!(optimized_plan.transformed);
let optimized_plan = optimized_plan.data;
let schema = optimized_plan.schema();
@@ -1837,22 +1881,29 @@ mod test {
let table_scan = test_table_scan()?;
let extracted_short_circuit =
col("a").eq(lit(0)).or(col("b").eq(lit(0)));
- let not_extracted_short_circuit_leg_1 = (col("a") +
col("b")).eq(lit(0));
+ let extracted_short_circuit_leg_1 = (col("a") + col("b")).eq(lit(0));
let not_extracted_short_circuit_leg_2 = (col("a") -
col("b")).eq(lit(0));
+ let extracted_short_circuit_leg_3 = (col("a") * col("b")).eq(lit(0));
let plan = LogicalPlanBuilder::from(table_scan.clone())
.project(vec![
extracted_short_circuit.clone().alias("c1"),
extracted_short_circuit.alias("c2"),
- not_extracted_short_circuit_leg_1.clone().alias("c3"),
- not_extracted_short_circuit_leg_2.clone().alias("c4"),
- not_extracted_short_circuit_leg_1
- .or(not_extracted_short_circuit_leg_2)
+ extracted_short_circuit_leg_1
+ .clone()
+ .or(not_extracted_short_circuit_leg_2.clone())
+ .alias("c3"),
+ extracted_short_circuit_leg_1
+ .and(not_extracted_short_circuit_leg_2)
+ .alias("c4"),
+ extracted_short_circuit_leg_3
+ .clone()
+ .or(extracted_short_circuit_leg_3.clone())
.alias("c5"),
])?
.build()?;
- let expected = "Projection: __common_expr_1 AS c1, __common_expr_1 AS
c2, test.a + test.b = Int32(0) AS c3, test.a - test.b = Int32(0) AS c4, test.a
+ test.b = Int32(0) OR test.a - test.b = Int32(0) AS c5\
- \n Projection: test.a = Int32(0) OR test.b = Int32(0) AS
__common_expr_1, test.a, test.b, test.c\
+ let expected = "Projection: __common_expr_1 AS c1, __common_expr_1 AS
c2, __common_expr_2 OR test.a - test.b = Int32(0) AS c3, __common_expr_2 AND
test.a - test.b = Int32(0) AS c4, __common_expr_3 OR __common_expr_3 AS c5\
+ \n Projection: test.a = Int32(0) OR test.b = Int32(0) AS
__common_expr_1, test.a + test.b = Int32(0) AS __common_expr_2, test.a * test.b
= Int32(0) AS __common_expr_3, test.a, test.b, test.c\
\n TableScan: test";
assert_optimized_plan_eq(expected, plan, None);
@@ -1888,10 +1939,12 @@ mod test {
let table_scan = test_table_scan()?;
let rand = rand_func().call(vec![]);
- let not_extracted_volatile_short_circuit_2 =
- rand.clone().eq(lit(0)).or(col("b").eq(lit(0)));
+ let extracted_short_circuit_leg_1 = col("a").eq(lit(0));
let not_extracted_volatile_short_circuit_1 =
- col("a").eq(lit(0)).or(rand.eq(lit(0)));
+ extracted_short_circuit_leg_1.or(rand.clone().eq(lit(0)));
+ let not_extracted_short_circuit_leg_2 = col("b").eq(lit(0));
+ let not_extracted_volatile_short_circuit_2 =
+ rand.eq(lit(0)).or(not_extracted_short_circuit_leg_2);
let plan = LogicalPlanBuilder::from(table_scan.clone())
.project(vec![
not_extracted_volatile_short_circuit_1.clone().alias("c1"),
@@ -1901,10 +1954,11 @@ mod test {
])?
.build()?;
- let expected = "Projection: test.a = Int32(0) OR random() = Int32(0)
AS c1, test.a = Int32(0) OR random() = Int32(0) AS c2, random() = Int32(0) OR
test.b = Int32(0) AS c3, random() = Int32(0) OR test.b = Int32(0) AS c4\
- \n TableScan: test";
+ let expected = "Projection: __common_expr_1 OR random() = Int32(0) AS
c1, __common_expr_1 OR random() = Int32(0) AS c2, random() = Int32(0) OR test.b
= Int32(0) AS c3, random() = Int32(0) OR test.b = Int32(0) AS c4\
+ \n Projection: test.a = Int32(0) AS __common_expr_1, test.a, test.b,
test.c\
+ \n TableScan: test";
- assert_non_optimized_plan_eq(expected, plan, None);
+ assert_optimized_plan_eq(expected, plan, None);
Ok(())
}
diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs
b/datafusion/optimizer/src/optimize_projections/mod.rs
index cae2a7b2ca..58c1ae297b 100644
--- a/datafusion/optimizer/src/optimize_projections/mod.rs
+++ b/datafusion/optimizer/src/optimize_projections/mod.rs
@@ -19,7 +19,7 @@
mod required_indices;
-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use crate::optimizer::ApplyOrder;
@@ -42,7 +42,6 @@ use datafusion_common::tree_node::{
Transformed, TreeNode, TreeNodeIterator, TreeNodeRecursion,
};
use datafusion_expr::logical_plan::tree_node::unwrap_arc;
-use hashbrown::HashMap;
/// Optimizer rule to prune unnecessary columns from intermediate schemas
/// inside the [`LogicalPlan`]. This rule:
@@ -472,11 +471,8 @@ fn merge_consecutive_projections(proj: Projection) ->
Result<Transformed<Project
// Count usages (referrals) of each projection expression in its input
fields:
let mut column_referral_map = HashMap::<&Column, usize>::new();
- for columns in expr.iter().map(|expr| expr.column_refs()) {
- for col in columns.into_iter() {
- *column_referral_map.entry(col).or_default() += 1;
- }
- }
+ expr.iter()
+ .for_each(|expr| expr.add_column_ref_counts(&mut column_referral_map));
// If an expression is non-trivial and appears more than once, do not merge
// them as consecutive projections will benefit from a compute-once
approach.
diff --git a/datafusion/sqllogictest/test_files/cse.slt
b/datafusion/sqllogictest/test_files/cse.slt
index 3579c1c163..19b47fa50e 100644
--- a/datafusion/sqllogictest/test_files/cse.slt
+++ b/datafusion/sqllogictest/test_files/cse.slt
@@ -93,15 +93,16 @@ FROM t1
----
logical_plan
01)Projection: __common_expr_1 AS c1, __common_expr_1 AS c2, __common_expr_2
AS c3, __common_expr_2 AS c4, __common_expr_3 AS c5, __common_expr_3 AS c6
-02)--Projection: t1.a = Float64(0) AND t1.b = Float64(0) AS __common_expr_1,
t1.a = Float64(0) OR t1.b = Float64(0) AS __common_expr_2, CASE WHEN t1.a =
Float64(0) THEN Int64(0) ELSE Int64(1) END AS __common_expr_3
-03)----TableScan: t1 projection=[a, b]
+02)--Projection: __common_expr_4 AND t1.b = Float64(0) AS __common_expr_1,
__common_expr_4 OR t1.b = Float64(0) AS __common_expr_2, CASE WHEN
__common_expr_4 THEN Int64(0) ELSE Int64(1) END AS __common_expr_3
+03)----Projection: t1.a = Float64(0) AS __common_expr_4, t1.b
+04)------TableScan: t1 projection=[a, b]
physical_plan
01)ProjectionExec: expr=[__common_expr_1@0 as c1, __common_expr_1@0 as c2,
__common_expr_2@1 as c3, __common_expr_2@1 as c4, __common_expr_3@2 as c5,
__common_expr_3@2 as c6]
-02)--ProjectionExec: expr=[a@0 = 0 AND b@1 = 0 as __common_expr_1, a@0 = 0 OR
b@1 = 0 as __common_expr_2, CASE WHEN a@0 = 0 THEN 0 ELSE 1 END as
__common_expr_3]
-03)----MemoryExec: partitions=1, partition_sizes=[0]
+02)--ProjectionExec: expr=[__common_expr_4@0 AND b@1 = 0 as __common_expr_1,
__common_expr_4@0 OR b@1 = 0 as __common_expr_2, CASE WHEN __common_expr_4@0
THEN 0 ELSE 1 END as __common_expr_3]
+03)----ProjectionExec: expr=[a@0 = 0 as __common_expr_4, b@1 as b]
+04)------MemoryExec: partitions=1, partition_sizes=[0]
# Common children of short-circuit expression
-# TODO: consider surely executed children of "short circuited"s for CSE. i.e.
`a = 0`, `a = 2`, `a = 4` should be extracted
query TT
EXPLAIN SELECT
a = 0 AND b = 0 AS c1,
@@ -121,14 +122,15 @@ EXPLAIN SELECT
FROM t1
----
logical_plan
-01)Projection: t1.a = Float64(0) AND t1.b = Float64(0) AS c1, t1.a =
Float64(0) AND t1.b = Float64(1) AS c2, t1.b = Float64(2) AND t1.a = Float64(1)
AS c3, t1.b = Float64(3) AND t1.a = Float64(1) AS c4, t1.a = Float64(2) OR t1.b
= Float64(4) AS c5, t1.a = Float64(2) OR t1.b = Float64(5) AS c6, t1.b =
Float64(6) OR t1.a = Float64(3) AS c7, t1.b = Float64(7) OR t1.a = Float64(3)
AS c8, CASE WHEN t1.a = Float64(4) THEN Int64(0) ELSE Int64(1) END AS c9, CASE
WHEN t1.a = Float64(4) THEN Int64 [...]
-02)--TableScan: t1 projection=[a, b]
+01)Projection: __common_expr_1 AND t1.b = Float64(0) AS c1, __common_expr_1
AND t1.b = Float64(1) AS c2, t1.b = Float64(2) AND t1.a = Float64(1) AS c3,
t1.b = Float64(3) AND t1.a = Float64(1) AS c4, __common_expr_2 OR t1.b =
Float64(4) AS c5, __common_expr_2 OR t1.b = Float64(5) AS c6, t1.b = Float64(6)
OR t1.a = Float64(3) AS c7, t1.b = Float64(7) OR t1.a = Float64(3) AS c8, CASE
WHEN __common_expr_3 THEN Int64(0) ELSE Int64(1) END AS c9, CASE WHEN
__common_expr_3 THEN Int64(0) ELSE Int [...]
+02)--Projection: t1.a = Float64(0) AS __common_expr_1, t1.a = Float64(2) AS
__common_expr_2, t1.a = Float64(4) AS __common_expr_3, t1.a, t1.b
+03)----TableScan: t1 projection=[a, b]
physical_plan
-01)ProjectionExec: expr=[a@0 = 0 AND b@1 = 0 as c1, a@0 = 0 AND b@1 = 1 as c2,
b@1 = 2 AND a@0 = 1 as c3, b@1 = 3 AND a@0 = 1 as c4, a@0 = 2 OR b@1 = 4 as c5,
a@0 = 2 OR b@1 = 5 as c6, b@1 = 6 OR a@0 = 3 as c7, b@1 = 7 OR a@0 = 3 as c8,
CASE WHEN a@0 = 4 THEN 0 ELSE 1 END as c9, CASE WHEN a@0 = 4 THEN 0 ELSE 2 END
as c10, CASE WHEN b@1 = 8 THEN a@0 + 1 ELSE 0 END as c11, CASE WHEN b@1 = 9
THEN a@0 + 1 ELSE 0 END as c12, CASE WHEN b@1 = 10 THEN 0 ELSE a@0 + 2 END as
c13, CASE WHEN b@1 = 1 [...]
-02)--MemoryExec: partitions=1, partition_sizes=[0]
+01)ProjectionExec: expr=[__common_expr_1@0 AND b@4 = 0 as c1,
__common_expr_1@0 AND b@4 = 1 as c2, b@4 = 2 AND a@3 = 1 as c3, b@4 = 3 AND a@3
= 1 as c4, __common_expr_2@1 OR b@4 = 4 as c5, __common_expr_2@1 OR b@4 = 5 as
c6, b@4 = 6 OR a@3 = 3 as c7, b@4 = 7 OR a@3 = 3 as c8, CASE WHEN
__common_expr_3@2 THEN 0 ELSE 1 END as c9, CASE WHEN __common_expr_3@2 THEN 0
ELSE 2 END as c10, CASE WHEN b@4 = 8 THEN a@3 + 1 ELSE 0 END as c11, CASE WHEN
b@4 = 9 THEN a@3 + 1 ELSE 0 END as c12, CASE WHE [...]
+02)--ProjectionExec: expr=[a@0 = 0 as __common_expr_1, a@0 = 2 as
__common_expr_2, a@0 = 4 as __common_expr_3, a@0 as a, b@1 as b]
+03)----MemoryExec: partitions=1, partition_sizes=[0]
# Common children of volatile, short-circuit expression
-# TODO: consider surely executed children of "short circuited"s for CSE. i.e.
`a = 0`, `a = 2`, `a = 4` should be extracted
query TT
EXPLAIN SELECT
a = 0 AND b = random() AS c1,
@@ -148,11 +150,13 @@ EXPLAIN SELECT
FROM t1
----
logical_plan
-01)Projection: t1.a = Float64(0) AND t1.b = random() AS c1, t1.a = Float64(0)
AND t1.b = Float64(1) + random() AS c2, t1.b = Float64(2) + random() AND t1.a =
Float64(1) AS c3, t1.b = Float64(3) + random() AND t1.a = Float64(1) AS c4,
t1.a = Float64(2) OR t1.b = Float64(4) + random() AS c5, t1.a = Float64(2) OR
t1.b = Float64(5) + random() AS c6, t1.b = Float64(6) + random() OR t1.a =
Float64(3) AS c7, t1.b = Float64(7) + random() OR t1.a = Float64(3) AS c8, CASE
WHEN t1.a = Float64(4) TH [...]
-02)--TableScan: t1 projection=[a, b]
+01)Projection: __common_expr_1 AND t1.b = random() AS c1, __common_expr_1 AND
t1.b = Float64(1) + random() AS c2, t1.b = Float64(2) + random() AND t1.a =
Float64(1) AS c3, t1.b = Float64(3) + random() AND t1.a = Float64(1) AS c4,
__common_expr_2 OR t1.b = Float64(4) + random() AS c5, __common_expr_2 OR t1.b
= Float64(5) + random() AS c6, t1.b = Float64(6) + random() OR t1.a =
Float64(3) AS c7, t1.b = Float64(7) + random() OR t1.a = Float64(3) AS c8, CASE
WHEN __common_expr_3 THEN random( [...]
+02)--Projection: t1.a = Float64(0) AS __common_expr_1, t1.a = Float64(2) AS
__common_expr_2, t1.a = Float64(4) AS __common_expr_3, t1.a, t1.b
+03)----TableScan: t1 projection=[a, b]
physical_plan
-01)ProjectionExec: expr=[a@0 = 0 AND b@1 = random() as c1, a@0 = 0 AND b@1 = 1
+ random() as c2, b@1 = 2 + random() AND a@0 = 1 as c3, b@1 = 3 + random() AND
a@0 = 1 as c4, a@0 = 2 OR b@1 = 4 + random() as c5, a@0 = 2 OR b@1 = 5 +
random() as c6, b@1 = 6 + random() OR a@0 = 3 as c7, b@1 = 7 + random() OR a@0
= 3 as c8, CASE WHEN a@0 = 4 THEN random() ELSE 1 END as c9, CASE WHEN a@0 = 4
THEN random() ELSE 2 END as c10, CASE WHEN b@1 = 8 + random() THEN a@0 + 1 ELSE
0 END as c11, CASE WHEN [...]
-02)--MemoryExec: partitions=1, partition_sizes=[0]
+01)ProjectionExec: expr=[__common_expr_1@0 AND b@4 = random() as c1,
__common_expr_1@0 AND b@4 = 1 + random() as c2, b@4 = 2 + random() AND a@3 = 1
as c3, b@4 = 3 + random() AND a@3 = 1 as c4, __common_expr_2@1 OR b@4 = 4 +
random() as c5, __common_expr_2@1 OR b@4 = 5 + random() as c6, b@4 = 6 +
random() OR a@3 = 3 as c7, b@4 = 7 + random() OR a@3 = 3 as c8, CASE WHEN
__common_expr_3@2 THEN random() ELSE 1 END as c9, CASE WHEN __common_expr_3@2
THEN random() ELSE 2 END as c10, CASE WHEN [...]
+02)--ProjectionExec: expr=[a@0 = 0 as __common_expr_1, a@0 = 2 as
__common_expr_2, a@0 = 4 as __common_expr_3, a@0 as a, b@1 as b]
+03)----MemoryExec: partitions=1, partition_sizes=[0]
# Common volatile children of short-circuit expression
query TT
@@ -171,3 +175,59 @@ logical_plan
physical_plan
01)ProjectionExec: expr=[a@0 = random() AND b@1 = 0 as c1, a@0 = random() AND
b@1 = 1 as c2, a@0 = 2 + random() OR b@1 = 4 as c3, a@0 = 2 + random() OR b@1 =
5 as c4, CASE WHEN a@0 = 4 + random() THEN 0 ELSE 1 END as c5, CASE WHEN a@0 =
4 + random() THEN 0 ELSE 2 END as c6]
02)--MemoryExec: partitions=1, partition_sizes=[0]
+
+# Surely only once but also conditionally evaluated expressions
+query TT
+EXPLAIN SELECT
+ (a = 1 OR random() = 0) AND a = 1 AS c1,
+ (a = 2 AND random() = 0) OR a = 2 AS c2,
+ CASE WHEN a + 3 = 0 THEN a + 3 ELSE 0 END AS c3,
+ CASE WHEN a + 4 = 0 THEN 0 WHEN a + 4 THEN 0 ELSE 0 END AS c4,
+ CASE WHEN a + 5 = 0 THEN 0 WHEN random() = 0 THEN a + 5 ELSE 0 END AS c5,
+ CASE WHEN a + 6 = 0 THEN 0 ELSE a + 6 END AS c6
+FROM t1
+----
+logical_plan
+01)Projection: (__common_expr_1 OR random() = Float64(0)) AND __common_expr_1
AS c1, __common_expr_2 AND random() = Float64(0) OR __common_expr_2 AS c2, CASE
WHEN __common_expr_3 = Float64(0) THEN __common_expr_3 ELSE Float64(0) END AS
c3, CASE WHEN __common_expr_4 = Float64(0) THEN Int64(0) WHEN
CAST(__common_expr_4 AS Boolean) THEN Int64(0) ELSE Int64(0) END AS c4, CASE
WHEN __common_expr_5 = Float64(0) THEN Float64(0) WHEN random() = Float64(0)
THEN __common_expr_5 ELSE Float64(0) END [...]
+02)--Projection: t1.a = Float64(1) AS __common_expr_1, t1.a = Float64(2) AS
__common_expr_2, t1.a + Float64(3) AS __common_expr_3, t1.a + Float64(4) AS
__common_expr_4, t1.a + Float64(5) AS __common_expr_5, t1.a + Float64(6) AS
__common_expr_6
+03)----TableScan: t1 projection=[a]
+physical_plan
+01)ProjectionExec: expr=[(__common_expr_1@0 OR random() = 0) AND
__common_expr_1@0 as c1, __common_expr_2@1 AND random() = 0 OR
__common_expr_2@1 as c2, CASE WHEN __common_expr_3@2 = 0 THEN __common_expr_3@2
ELSE 0 END as c3, CASE WHEN __common_expr_4@3 = 0 THEN 0 WHEN
CAST(__common_expr_4@3 AS Boolean) THEN 0 ELSE 0 END as c4, CASE WHEN
__common_expr_5@4 = 0 THEN 0 WHEN random() = 0 THEN __common_expr_5@4 ELSE 0
END as c5, CASE WHEN __common_expr_6@5 = 0 THEN 0 ELSE __common_expr_6@5 EN
[...]
+02)--ProjectionExec: expr=[a@0 = 1 as __common_expr_1, a@0 = 2 as
__common_expr_2, a@0 + 3 as __common_expr_3, a@0 + 4 as __common_expr_4, a@0 +
5 as __common_expr_5, a@0 + 6 as __common_expr_6]
+03)----MemoryExec: partitions=1, partition_sizes=[0]
+
+# Surely only once but also conditionally evaluated subexpressions
+query TT
+EXPLAIN SELECT
+ (a = 1 OR random() = 0) AND (a = 1 OR random() = 1) AS c1,
+ (a = 2 AND random() = 0) OR (a = 2 AND random() = 1) AS c2,
+ CASE WHEN a + 3 = 0 THEN a + 3 + random() ELSE 0 END AS c3,
+ CASE WHEN a + 4 = 0 THEN 0 ELSE a + 4 + random() END AS c4
+FROM t1
+----
+logical_plan
+01)Projection: (__common_expr_1 OR random() = Float64(0)) AND (__common_expr_1
OR random() = Float64(1)) AS c1, __common_expr_2 AND random() = Float64(0) OR
__common_expr_2 AND random() = Float64(1) AS c2, CASE WHEN __common_expr_3 =
Float64(0) THEN __common_expr_3 + random() ELSE Float64(0) END AS c3, CASE WHEN
__common_expr_4 = Float64(0) THEN Float64(0) ELSE __common_expr_4 + random()
END AS c4
+02)--Projection: t1.a = Float64(1) AS __common_expr_1, t1.a = Float64(2) AS
__common_expr_2, t1.a + Float64(3) AS __common_expr_3, t1.a + Float64(4) AS
__common_expr_4
+03)----TableScan: t1 projection=[a]
+physical_plan
+01)ProjectionExec: expr=[(__common_expr_1@0 OR random() = 0) AND
(__common_expr_1@0 OR random() = 1) as c1, __common_expr_2@1 AND random() = 0
OR __common_expr_2@1 AND random() = 1 as c2, CASE WHEN __common_expr_3@2 = 0
THEN __common_expr_3@2 + random() ELSE 0 END as c3, CASE WHEN __common_expr_4@3
= 0 THEN 0 ELSE __common_expr_4@3 + random() END as c4]
+02)--ProjectionExec: expr=[a@0 = 1 as __common_expr_1, a@0 = 2 as
__common_expr_2, a@0 + 3 as __common_expr_3, a@0 + 4 as __common_expr_4]
+03)----MemoryExec: partitions=1, partition_sizes=[0]
+
+# Only conditionally evaluated expressions
+query TT
+EXPLAIN SELECT
+ (random() = 0 OR a = 1) AND a = 1 AS c1,
+ (random() = 0 AND a = 2) OR a = 2 AS c2,
+ CASE WHEN random() = 0 THEN a + 3 ELSE a + 3 END AS c3,
+ CASE WHEN random() = 0 THEN 0 WHEN a + 4 = 0 THEN a + 4 ELSE 0 END AS c4,
+ CASE WHEN random() = 0 THEN 0 WHEN a + 5 = 0 THEN 0 ELSE a + 5 END AS c5,
+ CASE WHEN random() = 0 THEN 0 WHEN random() = 0 THEN a + 6 ELSE a + 6 END
AS c6
+FROM t1
+----
+logical_plan
+01)Projection: (random() = Float64(0) OR t1.a = Float64(1)) AND t1.a =
Float64(1) AS c1, random() = Float64(0) AND t1.a = Float64(2) OR t1.a =
Float64(2) AS c2, CASE WHEN random() = Float64(0) THEN t1.a + Float64(3) ELSE
t1.a + Float64(3) END AS c3, CASE WHEN random() = Float64(0) THEN Float64(0)
WHEN t1.a + Float64(4) = Float64(0) THEN t1.a + Float64(4) ELSE Float64(0) END
AS c4, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(5) =
Float64(0) THEN Float64(0) ELSE t1. [...]
+02)--TableScan: t1 projection=[a]
+physical_plan
+01)ProjectionExec: expr=[(random() = 0 OR a@0 = 1) AND a@0 = 1 as c1, random()
= 0 AND a@0 = 2 OR a@0 = 2 as c2, CASE WHEN random() = 0 THEN a@0 + 3 ELSE a@0
+ 3 END as c3, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 4 = 0 THEN a@0 + 4 ELSE
0 END as c4, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 5 = 0 THEN 0 ELSE a@0 + 5
END as c5, CASE WHEN random() = 0 THEN 0 WHEN random() = 0 THEN a@0 + 6 ELSE
a@0 + 6 END as c6]
+02)--MemoryExec: partitions=1, partition_sizes=[0]
diff --git a/datafusion/sqllogictest/test_files/select.slt
b/datafusion/sqllogictest/test_files/select.slt
index f9baf8db69..95f67245a9 100644
--- a/datafusion/sqllogictest/test_files/select.slt
+++ b/datafusion/sqllogictest/test_files/select.slt
@@ -1504,21 +1504,25 @@ query TT
EXPLAIN SELECT y > 0 and 1 / y < 1, x > 0 and y > 0 and 1 / y < 1 / x from t;
----
logical_plan
-01)Projection: t.y > Int32(0) AND Int64(1) / CAST(t.y AS Int64) < Int64(1) AS
t.y > Int64(0) AND Int64(1) / t.y < Int64(1), t.x > Int32(0) AND t.y > Int32(0)
AND Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS Int64) AS t.x >
Int64(0) AND t.y > Int64(0) AND Int64(1) / t.y < Int64(1) / t.x
-02)--TableScan: t projection=[x, y]
+01)Projection: __common_expr_1 AND Int64(1) / CAST(t.y AS Int64) < Int64(1) AS
t.y > Int64(0) AND Int64(1) / t.y < Int64(1), t.x > Int32(0) AND
__common_expr_1 AND Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS
Int64) AS t.x > Int64(0) AND t.y > Int64(0) AND Int64(1) / t.y < Int64(1) / t.x
+02)--Projection: t.y > Int32(0) AS __common_expr_1, t.x, t.y
+03)----TableScan: t projection=[x, y]
physical_plan
-01)ProjectionExec: expr=[y@1 > 0 AND 1 / CAST(y@1 AS Int64) < 1 as t.y >
Int64(0) AND Int64(1) / t.y < Int64(1), x@0 > 0 AND y@1 > 0 AND 1 / CAST(y@1 AS
Int64) < 1 / CAST(x@0 AS Int64) as t.x > Int64(0) AND t.y > Int64(0) AND
Int64(1) / t.y < Int64(1) / t.x]
-02)--MemoryExec: partitions=1, partition_sizes=[1]
+01)ProjectionExec: expr=[__common_expr_1@0 AND 1 / CAST(y@2 AS Int64) < 1 as
t.y > Int64(0) AND Int64(1) / t.y < Int64(1), x@1 > 0 AND __common_expr_1@0 AND
1 / CAST(y@2 AS Int64) < 1 / CAST(x@1 AS Int64) as t.x > Int64(0) AND t.y >
Int64(0) AND Int64(1) / t.y < Int64(1) / t.x]
+02)--ProjectionExec: expr=[y@1 > 0 as __common_expr_1, x@0 as x, y@1 as y]
+03)----MemoryExec: partitions=1, partition_sizes=[1]
query TT
EXPLAIN SELECT y = 0 or 1 / y < 1, x = 0 or y = 0 or 1 / y < 1 / x from t;
----
logical_plan
-01)Projection: t.y = Int32(0) OR Int64(1) / CAST(t.y AS Int64) < Int64(1) AS
t.y = Int64(0) OR Int64(1) / t.y < Int64(1), t.x = Int32(0) OR t.y = Int32(0)
OR Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS Int64) AS t.x =
Int64(0) OR t.y = Int64(0) OR Int64(1) / t.y < Int64(1) / t.x
-02)--TableScan: t projection=[x, y]
+01)Projection: __common_expr_1 OR Int64(1) / CAST(t.y AS Int64) < Int64(1) AS
t.y = Int64(0) OR Int64(1) / t.y < Int64(1), t.x = Int32(0) OR __common_expr_1
OR Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS Int64) AS t.x =
Int64(0) OR t.y = Int64(0) OR Int64(1) / t.y < Int64(1) / t.x
+02)--Projection: t.y = Int32(0) AS __common_expr_1, t.x, t.y
+03)----TableScan: t projection=[x, y]
physical_plan
-01)ProjectionExec: expr=[y@1 = 0 OR 1 / CAST(y@1 AS Int64) < 1 as t.y =
Int64(0) OR Int64(1) / t.y < Int64(1), x@0 = 0 OR y@1 = 0 OR 1 / CAST(y@1 AS
Int64) < 1 / CAST(x@0 AS Int64) as t.x = Int64(0) OR t.y = Int64(0) OR Int64(1)
/ t.y < Int64(1) / t.x]
-02)--MemoryExec: partitions=1, partition_sizes=[1]
+01)ProjectionExec: expr=[__common_expr_1@0 OR 1 / CAST(y@2 AS Int64) < 1 as
t.y = Int64(0) OR Int64(1) / t.y < Int64(1), x@1 = 0 OR __common_expr_1@0 OR 1
/ CAST(y@2 AS Int64) < 1 / CAST(x@1 AS Int64) as t.x = Int64(0) OR t.y =
Int64(0) OR Int64(1) / t.y < Int64(1) / t.x]
+02)--ProjectionExec: expr=[y@1 = 0 as __common_expr_1, x@0 as x, y@1 as y]
+03)----MemoryExec: partitions=1, partition_sizes=[1]
# due to the reason describe in
https://github.com/apache/datafusion/issues/8927,
# the following queries will fail
diff --git a/datafusion/sqllogictest/test_files/tpch/q14.slt.part
b/datafusion/sqllogictest/test_files/tpch/q14.slt.part
index e56e463a61..3743c201ff 100644
--- a/datafusion/sqllogictest/test_files/tpch/q14.slt.part
+++ b/datafusion/sqllogictest/test_files/tpch/q14.slt.part
@@ -32,9 +32,9 @@ where
and l_shipdate < date '1995-10-01';
----
logical_plan
-01)Projection: Float64(100) * CAST(sum(CASE WHEN part.p_type LIKE
Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount
ELSE Int64(0) END) AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) -
lineitem.l_discount) AS Float64) AS promo_revenue
-02)--Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN part.p_type LIKE
Utf8("PROMO%") THEN lineitem.l_extendedprice * (Decimal128(Some(1),20,0) -
lineitem.l_discount) ELSE Decimal128(Some(0),38,4) END) AS sum(CASE WHEN
part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) -
lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice *
(Decimal128(Some(1),20,0) - lineitem.l_discount)) AS
sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]]
-03)----Projection: lineitem.l_extendedprice, lineitem.l_discount, part.p_type
+01)Projection: Float64(100) * CAST(sum(CASE WHEN part.p_type LIKE
Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount
ELSE Int64(0) END) AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) -
lineitem.l_discount) AS Float64) AS promo_revenue
+02)--Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN part.p_type LIKE
Utf8("PROMO%") THEN __common_expr_1 ELSE Decimal128(Some(0),38,4) END) AS
sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice *
Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(__common_expr_1) AS
sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]]
+03)----Projection: lineitem.l_extendedprice * (Decimal128(Some(1),20,0) -
lineitem.l_discount) AS __common_expr_1, part.p_type
04)------Inner Join: lineitem.l_partkey = part.p_partkey
05)--------Projection: lineitem.l_partkey, lineitem.l_extendedprice,
lineitem.l_discount
06)----------Filter: lineitem.l_shipdate >= Date32("1995-09-01") AND
lineitem.l_shipdate < Date32("1995-10-01")
@@ -44,19 +44,20 @@ physical_plan
01)ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE
Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount
ELSE Int64(0) END)@0 AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1)
- lineitem.l_discount)@1 AS Float64) as promo_revenue]
02)--AggregateExec: mode=Final, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE
Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount
ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) -
lineitem.l_discount)]
03)----CoalescePartitionsExec
-04)------AggregateExec: mode=Partial, gby=[], aggr=[sum(CASE WHEN part.p_type
LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) -
lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1)
- lineitem.l_discount)]
-05)--------CoalesceBatchesExec: target_batch_size=8192
-06)----------HashJoinExec: mode=Partitioned, join_type=Inner,
on=[(l_partkey@0, p_partkey@0)], projection=[l_extendedprice@1, l_discount@2,
p_type@4]
-07)------------CoalesceBatchesExec: target_batch_size=8192
-08)--------------RepartitionExec: partitioning=Hash([l_partkey@0], 4),
input_partitions=4
-09)----------------ProjectionExec: expr=[l_partkey@0 as l_partkey,
l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount]
-10)------------------CoalesceBatchesExec: target_batch_size=8192
-11)--------------------FilterExec: l_shipdate@3 >= 1995-09-01 AND l_shipdate@3
< 1995-10-01
-12)----------------------CsvExec: file_groups={4 groups:
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749],
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498],
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247],
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]},
projection=[l_partkey, l_extendedprice, l_discount, l_shipd [...]
-13)------------CoalesceBatchesExec: target_batch_size=8192
-14)--------------RepartitionExec: partitioning=Hash([p_partkey@0], 4),
input_partitions=4
-15)----------------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
-16)------------------CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/part.tbl]]},
projection=[p_partkey, p_type], has_header=false
+04)------AggregateExec: mode=Partial, gby=[], aggr=[sum(CASE WHEN part.p_type
LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) -
lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1)
- lineitem.l_discount)]
+05)--------ProjectionExec: expr=[l_extendedprice@0 * (Some(1),20,0 -
l_discount@1) as __common_expr_1, p_type@2 as p_type]
+06)----------CoalesceBatchesExec: target_batch_size=8192
+07)------------HashJoinExec: mode=Partitioned, join_type=Inner,
on=[(l_partkey@0, p_partkey@0)], projection=[l_extendedprice@1, l_discount@2,
p_type@4]
+08)--------------CoalesceBatchesExec: target_batch_size=8192
+09)----------------RepartitionExec: partitioning=Hash([l_partkey@0], 4),
input_partitions=4
+10)------------------ProjectionExec: expr=[l_partkey@0 as l_partkey,
l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount]
+11)--------------------CoalesceBatchesExec: target_batch_size=8192
+12)----------------------FilterExec: l_shipdate@3 >= 1995-09-01 AND
l_shipdate@3 < 1995-10-01
+13)------------------------CsvExec: file_groups={4 groups:
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749],
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498],
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247],
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]},
projection=[l_partkey, l_extendedprice, l_discount, l_shi [...]
+14)--------------CoalesceBatchesExec: target_batch_size=8192
+15)----------------RepartitionExec: partitioning=Hash([p_partkey@0], 4),
input_partitions=4
+16)------------------RepartitionExec: partitioning=RoundRobinBatch(4),
input_partitions=1
+17)--------------------CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/part.tbl]]},
projection=[p_partkey, p_type], has_header=false
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]