Jefffrey commented on code in PR #8983:
URL: https://github.com/apache/arrow-datafusion/pull/8983#discussion_r1465469762
##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -229,6 +235,60 @@ impl<S: SimplifyInfo> ExprSimplifier<S> {
self.guarantees = guarantees;
self
}
+
+ /// Should [`Canonicalizer`] be applied before simplification?
+ ///
+ /// If true (the default), the expression will be rewritten to canonical
+ /// form before simplification. This is useful to ensure that the
simplifier
+ /// can apply all possible simplifications.
+ ///
+ /// Some expressions, such as those in some Joins, can not be canonicalized
+ /// without changing their meaning. In these cases, canonicalization should
+ /// be disabled.
+ ///
+ /// ```rust
+ /// use arrow::datatypes::{DataType, Field, Schema};
+ /// use datafusion_expr::{col, lit, Expr};
+ /// use datafusion_expr::interval_arithmetic::{Interval, NullableInterval};
+ /// use datafusion_common::{Result, ScalarValue, ToDFSchema};
+ /// use datafusion_physical_expr::execution_props::ExecutionProps;
+ /// use datafusion_optimizer::simplify_expressions::{
+ /// ExprSimplifier, SimplifyContext};
+ ///
+ /// let schema = Schema::new(vec![
+ /// Field::new("a", DataType::Int64, false),
+ /// Field::new("b", DataType::Int64, false),
+ /// Field::new("c", DataType::Int64, false),
+ /// ])
+ /// .to_dfschema_ref().unwrap();
+ ///
+ /// // Create the simplifier
+ /// let props = ExecutionProps::new();
+ /// let context = SimplifyContext::new(&props)
+ /// .with_schema(schema);
+ /// let simplifier = ExprSimplifier::new(context);
+ ///
+ /// // Expression: a = c AND 1 = b
+ /// let expr = col("a").eq(col("c")).and(lit(1).eq(col("b")));
+ ///
+ /// // With canonicalization, the expression is rewritten to canonical form
+ /// // (though it is no simpler in this case):
+ /// let canonical = simplifier.simplify(expr.clone()).unwrap();
+ /// // Expression has been rewritten to: (c = a AND b = 1)
+ /// assert_eq!(canonical, col("c").eq(col("a")).and(col("b").eq(lit(1))));
Review Comment:
Oh, seeing this just made me realize the doc for Canonicalize struct
actually states the opposite order:
https://github.com/apache/arrow-datafusion/blob/94a6192f6be30b7f6d009bc936a866bf5dcb280c/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs#L239
Probably can fix as part of this PR?
##########
datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs:
##########
@@ -85,44 +85,35 @@ impl SimplifyExpressions {
};
let info = SimplifyContext::new(execution_props).with_schema(schema);
- let simplifier = ExprSimplifier::new(info);
-
let new_inputs = plan
.inputs()
.iter()
.map(|input| Self::optimize_internal(input, execution_props))
.collect::<Result<Vec<_>>>()?;
- let expr = match plan {
- // Canonicalize step won't reorder expressions in a Join on clause.
- // The left and right expressions in a Join on clause are not
commutative,
- // since the order of the columns must match the order of the
children.
- LogicalPlan::Join(_) => {
- plan.expressions()
- .into_iter()
- .map(|e| {
- // TODO: unify with `rewrite_preserving_name`
- let original_name = e.name_for_alias()?;
- let new_e = simplifier.simplify(e)?;
- new_e.alias_if_changed(original_name)
- })
- .collect::<Result<Vec<_>>>()?
- }
- _ => {
- plan.expressions()
- .into_iter()
- .map(|e| {
- // TODO: unify with `rewrite_preserving_name`
- let original_name = e.name_for_alias()?;
- let cano_e = simplifier.canonicalize(e)?;
- let new_e = simplifier.simplify(cano_e)?;
- new_e.alias_if_changed(original_name)
- })
- .collect::<Result<Vec<_>>>()?
- }
+ let simplifier = ExprSimplifier::new(info);
+
+ // The left and right expressions in a Join on clause are not
commutative,
+ // since the order of the columns must match the order of the children.
+ // Thus, not reorder expressions in a Join `on` clause while
simplifying
Review Comment:
Actually, I just took a look at the join node:
https://github.com/apache/arrow-datafusion/blob/94a6192f6be30b7f6d009bc936a866bf5dcb280c/datafusion/expr/src/logical_plan/plan.rs#L2596-L2599
Was it actually a problem with the `filter` clause and not the `on` clause?
:thinking:
Because the `on` here is a vec of tuples to represent left and right (I was
assuming it was a single Expr before)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]