alamb commented on code in PR #3758:
URL: https://github.com/apache/arrow-datafusion/pull/3758#discussion_r990434857
##########
datafusion/optimizer/src/expr_simplifier.rs:
##########
@@ -15,17 +15,24 @@
// specific language governing permissions and limitations
// under the License.
-//! Expression simplifier
+//! Expression simplification API
Review Comment:
This PR looks bigger than it really is -- it moves a bunch of code around
and there are a significant number of docstrings
##########
datafusion/optimizer/src/expr_simplifier.rs:
##########
@@ -76,24 +96,181 @@ impl ExprSimplifiable for Expr {
/// }
/// }
///
+ /// // Create the simplifier
+ /// let simplifier = ExprSimplifier::new(Info::default());
+ ///
/// // b < 2
/// let b_lt_2 = col("b").gt(lit(2));
///
/// // (b < 2) OR (b < 2)
/// let expr = b_lt_2.clone().or(b_lt_2.clone());
///
/// // (b < 2) OR (b < 2) --> (b < 2)
- /// let expr = expr.simplify(&Info::default()).unwrap();
+ /// let expr = simplifier.simplify(expr).unwrap();
/// assert_eq!(expr, b_lt_2);
/// ```
- fn simplify<S: SimplifyInfo>(self, info: &S) -> Result<Self> {
- let mut rewriter = Simplifier::new(info);
- let mut const_evaluator =
ConstEvaluator::try_new(info.execution_props())?;
+ pub fn simplify(&self, expr: Expr) -> Result<Expr> {
+ let mut rewriter = Simplifier::new(&self.info);
+ let mut const_evaluator =
ConstEvaluator::try_new(self.info.execution_props())?;
// TODO iterate until no changes are made during rewrite
// (evaluating constants can enable new simplifications and
// simplifications can enable new constant evaluation)
// https://github.com/apache/arrow-datafusion/issues/1160
- self.rewrite(&mut const_evaluator)?.rewrite(&mut rewriter)
+ expr.rewrite(&mut const_evaluator)?.rewrite(&mut rewriter)
+ }
+
+ /// Apply type coercion to an [`Expr`] so that it can be
+ /// evaluated as a
[`PhysicalExpr`](datafusion_physical_expr::PhysicalExpr).
+ ///
+ /// See the [type coercion module](datafusion_expr::type_coercion)
+ /// documentation for more details on type coercion
+ ///
+ // Would be nice if this API could use the SimplifyInfo
+ // rather than creating an DFSchemaRef coerces rather than doing
+ // it manually.
+ // TODO ticekt
+ pub fn coerce(&self, expr: Expr, schema: DFSchemaRef) -> Result<Expr> {
+ let mut expr_rewrite = TypeCoercionRewriter { schema };
+
+ expr.rewrite(&mut expr_rewrite)
+ }
+}
+
+/// Provides simplification information based on DFSchema and
+/// [`ExecutionProps`]. This is the default implementation used by DataFusion
+///
+/// For example:
+/// ```
+/// use arrow::datatypes::{Schema, Field, DataType};
+/// use datafusion_expr::{col, lit};
+/// use datafusion_common::{DataFusionError, ToDFSchema};
+/// use datafusion_physical_expr::execution_props::ExecutionProps;
+/// use datafusion_optimizer::expr_simplifier::{SimplifyContext,
ExprSimplifier};
+///
+/// // Create the schema
+/// let schema = Schema::new(vec![
+/// Field::new("i", DataType::Int64, false),
+/// ])
+/// .to_dfschema_ref().unwrap();
+///
+/// // Create the simplifier
+/// let props = ExecutionProps::new();
+/// let context = SimplifyContext::new(&props)
+/// .with_schema(schema);
+/// let simplifier = ExprSimplifier::new(context);
+///
+/// // Use the simplifier
+///
+/// // b < 2 or (1 > 3)
+/// let expr = col("b").lt(lit(2)).or(lit(1).gt(lit(3)));
+///
+/// // b < 2
+/// let simplified = simplifier.simplify(expr).unwrap();
+/// assert_eq!(simplified, col("b").lt(lit(2)));
+/// ```
+pub struct SimplifyContext<'a> {
Review Comment:
This was moved into the public API and I added some docs and a builder
interface `with_schema`
##########
datafusion/optimizer/src/simplify_expressions.rs:
##########
@@ -15,73 +15,25 @@
// specific language governing permissions and limitations
// under the License.
-//! Simplify expressions optimizer rule
+//! Simplify expressions optimizer rule and implementation
-use crate::expr_simplifier::ExprSimplifiable;
+use crate::expr_simplifier::{ExprSimplifier, SimplifyContext};
use crate::{expr_simplifier::SimplifyInfo, OptimizerConfig, OptimizerRule};
use arrow::array::new_null_array;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::error::ArrowError;
use arrow::record_batch::RecordBatch;
-use datafusion_common::{DFSchema, DFSchemaRef, DataFusionError, Result,
ScalarValue};
+use datafusion_common::{DFSchema, DataFusionError, Result, ScalarValue};
use datafusion_expr::{
expr_fn::{and, or},
expr_rewriter::{ExprRewritable, ExprRewriter, RewriteRecursion},
lit,
logical_plan::LogicalPlan,
utils::from_plan,
- BuiltinScalarFunction, ColumnarValue, Expr, ExprSchemable, Operator,
Volatility,
+ BuiltinScalarFunction, ColumnarValue, Expr, Operator, Volatility,
};
use datafusion_physical_expr::{create_physical_expr,
execution_props::ExecutionProps};
-/// Provides simplification information based on schema and properties
Review Comment:
moved
##########
datafusion/optimizer/src/expr_simplifier.rs:
##########
@@ -76,24 +96,181 @@ impl ExprSimplifiable for Expr {
/// }
/// }
///
+ /// // Create the simplifier
+ /// let simplifier = ExprSimplifier::new(Info::default());
+ ///
/// // b < 2
/// let b_lt_2 = col("b").gt(lit(2));
///
/// // (b < 2) OR (b < 2)
/// let expr = b_lt_2.clone().or(b_lt_2.clone());
///
/// // (b < 2) OR (b < 2) --> (b < 2)
- /// let expr = expr.simplify(&Info::default()).unwrap();
+ /// let expr = simplifier.simplify(expr).unwrap();
/// assert_eq!(expr, b_lt_2);
/// ```
- fn simplify<S: SimplifyInfo>(self, info: &S) -> Result<Self> {
- let mut rewriter = Simplifier::new(info);
- let mut const_evaluator =
ConstEvaluator::try_new(info.execution_props())?;
+ pub fn simplify(&self, expr: Expr) -> Result<Expr> {
+ let mut rewriter = Simplifier::new(&self.info);
+ let mut const_evaluator =
ConstEvaluator::try_new(self.info.execution_props())?;
// TODO iterate until no changes are made during rewrite
// (evaluating constants can enable new simplifications and
// simplifications can enable new constant evaluation)
// https://github.com/apache/arrow-datafusion/issues/1160
- self.rewrite(&mut const_evaluator)?.rewrite(&mut rewriter)
+ expr.rewrite(&mut const_evaluator)?.rewrite(&mut rewriter)
+ }
+
+ /// Apply type coercion to an [`Expr`] so that it can be
+ /// evaluated as a
[`PhysicalExpr`](datafusion_physical_expr::PhysicalExpr).
+ ///
+ /// See the [type coercion module](datafusion_expr::type_coercion)
+ /// documentation for more details on type coercion
+ ///
+ // Would be nice if this API could use the SimplifyInfo
+ // rather than creating an DFSchemaRef coerces rather than doing
+ // it manually.
+ // TODO ticekt
+ pub fn coerce(&self, expr: Expr, schema: DFSchemaRef) -> Result<Expr> {
Review Comment:
Here is the new API for coercion
##########
datafusion/optimizer/src/expr_simplifier.rs:
##########
@@ -37,28 +44,41 @@ pub trait SimplifyInfo {
fn execution_props(&self) -> &ExecutionProps;
}
-/// trait for types that can be simplified
Review Comment:
Removed in favor of calling `ExprSimplifier::simplify` directly
##########
datafusion/optimizer/src/simplify_expressions.rs:
##########
@@ -950,30 +909,12 @@ macro_rules! assert_contains {
};
}
-/// Apply simplification and constant propagation to ([Expr]).
Review Comment:
Instead of adding a `simplify` method on to `Expr` via this trait, I propose
to have an `ExprSimplifier` struct that has a simplify method.
cc @ygf11
I found it made the examples in `expr_api.rs` less awkward to write because
the schema wasn't needed
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]