This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 0d1cd55ca remove the type coercion in the simplify_expressions rule
(#3657)
0d1cd55ca is described below
commit 0d1cd55cae62c4d2396169c12e9e769ea90de03d
Author: Kun Liu <[email protected]>
AuthorDate: Fri Sep 30 01:49:41 2022 +0800
remove the type coercion in the simplify_expressions rule (#3657)
---
datafusion/optimizer/src/simplify_expressions.rs | 52 ++++++++----------------
datafusion/optimizer/src/type_coercion.rs | 10 +----
2 files changed, 18 insertions(+), 44 deletions(-)
diff --git a/datafusion/optimizer/src/simplify_expressions.rs
b/datafusion/optimizer/src/simplify_expressions.rs
index 94a035fbd..01b46cc92 100644
--- a/datafusion/optimizer/src/simplify_expressions.rs
+++ b/datafusion/optimizer/src/simplify_expressions.rs
@@ -18,7 +18,6 @@
//! Simplify expressions optimizer rule
use crate::expr_simplifier::ExprSimplifiable;
-use crate::type_coercion::TypeCoercionRewriter;
use crate::{expr_simplifier::SimplifyInfo, OptimizerConfig, OptimizerRule};
use arrow::array::new_null_array;
use arrow::datatypes::{DataType, Field, Schema};
@@ -33,7 +32,6 @@ use datafusion_expr::{
BuiltinScalarFunction, ColumnarValue, Expr, ExprSchemable, Operator,
Volatility,
};
use datafusion_physical_expr::{create_physical_expr,
execution_props::ExecutionProps};
-use std::sync::Arc;
/// Provides simplification information based on schema and properties
pub(crate) struct SimplifyContext<'a, 'b> {
@@ -377,9 +375,6 @@ pub struct ConstEvaluator<'a> {
execution_props: &'a ExecutionProps,
input_schema: DFSchema,
input_batch: RecordBatch,
- // Needed until we ensure type coercion is done before any optimizations
- // https://github.com/apache/arrow-datafusion/issues/3557
- type_coercion_helper: TypeCoercionRewriter,
}
impl<'a> ExprRewriter for ConstEvaluator<'a> {
@@ -434,14 +429,12 @@ impl<'a> ConstEvaluator<'a> {
// Need a single "input" row to produce a single output row
let col = new_null_array(&DataType::Null, 1);
let input_batch = RecordBatch::try_new(std::sync::Arc::new(schema),
vec![col])?;
- let type_coercion =
TypeCoercionRewriter::new(Arc::new(input_schema.clone()));
Ok(Self {
can_evaluate: vec![],
execution_props,
input_schema,
input_batch,
- type_coercion_helper: type_coercion,
})
}
@@ -510,15 +503,6 @@ impl<'a> ConstEvaluator<'a> {
return Ok(s);
}
- // TODO: https://github.com/apache/arrow-datafusion/issues/3582
- // TODO: https://github.com/apache/arrow-datafusion/issues/3556
- // Do the type coercion in the simplify expression
- // this is just a work around for removing the type coercion in the
physical phase
- // we need to support eval the result without the physical expr.
- // If we don't do the type coercion, we will meet the
- // https://github.com/apache/arrow-datafusion/issues/3556 when create
the physical expr
- // to try evaluate the result.
- let expr = expr.rewrite(&mut self.type_coercion_helper)?;
let phys_expr = create_physical_expr(
&expr,
&self.input_schema,
@@ -1223,12 +1207,6 @@ mod tests {
assert_eq!(simplify(expr_eq), lit(true));
}
- #[test]
- fn test_simplify_with_type_coercion() {
- let expr_plus = binary_expr(lit(1_i32), Operator::Plus, lit(1_i64));
- assert_eq!(simplify(expr_plus), lit(2_i64));
- }
-
#[test]
fn test_simplify_concat_ws_null_separator() {
fn build_concat_ws_expr(args: &[Expr]) -> Expr {
@@ -1351,13 +1329,13 @@ mod tests {
// now() --> ts
test_evaluate_with_start_time(now_expr(),
lit_timestamp_nano(ts_nanos), &time);
- // CAST(now() as int64) + 100 --> ts + 100
- let expr = cast_to_int64_expr(now_expr()) + lit(100);
+ // CAST(now() as int64) + 100_i64 --> ts + 100_i64
+ let expr = cast_to_int64_expr(now_expr()) + lit(100_i64);
test_evaluate_with_start_time(expr, lit(ts_nanos + 100), &time);
- // CAST(now() as int64) < cast(to_timestamp(...) as int64) + 50000
---> true
+ // CAST(now() as int64) < cast(to_timestamp(...) as int64) +
50000_i64 ---> true
let expr = cast_to_int64_expr(now_expr())
- .lt(cast_to_int64_expr(to_timestamp_expr(ts_string)) + lit(50000));
+ .lt(cast_to_int64_expr(to_timestamp_expr(ts_string)) +
lit(50000i64));
test_evaluate_with_start_time(expr, lit(true), &time);
}
@@ -2208,19 +2186,21 @@ mod tests {
let ts_string = "2020-09-08T12:05:00+00:00";
let time = chrono::Utc.timestamp_nanos(1599566400000000000i64);
- // cast(now() as int) < cast(to_timestamp(...) as int) + 5000000000
- let plan = LogicalPlanBuilder::from(table_scan)
- .filter(
- cast_to_int64_expr(now_expr())
- .lt(cast_to_int64_expr(to_timestamp_expr(ts_string)) +
lit(50000)),
- )
- .unwrap()
- .build()
- .unwrap();
+ // cast(now() as int) < cast(to_timestamp(...) as int) + 50000_i64
+ let plan =
+ LogicalPlanBuilder::from(table_scan)
+ .filter(
+ cast_to_int64_expr(now_expr())
+ .lt(cast_to_int64_expr(to_timestamp_expr(ts_string))
+ + lit(50000_i64)),
+ )
+ .unwrap()
+ .build()
+ .unwrap();
// Note that constant folder runs and folds the entire
// expression down to a single constant (true)
- let expected = "Filter: Boolean(true) AS now() <
totimestamp(Utf8(\"2020-09-08T12:05:00+00:00\")) + Int32(50000)\
+ let expected = "Filter: Boolean(true) AS now() <
totimestamp(Utf8(\"2020-09-08T12:05:00+00:00\")) + Int64(50000)\
\n TableScan: test";
let actual = get_optimized_plan_formatted(&plan, &time);
diff --git a/datafusion/optimizer/src/type_coercion.rs
b/datafusion/optimizer/src/type_coercion.rs
index 372d09326..3d45c5041 100644
--- a/datafusion/optimizer/src/type_coercion.rs
+++ b/datafusion/optimizer/src/type_coercion.rs
@@ -117,16 +117,10 @@ fn optimize_internal(
from_plan(plan, &new_expr, &new_inputs)
}
-pub(crate) struct TypeCoercionRewriter {
+struct TypeCoercionRewriter {
pub(crate) schema: DFSchemaRef,
}
-impl TypeCoercionRewriter {
- pub(crate) fn new(schema: DFSchemaRef) -> TypeCoercionRewriter {
- TypeCoercionRewriter { schema }
- }
-}
-
impl ExprRewriter for TypeCoercionRewriter {
fn pre_visit(&mut self, _expr: &Expr) -> Result<RewriteRecursion> {
Ok(RewriteRecursion::Continue)
@@ -796,7 +790,7 @@ mod test {
)
.unwrap(),
);
- let mut rewriter = TypeCoercionRewriter::new(schema);
+ let mut rewriter = TypeCoercionRewriter { schema };
let expr = is_true(lit(12i32).eq(lit(13i64)));
let expected = is_true(
cast(lit(ScalarValue::Int32(Some(12))), DataType::Int64)