sdf-jkl commented on code in PR #18789: URL: https://github.com/apache/datafusion/pull/18789#discussion_r2551068805
########## datafusion/optimizer/src/simplify_expressions/udf_preimage.rs: ########## @@ -0,0 +1,407 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::str::FromStr; + +use arrow::compute::kernels::cast_utils::IntervalUnit; +use datafusion_common::{internal_err, tree_node::Transformed, Result, ScalarValue}; +use datafusion_expr::{ + and, expr::ScalarFunction, lit, or, simplify::SimplifyInfo, BinaryExpr, Expr, + Operator, ScalarUDFImpl, +}; +use datafusion_functions::datetime::date_part::DatePartFunc; + +pub(super) fn preimage_in_comparison_for_binary( + info: &dyn SimplifyInfo, + udf_expr: Expr, + literal: Expr, + op: Operator, +) -> Result<Transformed<Expr>> { + let (func, args, lit_value) = match (udf_expr, literal) { + ( + Expr::ScalarFunction(ScalarFunction { func, args }), + Expr::Literal(lit_value, _), + ) => (func, args, lit_value), + _ => return internal_err!("Expect date_part expr and literal"), + }; + let expr = Box::new(args[1].clone()); Review Comment: Do we know that the expression will be 2nd argument? 
########## datafusion/optimizer/src/simplify_expressions/udf_preimage.rs: ########## @@ -0,0 +1,407 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::str::FromStr; + +use arrow::compute::kernels::cast_utils::IntervalUnit; +use datafusion_common::{internal_err, tree_node::Transformed, Result, ScalarValue}; +use datafusion_expr::{ + and, expr::ScalarFunction, lit, or, simplify::SimplifyInfo, BinaryExpr, Expr, + Operator, ScalarUDFImpl, +}; +use datafusion_functions::datetime::date_part::DatePartFunc; + +pub(super) fn preimage_in_comparison_for_binary( + info: &dyn SimplifyInfo, + udf_expr: Expr, + literal: Expr, + op: Operator, +) -> Result<Transformed<Expr>> { + let (func, args, lit_value) = match (udf_expr, literal) { + ( + Expr::ScalarFunction(ScalarFunction { func, args }), + Expr::Literal(lit_value, _), + ) => (func, args, lit_value), + _ => return internal_err!("Expect date_part expr and literal"), + }; + let expr = Box::new(args[1].clone()); + + let Ok(expr_type) = info.get_data_type(&expr) else { + return internal_err!("Can't get the data type of the expr {:?}", &expr); + }; + + let preimage_func = match func.name() { Review Comment: Logic matching by name is not the best. 
This can be fixed by adding a `supports_preimage` method to `ScalarUDFImpl` and `ScalarUDF`, and by adding `preimage_cast` to `ScalarUDF`. ########## datafusion/optimizer/src/simplify_expressions/udf_preimage.rs: ########## @@ -0,0 +1,407 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
+ +use std::str::FromStr; + +use arrow::compute::kernels::cast_utils::IntervalUnit; +use datafusion_common::{internal_err, tree_node::Transformed, Result, ScalarValue}; +use datafusion_expr::{ + and, expr::ScalarFunction, lit, or, simplify::SimplifyInfo, BinaryExpr, Expr, + Operator, ScalarUDFImpl, +}; +use datafusion_functions::datetime::date_part::DatePartFunc; + +pub(super) fn preimage_in_comparison_for_binary( + info: &dyn SimplifyInfo, + udf_expr: Expr, + literal: Expr, + op: Operator, +) -> Result<Transformed<Expr>> { + let (func, args, lit_value) = match (udf_expr, literal) { + ( + Expr::ScalarFunction(ScalarFunction { func, args }), + Expr::Literal(lit_value, _), + ) => (func, args, lit_value), + _ => return internal_err!("Expect date_part expr and literal"), + }; + let expr = Box::new(args[1].clone()); + + let Ok(expr_type) = info.get_data_type(&expr) else { + return internal_err!("Can't get the data type of the expr {:?}", &expr); + }; + + let preimage_func = match func.name() { + "date_part" => DatePartFunc::new(), + _ => return internal_err!("Preimage is not supported for {:?}", func.name()), + }; + + let rewritten_expr = match op { + Operator::Lt | Operator::GtEq => { + let v = match preimage_func.preimage_cast(&lit_value, &expr_type, op) { + Some(v) => v, + None => { + return internal_err!("Could not cast literal to the column type") + } + }; Review Comment: These 6 lines are repeating in every match arm. ########## datafusion/optimizer/src/simplify_expressions/udf_preimage.rs: ########## @@ -0,0 +1,407 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::str::FromStr; + +use arrow::compute::kernels::cast_utils::IntervalUnit; +use datafusion_common::{internal_err, tree_node::Transformed, Result, ScalarValue}; +use datafusion_expr::{ + and, expr::ScalarFunction, lit, or, simplify::SimplifyInfo, BinaryExpr, Expr, + Operator, ScalarUDFImpl, +}; +use datafusion_functions::datetime::date_part::DatePartFunc; + +pub(super) fn preimage_in_comparison_for_binary( + info: &dyn SimplifyInfo, + udf_expr: Expr, + literal: Expr, + op: Operator, +) -> Result<Transformed<Expr>> { + let (func, args, lit_value) = match (udf_expr, literal) { + ( + Expr::ScalarFunction(ScalarFunction { func, args }), + Expr::Literal(lit_value, _), + ) => (func, args, lit_value), + _ => return internal_err!("Expect date_part expr and literal"), + }; + let expr = Box::new(args[1].clone()); + + let Ok(expr_type) = info.get_data_type(&expr) else { + return internal_err!("Can't get the data type of the expr {:?}", &expr); + }; + + let preimage_func = match func.name() { + "date_part" => DatePartFunc::new(), + _ => return internal_err!("Preimage is not supported for {:?}", func.name()), + }; + + let rewritten_expr = match op { + Operator::Lt | Operator::GtEq => { + let v = match preimage_func.preimage_cast(&lit_value, &expr_type, op) { + Some(v) => v, + None => { + return internal_err!("Could not cast literal to the column type") + } + }; + Expr::BinaryExpr(BinaryExpr { + left: expr, + op, + right: Box::new(lit(v)), + }) + } + Operator::Gt => { + let v = match preimage_func.preimage_cast(&lit_value, &expr_type, op) { + Some(v) => 
v, + None => { + return internal_err!("Could not cast literal to the column type") + } + }; + Expr::BinaryExpr(BinaryExpr { + left: expr, + op: Operator::GtEq, + right: Box::new(lit(v)), + }) + } + Operator::LtEq => { + let v = match preimage_func.preimage_cast(&lit_value, &expr_type, op) { + Some(v) => v, + None => { + return internal_err!("Could not cast literal to the column type") + } + }; + Expr::BinaryExpr(BinaryExpr { + left: expr, + op: Operator::Lt, + right: Box::new(lit(v)), + }) + } + Operator::Eq => { + let lower = + match preimage_func.preimage_cast(&lit_value, &expr_type, Operator::GtEq) + { + Some(v) => v, + None => { + return internal_err!("Could not cast literal to the column type") + } + }; + let upper = + match preimage_func.preimage_cast(&lit_value, &expr_type, Operator::LtEq) + { + Some(v) => v, + None => { + return internal_err!("Could not cast literal to the column type") + } + }; + and( + Expr::BinaryExpr(BinaryExpr { + left: expr.clone(), + op: Operator::GtEq, + right: Box::new(lit(lower)), + }), + Expr::BinaryExpr(BinaryExpr { + left: expr, + op: Operator::Lt, + right: Box::new(lit(upper)), + }), + ) + } + Operator::NotEq => { + let lower = + match preimage_func.preimage_cast(&lit_value, &expr_type, Operator::Lt) { + Some(v) => v, + None => { + return internal_err!("Could not cast literal to the column type") + } + }; + let upper = + match preimage_func.preimage_cast(&lit_value, &expr_type, Operator::Gt) { + Some(v) => v, + None => { + return internal_err!("Could not cast literal to the column type") + } + }; + or( + Expr::BinaryExpr(BinaryExpr { + left: expr.clone(), + op: Operator::Lt, + right: Box::new(lit(lower)), + }), + Expr::BinaryExpr(BinaryExpr { + left: expr, + op: Operator::GtEq, + right: Box::new(lit(upper)), + }), + ) + } + _ => return internal_err!("Expect comparison operators"), + }; + Ok(Transformed::yes(rewritten_expr)) +} + +pub(super) fn is_scalar_udf_expr_and_support_preimage_in_comparison_for_binary< + S: SimplifyInfo, 
+>( + info: &S, + expr: &Expr, + op: Operator, + literal: &Expr, +) -> bool { + let (func, args, lit_value) = match (expr, op, literal) { + ( + Expr::ScalarFunction(ScalarFunction { func, args }), + Operator::Eq + | Operator::NotEq + | Operator::Gt + | Operator::Lt + | Operator::GtEq + | Operator::LtEq, + Expr::Literal(lit_value, _), + ) => (func, args, lit_value), + _ => return false, + }; + + match func.name() { + "date_part" => { + let left_expr = Box::new(args[1].clone()); + let Some(ScalarValue::Utf8(Some(part))) = args[0].as_literal() else { + return false; + }; + match IntervalUnit::from_str(part) { + Ok(IntervalUnit::Year) => {} + _ => return false, + }; + let Ok(expr_type) = info.get_data_type(&left_expr) else { + return false; + }; + let Ok(_lit_type) = info.get_data_type(literal) else { + return false; + }; + DatePartFunc::new() + .preimage_cast(lit_value, &expr_type, op) + .is_some() + } + _ => false, Review Comment: Matching by `func.name()` again and in general this should not be within the crate, but inside the `ScalarUDFImpl`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
