This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new d863853cd Add type coercion rule for `concat` and `concat_ws` (#3721)
d863853cd is described below
commit d863853cdf43a6d2b8e208fbf01c8987c46595d1
Author: Remzi Yang <[email protected]>
AuthorDate: Fri Oct 7 22:55:20 2022 +0800
Add type coercion rule for `concat` and `concat_ws` (#3721)
* concat type coercion
Signed-off-by: remzi <[email protected]>
* fix test
Signed-off-by: remzi <[email protected]>
* use builder function
Signed-off-by: remzi <[email protected]>
Signed-off-by: remzi <[email protected]>
---
datafusion/optimizer/src/type_coercion.rs | 61 +++++++++++++++++++++++++++++--
1 file changed, 58 insertions(+), 3 deletions(-)
diff --git a/datafusion/optimizer/src/type_coercion.rs b/datafusion/optimizer/src/type_coercion.rs
index bb236fdde..f0470da87 100644
--- a/datafusion/optimizer/src/type_coercion.rs
+++ b/datafusion/optimizer/src/type_coercion.rs
@@ -29,8 +29,8 @@ use datafusion_expr::type_coercion::other::{
};
use datafusion_expr::utils::from_plan;
use datafusion_expr::{
- is_false, is_not_false, is_not_true, is_not_unknown, is_true, is_unknown, Expr,
- LogicalPlan, Operator,
+ is_false, is_not_false, is_not_true, is_not_unknown, is_true, is_unknown,
+ BuiltinScalarFunction, Expr, LogicalPlan, Operator,
};
use datafusion_expr::{ExprSchemable, Signature};
use std::sync::Arc;
@@ -401,6 +401,20 @@ impl ExprRewriter for TypeCoercionRewriter {
}
}
}
+ Expr::ScalarFunction { fun, args } => match fun {
+ BuiltinScalarFunction::Concat
+ | BuiltinScalarFunction::ConcatWithSeparator => {
+ let new_args = args
+ .iter()
+ .map(|e| e.clone().cast_to(&DataType::Utf8, &self.schema))
+ .collect::<Result<Vec<_>>>()?;
+ Ok(Expr::ScalarFunction {
+ fun,
+ args: new_args,
+ })
+ }
+ fun => Ok(Expr::ScalarFunction { fun, args }),
+ },
expr => Ok(expr),
}
}
@@ -449,7 +463,7 @@ mod test {
use arrow::datatypes::DataType;
use datafusion_common::{DFField, DFSchema, Result, ScalarValue};
use datafusion_expr::expr_rewriter::ExprRewritable;
- use datafusion_expr::{cast, col, is_true, ColumnarValue};
+ use datafusion_expr::{cast, col, concat, concat_ws, is_true, ColumnarValue};
use datafusion_expr::{
lit,
logical_plan::{EmptyRelation, Projection},
@@ -782,6 +796,47 @@ mod test {
Ok(())
}
+ #[test]
+ fn concat_for_type_coercion() -> Result<()> {
+ let empty = empty_with_type(DataType::Utf8);
+ let args = [col("a"), lit("b"), lit(true), lit(false), lit(13)];
+
+ // concat
+ {
+ let expr = concat(&args);
+
+ let plan = LogicalPlan::Projection(Projection::try_new(
+ vec![expr],
+ empty.clone(),
+ None,
+ )?);
+ let rule = TypeCoercion::new();
+ let mut config = OptimizerConfig::default();
+ let plan = rule.optimize(&plan, &mut config).unwrap();
+ assert_eq!(
+ "Projection: concat(a, Utf8(\"b\"), CAST(Boolean(true) AS Utf8), CAST(Boolean(false) AS Utf8), CAST(Int32(13) AS Utf8))\n EmptyRelation",
+ &format!("{:?}", plan)
+ );
+ }
+
+ // concat_ws
+ {
+ let expr = concat_ws("-", &args);
+
+ let plan =
+ LogicalPlan::Projection(Projection::try_new(vec![expr], empty, None)?);
+ let rule = TypeCoercion::new();
+ let mut config = OptimizerConfig::default();
+ let plan = rule.optimize(&plan, &mut config).unwrap();
+ assert_eq!(
+ "Projection: concatwithseparator(Utf8(\"-\"), a, Utf8(\"b\"), CAST(Boolean(true) AS Utf8), CAST(Boolean(false) AS Utf8), CAST(Int32(13) AS Utf8))\n EmptyRelation",
+ &format!("{:?}", plan)
+ );
+ }
+
+ Ok(())
+ }
+
fn empty() -> Arc<LogicalPlan> {
Arc::new(LogicalPlan::EmptyRelation(EmptyRelation {
produce_one_row: false,