This is an automated email from the ASF dual-hosted git repository.

jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push: new ec86acbc1f feat: expand `unnest` to accept arbitrary single array expression (#9342) ec86acbc1f is described below commit ec86acbc1fbc0da1e0bec9ad066a5177ec586c96 Author: Jonah Gao <jonah...@msn.com> AuthorDate: Mon Feb 26 21:33:19 2024 +0800 feat: expand `unnest` to accept arbitrary single array expression (#9342) * feat: expand `unnest` to accept any single array expression * unnest null * review feedback --- datafusion/sql/src/expr/function.rs | 58 ++++++++++++--------------- datafusion/sqllogictest/test_files/unnest.slt | 48 +++++++++++++++++----- 2 files changed, 64 insertions(+), 42 deletions(-) diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index f56138066c..db572a23cf 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -16,16 +16,17 @@ // under the License. use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; +use arrow_schema::DataType; use datafusion_common::{ - exec_err, not_impl_err, plan_datafusion_err, plan_err, DFSchema, DataFusionError, - Dependency, Result, + not_impl_err, plan_datafusion_err, plan_err, DFSchema, DataFusionError, Dependency, + Result, }; use datafusion_expr::expr::{ScalarFunction, Unnest}; use datafusion_expr::function::suggest_valid_function; use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by}; use datafusion_expr::{ - expr, AggregateFunction, BuiltinScalarFunction, Expr, ScalarFunctionDefinition, - WindowFrame, WindowFunctionDefinition, + expr, AggregateFunction, BuiltinScalarFunction, Expr, ExprSchemable, WindowFrame, + WindowFunctionDefinition, }; use sqlparser::ast::{ Expr as SQLExpr, Function as SQLFunction, FunctionArg, FunctionArgExpr, WindowType, @@ -80,41 +81,34 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { if name.eq("unnest") { let exprs = self.function_args_to_expr(args.clone(), schema, planner_context)?; - - match 
exprs.len() { + // Currently only one argument is supported + let arg = match exprs.len() { 0 => { - return exec_err!("unnest() requires at least one argument"); - } - 1 => { - if let Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn( - BuiltinScalarFunction::MakeArray, - ), - .. - }) = exprs[0] - { - // valid - } else if let Expr::Column(_) = exprs[0] { - // valid - } else if let Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::Struct), - .. - }) = exprs[0] - { - return not_impl_err!("unnest() does not support struct yet"); - } else { - return plan_err!( - "unnest() can only be applied to array and structs and null" - ); - } + return plan_err!("unnest() requires at least one argument"); } + 1 => &exprs[0], _ => { return not_impl_err!( "unnest() does not support multiple arguments yet" ); } + }; + // Check argument type, array types are supported + match arg.get_type(schema)? { + DataType::List(_) + | DataType::LargeList(_) + | DataType::FixedSizeList(_, _) => {} + DataType::Struct(_) => { + return not_impl_err!("unnest() does not support struct yet"); + } + DataType::Null => { + return not_impl_err!("unnest() does not support null yet"); + } + _ => { + return plan_err!( + "unnest() can only be applied to array, struct and null" + ); + } } return Ok(Expr::Unnest(Unnest { exprs })); diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index 7e4ce06be2..9990c00f75 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -36,7 +36,7 @@ select unnest([1,2,3]); 2 3 -query error DataFusion error: Error during planning: unnest\(\) can only be applied to array and structs and null +query error DataFusion error: This feature is not implemented: unnest\(\) does not support null yet select unnest(null); ## Unnest empty array @@ -71,27 +71,55 @@ NULL NULL ## Unnest column 
with scalars -# TODO: This should be an error, but unnest is able to process scalar values now. -query I +query error DataFusion error: Error during planning: unnest\(\) can only be applied to array, struct and null select unnest(column3) from unnest_table; ----- -1 -2 -3 -NULL ## Unnest multiple columns query error DataFusion error: This feature is not implemented: Only support single unnest expression for now select unnest(column1), unnest(column2) from unnest_table; ## Unnest scalar -query error DataFusion error: Error during planning: unnest\(\) can only be applied to array and structs and null +query error DataFusion error: Error during planning: unnest\(\) can only be applied to array, struct and null select unnest(1); ## Unnest empty expression -query error DataFusion error: Execution error: unnest\(\) requires at least one argument +query error DataFusion error: Error during planning: unnest\(\) requires at least one argument select unnest(); +## Unnest struct expression +query error DataFusion error: This feature is not implemented: unnest\(\) does not support struct yet +select unnest(struct(null)); + + +## Unnest array expression +query I +select unnest(range(1, 3)); +---- +1 +2 + +query I +select unnest(arrow_cast(range(1, 3), 'LargeList(Int64)')); +---- +1 +2 + +query I +select unnest(arrow_cast(range(1, 3), 'FixedSizeList(2, Int64)')); +---- +1 +2 + +query I +select unnest(array_remove(column1, 12)) from unnest_table; +---- +1 +2 +3 +4 +5 +6 + statement ok drop table unnest_table;