This is an automated email from the ASF dual-hosted git repository.
jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new ec86acbc1f feat: expand `unnest` to accept arbitrary single array expression (#9342)
ec86acbc1f is described below
commit ec86acbc1fbc0da1e0bec9ad066a5177ec586c96
Author: Jonah Gao <[email protected]>
AuthorDate: Mon Feb 26 21:33:19 2024 +0800
feat: expand `unnest` to accept arbitrary single array expression (#9342)
* feat: expand `unnest` to accept any single array expression
* unnest null
* review feedback
---
datafusion/sql/src/expr/function.rs | 58 ++++++++++++---------------
datafusion/sqllogictest/test_files/unnest.slt | 48 +++++++++++++++++-----
2 files changed, 64 insertions(+), 42 deletions(-)
diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs
index f56138066c..db572a23cf 100644
--- a/datafusion/sql/src/expr/function.rs
+++ b/datafusion/sql/src/expr/function.rs
@@ -16,16 +16,17 @@
// under the License.
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
+use arrow_schema::DataType;
use datafusion_common::{
- exec_err, not_impl_err, plan_datafusion_err, plan_err, DFSchema,
DataFusionError,
- Dependency, Result,
+ not_impl_err, plan_datafusion_err, plan_err, DFSchema, DataFusionError,
Dependency,
+ Result,
};
use datafusion_expr::expr::{ScalarFunction, Unnest};
use datafusion_expr::function::suggest_valid_function;
use datafusion_expr::window_frame::{check_window_frame,
regularize_window_order_by};
use datafusion_expr::{
- expr, AggregateFunction, BuiltinScalarFunction, Expr,
ScalarFunctionDefinition,
- WindowFrame, WindowFunctionDefinition,
+ expr, AggregateFunction, BuiltinScalarFunction, Expr, ExprSchemable,
WindowFrame,
+ WindowFunctionDefinition,
};
use sqlparser::ast::{
Expr as SQLExpr, Function as SQLFunction, FunctionArg, FunctionArgExpr,
WindowType,
@@ -80,41 +81,34 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
if name.eq("unnest") {
let exprs =
self.function_args_to_expr(args.clone(), schema,
planner_context)?;
-
- match exprs.len() {
+ // Currently only one argument is supported
+ let arg = match exprs.len() {
0 => {
- return exec_err!("unnest() requires at least one
argument");
- }
- 1 => {
- if let Expr::ScalarFunction(ScalarFunction {
- func_def:
- ScalarFunctionDefinition::BuiltIn(
- BuiltinScalarFunction::MakeArray,
- ),
- ..
- }) = exprs[0]
- {
- // valid
- } else if let Expr::Column(_) = exprs[0] {
- // valid
- } else if let Expr::ScalarFunction(ScalarFunction {
- func_def:
-
ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::Struct),
- ..
- }) = exprs[0]
- {
- return not_impl_err!("unnest() does not support struct
yet");
- } else {
- return plan_err!(
- "unnest() can only be applied to array and structs
and null"
- );
- }
+ return plan_err!("unnest() requires at least one
argument");
}
+ 1 => &exprs[0],
_ => {
return not_impl_err!(
"unnest() does not support multiple arguments yet"
);
}
+ };
+ // Check argument type, array types are supported
+ match arg.get_type(schema)? {
+ DataType::List(_)
+ | DataType::LargeList(_)
+ | DataType::FixedSizeList(_, _) => {}
+ DataType::Struct(_) => {
+ return not_impl_err!("unnest() does not support struct
yet");
+ }
+ DataType::Null => {
+ return not_impl_err!("unnest() does not support null yet");
+ }
+ _ => {
+ return plan_err!(
+ "unnest() can only be applied to array, struct and
null"
+ );
+ }
}
return Ok(Expr::Unnest(Unnest { exprs }));
diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt
index 7e4ce06be2..9990c00f75 100644
--- a/datafusion/sqllogictest/test_files/unnest.slt
+++ b/datafusion/sqllogictest/test_files/unnest.slt
@@ -36,7 +36,7 @@ select unnest([1,2,3]);
2
3
-query error DataFusion error: Error during planning: unnest\(\) can only be applied to array and structs and null
+query error DataFusion error: This feature is not implemented: unnest\(\) does not support null yet
select unnest(null);
## Unnest empty array
@@ -71,27 +71,55 @@ NULL
NULL
## Unnest column with scalars
-# TODO: This should be an error, but unnest is able to process scalar values now.
-query I
+query error DataFusion error: Error during planning: unnest\(\) can only be applied to array, struct and null
select unnest(column3) from unnest_table;
-----
-1
-2
-3
-NULL
## Unnest multiple columns
query error DataFusion error: This feature is not implemented: Only support single unnest expression for now
select unnest(column1), unnest(column2) from unnest_table;
## Unnest scalar
-query error DataFusion error: Error during planning: unnest\(\) can only be applied to array and structs and null
+query error DataFusion error: Error during planning: unnest\(\) can only be applied to array, struct and null
select unnest(1);
## Unnest empty expression
-query error DataFusion error: Execution error: unnest\(\) requires at least one argument
+query error DataFusion error: Error during planning: unnest\(\) requires at least one argument
select unnest();
+## Unnest struct expression
+query error DataFusion error: This feature is not implemented: unnest\(\) does not support struct yet
+select unnest(struct(null));
+
+
+## Unnest array expression
+query I
+select unnest(range(1, 3));
+----
+1
+2
+
+query I
+select unnest(arrow_cast(range(1, 3), 'LargeList(Int64)'));
+----
+1
+2
+
+query I
+select unnest(arrow_cast(range(1, 3), 'FixedSizeList(2, Int64)'));
+----
+1
+2
+
+query I
+select unnest(array_remove(column1, 12)) from unnest_table;
+----
+1
+2
+3
+4
+5
+6
+
statement ok
drop table unnest_table;