This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 93b21bdcd3 Enable non-uniform field type for structs created in
DataFusion (#8463)
93b21bdcd3 is described below
commit 93b21bdcd3d465ed78b610b54edf1418a47fc497
Author: Dan Lovell <[email protected]>
AuthorDate: Mon Dec 11 06:21:24 2023 -0500
Enable non-uniform field type for structs created in DataFusion (#8463)
* feat: struct: implement variadic_any solution, enable all struct field
types
* fix: run cargo-fmt
* cln: remove unused imports
---
datafusion/expr/src/built_in_function.rs | 8 ++---
datafusion/physical-expr/src/struct_expressions.rs | 35 ++++++----------------
datafusion/sqllogictest/test_files/struct.slt | 11 +++++++
3 files changed, 22 insertions(+), 32 deletions(-)
diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs
index 977b556b26..5a903a73ad 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -28,8 +28,7 @@ use crate::signature::TIMEZONE_WILDCARD;
use crate::type_coercion::binary::get_wider_type;
use crate::type_coercion::functions::data_types;
use crate::{
- conditional_expressions, struct_expressions, FuncMonotonicity, Signature,
- TypeSignature, Volatility,
+ conditional_expressions, FuncMonotonicity, Signature, TypeSignature, Volatility,
};
use arrow::datatypes::{DataType, Field, Fields, IntervalUnit, TimeUnit};
@@ -971,10 +970,7 @@ impl BuiltinScalarFunction {
],
self.volatility(),
),
- BuiltinScalarFunction::Struct => Signature::variadic(
- struct_expressions::SUPPORTED_STRUCT_TYPES.to_vec(),
- self.volatility(),
- ),
+ BuiltinScalarFunction::Struct => Signature::variadic_any(self.volatility()),
BuiltinScalarFunction::Concat
| BuiltinScalarFunction::ConcatWithSeparator => {
Signature::variadic(vec![Utf8], self.volatility())
diff --git a/datafusion/physical-expr/src/struct_expressions.rs b/datafusion/physical-expr/src/struct_expressions.rs
index 0eed1d16fb..b0ccb2a3cc 100644
--- a/datafusion/physical-expr/src/struct_expressions.rs
+++ b/datafusion/physical-expr/src/struct_expressions.rs
@@ -18,8 +18,8 @@
//! Struct expressions
use arrow::array::*;
-use arrow::datatypes::{DataType, Field};
-use datafusion_common::{exec_err, not_impl_err, DataFusionError, Result};
+use arrow::datatypes::Field;
+use datafusion_common::{exec_err, DataFusionError, Result};
use datafusion_expr::ColumnarValue;
use std::sync::Arc;
@@ -34,31 +34,14 @@ fn array_struct(args: &[ArrayRef]) -> Result<ArrayRef> {
.enumerate()
.map(|(i, arg)| {
let field_name = format!("c{i}");
- match arg.data_type() {
- DataType::Utf8
- | DataType::LargeUtf8
- | DataType::Boolean
- | DataType::Float32
- | DataType::Float64
- | DataType::Int8
- | DataType::Int16
- | DataType::Int32
- | DataType::Int64
- | DataType::UInt8
- | DataType::UInt16
- | DataType::UInt32
- | DataType::UInt64 => Ok((
- Arc::new(Field::new(
- field_name.as_str(),
- arg.data_type().clone(),
- true,
- )),
- arg.clone(),
+ Ok((
+ Arc::new(Field::new(
+ field_name.as_str(),
+ arg.data_type().clone(),
+ true,
)),
- data_type => {
- not_impl_err!("Struct is not implemented for type '{data_type:?}'.")
- }
- }
+ arg.clone(),
+ ))
})
.collect::<Result<Vec<_>>>()?;
diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt
index fc14798a3b..936dedcc89 100644
--- a/datafusion/sqllogictest/test_files/struct.slt
+++ b/datafusion/sqllogictest/test_files/struct.slt
@@ -58,5 +58,16 @@ select struct(a, b, c) from values;
{c0: 2, c1: 2.2, c2: b}
{c0: 3, c1: 3.3, c2: c}
+# explain struct scalar function with columns #1
+query TT
+explain select struct(a, b, c) from values;
+----
+logical_plan
+Projection: struct(values.a, values.b, values.c)
+--TableScan: values projection=[a, b, c]
+physical_plan
+ProjectionExec: expr=[struct(a@0, b@1, c@2) as struct(values.a,values.b,values.c)]
+--MemoryExec: partitions=1, partition_sizes=[1]
+
statement ok
drop table values;