This is an automated email from the ASF dual-hosted git repository.
jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new f229dcc08b move ArrayDims, ArrayNdims and Cardinality to
datafusion-function-crate (#9425)
f229dcc08b is described below
commit f229dcc08b581fd863eb21885ba393d39a0d554b
Author: Alex Huang <[email protected]>
AuthorDate: Sun Mar 3 11:01:38 2024 +0800
move ArrayDims, ArrayNdims and Cardinality to datafusion-function-crate
(#9425)
* Update array functions and remove ArrayDims and Cardinality
* move ArrayNdims function
* add roundtrip tests
---
datafusion/expr/src/built_in_function.rs | 20 ---
datafusion/expr/src/expr_fn.rs | 22 ---
datafusion/functions-array/src/kernels.rs | 139 +++++++++++++++--
datafusion/functions-array/src/lib.rs | 6 +
datafusion/functions-array/src/udf.rs | 165 +++++++++++++++++++++
datafusion/physical-expr/src/array_expressions.rs | 122 ---------------
datafusion/physical-expr/src/functions.rs | 9 --
datafusion/proto/proto/datafusion.proto | 6 +-
datafusion/proto/src/generated/pbjson.rs | 9 --
datafusion/proto/src/generated/prost.rs | 12 +-
datafusion/proto/src/logical_plan/from_proto.rs | 30 ++--
datafusion/proto/src/logical_plan/to_proto.rs | 3 -
.../proto/tests/cases/roundtrip_logical_plan.rs | 5 +
13 files changed, 320 insertions(+), 228 deletions(-)
diff --git a/datafusion/expr/src/built_in_function.rs
b/datafusion/expr/src/built_in_function.rs
index 91e3acd0f7..e658cde4dd 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -130,8 +130,6 @@ pub enum BuiltinScalarFunction {
ArrayPopFront,
/// array_pop_back
ArrayPopBack,
- /// array_dims
- ArrayDims,
/// array_distinct
ArrayDistinct,
/// array_element
@@ -140,8 +138,6 @@ pub enum BuiltinScalarFunction {
ArrayEmpty,
/// array_length
ArrayLength,
- /// array_ndims
- ArrayNdims,
/// array_position
ArrayPosition,
/// array_positions
@@ -172,8 +168,6 @@ pub enum BuiltinScalarFunction {
ArrayUnion,
/// array_except
ArrayExcept,
- /// cardinality
- Cardinality,
/// array_resize
ArrayResize,
/// construct an array from columns
@@ -385,12 +379,10 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayHasAll => Volatility::Immutable,
BuiltinScalarFunction::ArrayHasAny => Volatility::Immutable,
BuiltinScalarFunction::ArrayHas => Volatility::Immutable,
- BuiltinScalarFunction::ArrayDims => Volatility::Immutable,
BuiltinScalarFunction::ArrayDistinct => Volatility::Immutable,
BuiltinScalarFunction::ArrayElement => Volatility::Immutable,
BuiltinScalarFunction::ArrayExcept => Volatility::Immutable,
BuiltinScalarFunction::ArrayLength => Volatility::Immutable,
- BuiltinScalarFunction::ArrayNdims => Volatility::Immutable,
BuiltinScalarFunction::ArrayPopFront => Volatility::Immutable,
BuiltinScalarFunction::ArrayPopBack => Volatility::Immutable,
BuiltinScalarFunction::ArrayPosition => Volatility::Immutable,
@@ -409,7 +401,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayIntersect => Volatility::Immutable,
BuiltinScalarFunction::ArrayUnion => Volatility::Immutable,
BuiltinScalarFunction::ArrayResize => Volatility::Immutable,
- BuiltinScalarFunction::Cardinality => Volatility::Immutable,
BuiltinScalarFunction::MakeArray => Volatility::Immutable,
BuiltinScalarFunction::Ascii => Volatility::Immutable,
BuiltinScalarFunction::BitLength => Volatility::Immutable,
@@ -561,9 +552,6 @@ impl BuiltinScalarFunction {
| BuiltinScalarFunction::ArrayHasAny
| BuiltinScalarFunction::ArrayHas
| BuiltinScalarFunction::ArrayEmpty => Ok(Boolean),
- BuiltinScalarFunction::ArrayDims => {
- Ok(List(Arc::new(Field::new("item", UInt64, true))))
- }
BuiltinScalarFunction::ArrayDistinct =>
Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayElement => match &input_expr_types[0] {
List(field)
@@ -574,7 +562,6 @@ impl BuiltinScalarFunction {
),
},
BuiltinScalarFunction::ArrayLength => Ok(UInt64),
- BuiltinScalarFunction::ArrayNdims => Ok(UInt64),
BuiltinScalarFunction::ArrayPopFront =>
Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayPopBack =>
Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayPosition => Ok(UInt64),
@@ -622,7 +609,6 @@ impl BuiltinScalarFunction {
(dt, _) => Ok(dt),
}
}
- BuiltinScalarFunction::Cardinality => Ok(UInt64),
BuiltinScalarFunction::MakeArray => match input_expr_types.len() {
0 => Ok(List(Arc::new(Field::new("item", Null, true)))),
_ => {
@@ -884,7 +870,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayConcat => {
Signature::variadic_any(self.volatility())
}
- BuiltinScalarFunction::ArrayDims =>
Signature::array(self.volatility()),
BuiltinScalarFunction::ArrayEmpty =>
Signature::array(self.volatility()),
BuiltinScalarFunction::ArrayElement => {
Signature::array_and_index(self.volatility())
@@ -900,7 +885,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayLength => {
Signature::variadic_any(self.volatility())
}
- BuiltinScalarFunction::ArrayNdims =>
Signature::array(self.volatility()),
BuiltinScalarFunction::ArrayDistinct =>
Signature::array(self.volatility()),
BuiltinScalarFunction::ArrayPosition => {
Signature::array_and_element_and_optional_index(self.volatility())
@@ -931,7 +915,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayIntersect => Signature::any(2,
self.volatility()),
BuiltinScalarFunction::ArrayUnion => Signature::any(2,
self.volatility()),
- BuiltinScalarFunction::Cardinality =>
Signature::array(self.volatility()),
BuiltinScalarFunction::ArrayResize => {
Signature::variadic_any(self.volatility())
}
@@ -1481,7 +1464,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayConcat => {
&["array_concat", "array_cat", "list_concat", "list_cat"]
}
- BuiltinScalarFunction::ArrayDims => &["array_dims", "list_dims"],
BuiltinScalarFunction::ArrayDistinct => &["array_distinct",
"list_distinct"],
BuiltinScalarFunction::ArrayEmpty => &["empty"],
BuiltinScalarFunction::ArrayElement => &[
@@ -1498,7 +1480,6 @@ impl BuiltinScalarFunction {
&["array_has", "list_has", "array_contains", "list_contains"]
}
BuiltinScalarFunction::ArrayLength => &["array_length",
"list_length"],
- BuiltinScalarFunction::ArrayNdims => &["array_ndims",
"list_ndims"],
BuiltinScalarFunction::ArrayPopFront => {
&["array_pop_front", "list_pop_front"]
}
@@ -1534,7 +1515,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayReverse => &["array_reverse",
"list_reverse"],
BuiltinScalarFunction::ArraySlice => &["array_slice",
"list_slice"],
BuiltinScalarFunction::ArrayUnion => &["array_union",
"list_union"],
- BuiltinScalarFunction::Cardinality => &["cardinality"],
BuiltinScalarFunction::ArrayResize => &["array_resize",
"list_resize"],
BuiltinScalarFunction::MakeArray => &["make_array", "make_list"],
BuiltinScalarFunction::ArrayIntersect => {
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index 157b8b0989..07844c27fe 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -628,12 +628,6 @@ scalar_expr!(
array,
"flattens an array of arrays into a single array."
);
-scalar_expr!(
- ArrayDims,
- array_dims,
- array,
- "returns an array of the array's dimensions."
-);
scalar_expr!(
ArrayElement,
array_element,
@@ -652,12 +646,6 @@ scalar_expr!(
array dimension,
"returns the length of the array dimension."
);
-scalar_expr!(
- ArrayNdims,
- array_ndims,
- array,
- "returns the number of dimensions of the array."
-);
scalar_expr!(
ArrayDistinct,
array_distinct,
@@ -738,13 +726,6 @@ scalar_expr!(
);
scalar_expr!(ArrayUnion, array_union, array1 array2, "returns an array of the
elements in the union of array1 and array2 without duplicates.");
-scalar_expr!(
- Cardinality,
- cardinality,
- array,
- "returns the total number of elements in the array."
-);
-
scalar_expr!(
ArrayResize,
array_resize,
@@ -1389,9 +1370,7 @@ mod test {
test_scalar_expr!(ArraySort, array_sort, array, desc, null_first);
test_scalar_expr!(ArrayPopFront, array_pop_front, array);
test_scalar_expr!(ArrayPopBack, array_pop_back, array);
- test_unary_scalar_expr!(ArrayDims, array_dims);
test_scalar_expr!(ArrayLength, array_length, array, dimension);
- test_unary_scalar_expr!(ArrayNdims, array_ndims);
test_scalar_expr!(ArrayPosition, array_position, array, element,
index);
test_scalar_expr!(ArrayPositions, array_positions, array, element);
test_scalar_expr!(ArrayPrepend, array_prepend, array, element);
@@ -1402,7 +1381,6 @@ mod test {
test_scalar_expr!(ArrayReplace, array_replace, array, from, to);
test_scalar_expr!(ArrayReplaceN, array_replace_n, array, from, to,
max);
test_scalar_expr!(ArrayReplaceAll, array_replace_all, array, from, to);
- test_unary_scalar_expr!(Cardinality, cardinality);
test_nary_scalar_expr!(MakeArray, array, input);
test_unary_scalar_expr!(ArrowTypeof, arrow_typeof);
diff --git a/datafusion/functions-array/src/kernels.rs
b/datafusion/functions-array/src/kernels.rs
index b9a68b4666..8631e42a0f 100644
--- a/datafusion/functions-array/src/kernels.rs
+++ b/datafusion/functions-array/src/kernels.rs
@@ -17,16 +17,19 @@
//! implementation kernels for array functions
+use arrow::array::ListArray;
use arrow::array::{
Array, ArrayRef, BooleanArray, Float32Array, Float64Array,
GenericListArray,
Int16Array, Int32Array, Int64Array, Int8Array, LargeStringArray,
OffsetSizeTrait,
StringArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
};
-use arrow::datatypes::DataType;
+use arrow::buffer::OffsetBuffer;
+use arrow::datatypes::Field;
+use arrow::datatypes::{DataType, UInt64Type};
use datafusion_common::cast::{
as_int64_array, as_large_list_array, as_list_array, as_string_array,
};
-use datafusion_common::{exec_err, DataFusionError};
+use datafusion_common::{exec_err, DataFusionError, Result};
use std::any::type_name;
use std::sync::Arc;
macro_rules! downcast_arg {
@@ -102,7 +105,7 @@ macro_rules! call_array_function {
}
/// Array_to_string SQL function
-pub(super) fn array_to_string(args: &[ArrayRef]) ->
datafusion_common::Result<ArrayRef> {
+pub(super) fn array_to_string(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() < 2 || args.len() > 3 {
return exec_err!("array_to_string expects two or three arguments");
}
@@ -254,9 +257,6 @@ pub(super) fn array_to_string(args: &[ArrayRef]) ->
datafusion_common::Result<Ar
Ok(Arc::new(string_arr))
}
-use arrow::array::ListArray;
-use arrow::buffer::OffsetBuffer;
-use arrow::datatypes::Field;
/// Generates an array of integers from start to stop with a given step.
///
/// This function takes 1 to 3 ArrayRefs as arguments, representing start,
stop, and step values.
@@ -271,10 +271,7 @@ use arrow::datatypes::Field;
/// gen_range(3) => [0, 1, 2]
/// gen_range(1, 4) => [1, 2, 3]
/// gen_range(1, 7, 2) => [1, 3, 5]
-pub fn gen_range(
- args: &[ArrayRef],
- include_upper: i64,
-) -> datafusion_common::Result<ArrayRef> {
+pub fn gen_range(args: &[ArrayRef], include_upper: i64) -> Result<ArrayRef> {
let (start_array, stop_array, step_array) = match args.len() {
1 => (None, as_int64_array(&args[0])?, None),
2 => (
@@ -319,3 +316,125 @@ pub fn gen_range(
)?);
Ok(arr)
}
+
+/// Returns the length of each array dimension
+fn compute_array_dims(arr: Option<ArrayRef>) ->
Result<Option<Vec<Option<u64>>>> {
+ let mut value = match arr {
+ Some(arr) => arr,
+ None => return Ok(None),
+ };
+ if value.is_empty() {
+ return Ok(None);
+ }
+ let mut res = vec![Some(value.len() as u64)];
+
+ loop {
+ match value.data_type() {
+ DataType::List(..) => {
+ value = downcast_arg!(value, ListArray).value(0);
+ res.push(Some(value.len() as u64));
+ }
+ _ => return Ok(Some(res)),
+ }
+ }
+}
+
+fn generic_list_cardinality<O: OffsetSizeTrait>(
+ array: &GenericListArray<O>,
+) -> Result<ArrayRef> {
+ let result = array
+ .iter()
+ .map(|arr| match compute_array_dims(arr)? {
+ Some(vector) => Ok(Some(vector.iter().map(|x|
x.unwrap()).product::<u64>())),
+ None => Ok(None),
+ })
+ .collect::<Result<UInt64Array>>()?;
+ Ok(Arc::new(result) as ArrayRef)
+}
+
+/// Cardinality SQL function
+pub fn cardinality(args: &[ArrayRef]) -> Result<ArrayRef> {
+ if args.len() != 1 {
+ return exec_err!("cardinality expects one argument");
+ }
+
+ match &args[0].data_type() {
+ DataType::List(_) => {
+ let list_array = as_list_array(&args[0])?;
+ generic_list_cardinality::<i32>(list_array)
+ }
+ DataType::LargeList(_) => {
+ let list_array = as_large_list_array(&args[0])?;
+ generic_list_cardinality::<i64>(list_array)
+ }
+ other => {
+ exec_err!("cardinality does not support type '{:?}'", other)
+ }
+ }
+}
+
+/// Array_dims SQL function
+pub fn array_dims(args: &[ArrayRef]) -> Result<ArrayRef> {
+ if args.len() != 1 {
+ return exec_err!("array_dims needs one argument");
+ }
+
+ let data = match args[0].data_type() {
+ DataType::List(_) => {
+ let array = as_list_array(&args[0])?;
+ array
+ .iter()
+ .map(compute_array_dims)
+ .collect::<Result<Vec<_>>>()?
+ }
+ DataType::LargeList(_) => {
+ let array = as_large_list_array(&args[0])?;
+ array
+ .iter()
+ .map(compute_array_dims)
+ .collect::<Result<Vec<_>>>()?
+ }
+ array_type => {
+ return exec_err!("array_dims does not support type
'{array_type:?}'");
+ }
+ };
+
+ let result = ListArray::from_iter_primitive::<UInt64Type, _, _>(data);
+
+ Ok(Arc::new(result) as ArrayRef)
+}
+
+/// Array_ndims SQL function
+pub fn array_ndims(args: &[ArrayRef]) -> Result<ArrayRef> {
+ if args.len() != 1 {
+ return exec_err!("array_ndims needs one argument");
+ }
+
+ fn general_list_ndims<O: OffsetSizeTrait>(
+ array: &GenericListArray<O>,
+ ) -> Result<ArrayRef> {
+ let mut data = Vec::new();
+ let ndims = datafusion_common::utils::list_ndims(array.data_type());
+
+ for arr in array.iter() {
+ if arr.is_some() {
+ data.push(Some(ndims))
+ } else {
+ data.push(None)
+ }
+ }
+
+ Ok(Arc::new(UInt64Array::from(data)) as ArrayRef)
+ }
+ match args[0].data_type() {
+ DataType::List(_) => {
+ let array = as_list_array(&args[0])?;
+ general_list_ndims::<i32>(array)
+ }
+ DataType::LargeList(_) => {
+ let array = as_large_list_array(&args[0])?;
+ general_list_ndims::<i64>(array)
+ }
+ array_type => exec_err!("array_ndims does not support type
{array_type:?}"),
+ }
+}
diff --git a/datafusion/functions-array/src/lib.rs
b/datafusion/functions-array/src/lib.rs
index e3515ccf9f..e4cdf69aa9 100644
--- a/datafusion/functions-array/src/lib.rs
+++ b/datafusion/functions-array/src/lib.rs
@@ -39,7 +39,10 @@ use std::sync::Arc;
/// Fluent-style API for creating `Expr`s
pub mod expr_fn {
+ pub use super::udf::array_dims;
+ pub use super::udf::array_ndims;
pub use super::udf::array_to_string;
+ pub use super::udf::cardinality;
pub use super::udf::gen_series;
pub use super::udf::range;
}
@@ -50,6 +53,9 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) ->
Result<()> {
udf::array_to_string_udf(),
udf::range_udf(),
udf::gen_series_udf(),
+ udf::array_dims_udf(),
+ udf::cardinality_udf(),
+ udf::array_ndims_udf(),
];
functions.into_iter().try_for_each(|udf| {
let existing_udf = registry.register_udf(udf)?;
diff --git a/datafusion/functions-array/src/udf.rs
b/datafusion/functions-array/src/udf.rs
index 17769419c0..a35b454647 100644
--- a/datafusion/functions-array/src/udf.rs
+++ b/datafusion/functions-array/src/udf.rs
@@ -202,3 +202,168 @@ impl ScalarUDFImpl for GenSeries {
&self.aliases
}
}
+
+make_udf_function!(
+ ArrayDims,
+ array_dims,
+ array,
+ "returns an array of the array's dimensions.",
+ array_dims_udf
+);
+
+#[derive(Debug)]
+pub(super) struct ArrayDims {
+ signature: Signature,
+ aliases: Vec<String>,
+}
+
+impl ArrayDims {
+ pub fn new() -> Self {
+ Self {
+ signature: Signature::array(Volatility::Immutable),
+ aliases: vec!["array_dims".to_string(), "list_dims".to_string()],
+ }
+ }
+}
+
+impl ScalarUDFImpl for ArrayDims {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+ fn name(&self) -> &str {
+ "array_dims"
+ }
+
+ fn signature(&self) -> &Signature {
+ &self.signature
+ }
+
+ fn return_type(&self, arg_types: &[DataType]) ->
datafusion_common::Result<DataType> {
+ use DataType::*;
+ Ok(match arg_types[0] {
+ List(_) | LargeList(_) | FixedSizeList(_, _) => {
+ List(Arc::new(Field::new("item", UInt64, true)))
+ }
+ _ => {
+ return plan_err!("The array_dims function can only accept
List/LargeList/FixedSizeList.");
+ }
+ })
+ }
+
+ fn invoke(&self, args: &[ColumnarValue]) ->
datafusion_common::Result<ColumnarValue> {
+ let args = ColumnarValue::values_to_arrays(args)?;
+ crate::kernels::array_dims(&args).map(ColumnarValue::Array)
+ }
+
+ fn aliases(&self) -> &[String] {
+ &self.aliases
+ }
+}
+
+make_udf_function!(
+ Cardinality,
+ cardinality,
+ array,
+ "returns the total number of elements in the array.",
+ cardinality_udf
+);
+
+impl Cardinality {
+ pub fn new() -> Self {
+ Self {
+ signature: Signature::array(Volatility::Immutable),
+ aliases: vec![String::from("cardinality")],
+ }
+ }
+}
+
+#[derive(Debug)]
+pub(super) struct Cardinality {
+ signature: Signature,
+ aliases: Vec<String>,
+}
+impl ScalarUDFImpl for Cardinality {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+ fn name(&self) -> &str {
+ "cardinality"
+ }
+
+ fn signature(&self) -> &Signature {
+ &self.signature
+ }
+
+ fn return_type(&self, arg_types: &[DataType]) ->
datafusion_common::Result<DataType> {
+ use DataType::*;
+ Ok(match arg_types[0] {
+ List(_) | LargeList(_) | FixedSizeList(_, _) => UInt64,
+ _ => {
+ return plan_err!("The cardinality function can only accept
List/LargeList/FixedSizeList.");
+ }
+ })
+ }
+
+ fn invoke(&self, args: &[ColumnarValue]) ->
datafusion_common::Result<ColumnarValue> {
+ let args = ColumnarValue::values_to_arrays(args)?;
+ crate::kernels::cardinality(&args).map(ColumnarValue::Array)
+ }
+
+ fn aliases(&self) -> &[String] {
+ &self.aliases
+ }
+}
+
+make_udf_function!(
+ ArrayNdims,
+ array_ndims,
+ array,
+ "returns the number of dimensions of the array.",
+ array_ndims_udf
+);
+
+#[derive(Debug)]
+pub(super) struct ArrayNdims {
+ signature: Signature,
+ aliases: Vec<String>,
+}
+impl ArrayNdims {
+ pub fn new() -> Self {
+ Self {
+ signature: Signature::array(Volatility::Immutable),
+ aliases: vec![String::from("array_ndims"),
String::from("list_ndims")],
+ }
+ }
+}
+
+impl ScalarUDFImpl for ArrayNdims {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+ fn name(&self) -> &str {
+ "array_ndims"
+ }
+
+ fn signature(&self) -> &Signature {
+ &self.signature
+ }
+
+ fn return_type(&self, arg_types: &[DataType]) ->
datafusion_common::Result<DataType> {
+ use DataType::*;
+ Ok(match arg_types[0] {
+ List(_) | LargeList(_) | FixedSizeList(_, _) => UInt64,
+ _ => {
+ return plan_err!("The array_ndims function can only accept
List/LargeList/FixedSizeList.");
+ }
+ })
+ }
+
+ fn invoke(&self, args: &[ColumnarValue]) ->
datafusion_common::Result<ColumnarValue> {
+ let args = ColumnarValue::values_to_arrays(args)?;
+ crate::kernels::array_ndims(&args).map(ColumnarValue::Array)
+ }
+
+ fn aliases(&self) -> &[String] {
+ &self.aliases
+ }
+}
diff --git a/datafusion/physical-expr/src/array_expressions.rs
b/datafusion/physical-expr/src/array_expressions.rs
index 01b2ae13c8..c10f5df540 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -193,28 +193,6 @@ fn compute_array_length(
}
}
-/// Returns the length of each array dimension
-fn compute_array_dims(arr: Option<ArrayRef>) ->
Result<Option<Vec<Option<u64>>>> {
- let mut value = match arr {
- Some(arr) => arr,
- None => return Ok(None),
- };
- if value.is_empty() {
- return Ok(None);
- }
- let mut res = vec![Some(value.len() as u64)];
-
- loop {
- match value.data_type() {
- DataType::List(..) => {
- value = downcast_arg!(value, ListArray).value(0);
- res.push(Some(value.len() as u64));
- }
- _ => return Ok(Some(res)),
- }
- }
-}
-
fn check_datatypes(name: &str, args: &[&ArrayRef]) -> Result<()> {
let data_type = args[0].data_type();
if !args.iter().all(|arg| {
@@ -1938,40 +1916,6 @@ pub fn array_intersect(args: &[ArrayRef]) ->
Result<ArrayRef> {
general_set_op(array1, array2, SetOp::Intersect)
}
-/// Cardinality SQL function
-pub fn cardinality(args: &[ArrayRef]) -> Result<ArrayRef> {
- if args.len() != 1 {
- return exec_err!("cardinality expects one argument");
- }
-
- match &args[0].data_type() {
- DataType::List(_) => {
- let list_array = as_list_array(&args[0])?;
- generic_list_cardinality::<i32>(list_array)
- }
- DataType::LargeList(_) => {
- let list_array = as_large_list_array(&args[0])?;
- generic_list_cardinality::<i64>(list_array)
- }
- other => {
- exec_err!("cardinality does not support type '{:?}'", other)
- }
- }
-}
-
-fn generic_list_cardinality<O: OffsetSizeTrait>(
- array: &GenericListArray<O>,
-) -> Result<ArrayRef> {
- let result = array
- .iter()
- .map(|arr| match compute_array_dims(arr)? {
- Some(vector) => Ok(Some(vector.iter().map(|x|
x.unwrap()).product::<u64>())),
- None => Ok(None),
- })
- .collect::<Result<UInt64Array>>()?;
- Ok(Arc::new(result) as ArrayRef)
-}
-
// Create new offsets that are euqiavlent to `flatten` the array.
fn get_offsets_for_flatten<O: OffsetSizeTrait>(
offsets: OffsetBuffer<O>,
@@ -2074,72 +2018,6 @@ pub fn array_length(args: &[ArrayRef]) ->
Result<ArrayRef> {
}
}
-/// Array_dims SQL function
-pub fn array_dims(args: &[ArrayRef]) -> Result<ArrayRef> {
- if args.len() != 1 {
- return exec_err!("array_dims needs one argument");
- }
-
- let data = match args[0].data_type() {
- DataType::List(_) => {
- let array = as_list_array(&args[0])?;
- array
- .iter()
- .map(compute_array_dims)
- .collect::<Result<Vec<_>>>()?
- }
- DataType::LargeList(_) => {
- let array = as_large_list_array(&args[0])?;
- array
- .iter()
- .map(compute_array_dims)
- .collect::<Result<Vec<_>>>()?
- }
- array_type => {
- return exec_err!("array_dims does not support type
'{array_type:?}'");
- }
- };
-
- let result = ListArray::from_iter_primitive::<UInt64Type, _, _>(data);
-
- Ok(Arc::new(result) as ArrayRef)
-}
-
-/// Array_ndims SQL function
-pub fn array_ndims(args: &[ArrayRef]) -> Result<ArrayRef> {
- if args.len() != 1 {
- return exec_err!("array_ndims needs one argument");
- }
-
- fn general_list_ndims<O: OffsetSizeTrait>(
- array: &GenericListArray<O>,
- ) -> Result<ArrayRef> {
- let mut data = Vec::new();
- let ndims = datafusion_common::utils::list_ndims(array.data_type());
-
- for arr in array.iter() {
- if arr.is_some() {
- data.push(Some(ndims))
- } else {
- data.push(None)
- }
- }
-
- Ok(Arc::new(UInt64Array::from(data)) as ArrayRef)
- }
- match args[0].data_type() {
- DataType::List(_) => {
- let array = as_list_array(&args[0])?;
- general_list_ndims::<i32>(array)
- }
- DataType::LargeList(_) => {
- let array = as_large_list_array(&args[0])?;
- general_list_ndims::<i64>(array)
- }
- array_type => exec_err!("array_ndims does not support type
{array_type:?}"),
- }
-}
-
/// Represents the type of comparison for array_has.
#[derive(Debug, PartialEq)]
enum ComparisonType {
diff --git a/datafusion/physical-expr/src/functions.rs
b/datafusion/physical-expr/src/functions.rs
index 56ad92082d..d9d1d704db 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -339,9 +339,6 @@ pub fn create_physical_fun(
BuiltinScalarFunction::ArrayHas => Arc::new(|args| {
make_scalar_function_inner(array_expressions::array_has)(args)
}),
- BuiltinScalarFunction::ArrayDims => Arc::new(|args| {
- make_scalar_function_inner(array_expressions::array_dims)(args)
- }),
BuiltinScalarFunction::ArrayDistinct => Arc::new(|args| {
make_scalar_function_inner(array_expressions::array_distinct)(args)
}),
@@ -357,9 +354,6 @@ pub fn create_physical_fun(
BuiltinScalarFunction::Flatten => {
Arc::new(|args|
make_scalar_function_inner(array_expressions::flatten)(args))
}
- BuiltinScalarFunction::ArrayNdims => Arc::new(|args| {
- make_scalar_function_inner(array_expressions::array_ndims)(args)
- }),
BuiltinScalarFunction::ArrayPopFront => Arc::new(|args| {
make_scalar_function_inner(array_expressions::array_pop_front)(args)
}),
@@ -405,9 +399,6 @@ pub fn create_physical_fun(
BuiltinScalarFunction::ArrayIntersect => Arc::new(|args| {
make_scalar_function_inner(array_expressions::array_intersect)(args)
}),
- BuiltinScalarFunction::Cardinality => Arc::new(|args| {
- make_scalar_function_inner(array_expressions::cardinality)(args)
- }),
BuiltinScalarFunction::ArrayResize => Arc::new(|args| {
make_scalar_function_inner(array_expressions::array_resize)(args)
}),
diff --git a/datafusion/proto/proto/datafusion.proto
b/datafusion/proto/proto/datafusion.proto
index 528f977a94..ef2db5a75d 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -636,17 +636,17 @@ enum ScalarFunction {
Gcd = 85;
ArrayAppend = 86;
ArrayConcat = 87;
- ArrayDims = 88;
+ // 88 was ArrayDims
ArrayRepeat = 89;
ArrayLength = 90;
- ArrayNdims = 91;
+ // 91 was ArrayNdims
ArrayPosition = 92;
ArrayPositions = 93;
ArrayPrepend = 94;
ArrayRemove = 95;
ArrayReplace = 96;
// 97 was ArrayToString
- Cardinality = 98;
+ // 98 was Cardinality
ArrayElement = 99;
ArraySlice = 100;
Cot = 103;
diff --git a/datafusion/proto/src/generated/pbjson.rs
b/datafusion/proto/src/generated/pbjson.rs
index d6ee204d5c..e9e1f27086 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -22401,16 +22401,13 @@ impl serde::Serialize for ScalarFunction {
Self::Gcd => "Gcd",
Self::ArrayAppend => "ArrayAppend",
Self::ArrayConcat => "ArrayConcat",
- Self::ArrayDims => "ArrayDims",
Self::ArrayRepeat => "ArrayRepeat",
Self::ArrayLength => "ArrayLength",
- Self::ArrayNdims => "ArrayNdims",
Self::ArrayPosition => "ArrayPosition",
Self::ArrayPositions => "ArrayPositions",
Self::ArrayPrepend => "ArrayPrepend",
Self::ArrayRemove => "ArrayRemove",
Self::ArrayReplace => "ArrayReplace",
- Self::Cardinality => "Cardinality",
Self::ArrayElement => "ArrayElement",
Self::ArraySlice => "ArraySlice",
Self::Cot => "Cot",
@@ -22535,16 +22532,13 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"Gcd",
"ArrayAppend",
"ArrayConcat",
- "ArrayDims",
"ArrayRepeat",
"ArrayLength",
- "ArrayNdims",
"ArrayPosition",
"ArrayPositions",
"ArrayPrepend",
"ArrayRemove",
"ArrayReplace",
- "Cardinality",
"ArrayElement",
"ArraySlice",
"Cot",
@@ -22698,16 +22692,13 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"Gcd" => Ok(ScalarFunction::Gcd),
"ArrayAppend" => Ok(ScalarFunction::ArrayAppend),
"ArrayConcat" => Ok(ScalarFunction::ArrayConcat),
- "ArrayDims" => Ok(ScalarFunction::ArrayDims),
"ArrayRepeat" => Ok(ScalarFunction::ArrayRepeat),
"ArrayLength" => Ok(ScalarFunction::ArrayLength),
- "ArrayNdims" => Ok(ScalarFunction::ArrayNdims),
"ArrayPosition" => Ok(ScalarFunction::ArrayPosition),
"ArrayPositions" => Ok(ScalarFunction::ArrayPositions),
"ArrayPrepend" => Ok(ScalarFunction::ArrayPrepend),
"ArrayRemove" => Ok(ScalarFunction::ArrayRemove),
"ArrayReplace" => Ok(ScalarFunction::ArrayReplace),
- "Cardinality" => Ok(ScalarFunction::Cardinality),
"ArrayElement" => Ok(ScalarFunction::ArrayElement),
"ArraySlice" => Ok(ScalarFunction::ArraySlice),
"Cot" => Ok(ScalarFunction::Cot),
diff --git a/datafusion/proto/src/generated/prost.rs
b/datafusion/proto/src/generated/prost.rs
index 432dd4a8a6..eca0156dd2 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2725,17 +2725,17 @@ pub enum ScalarFunction {
Gcd = 85,
ArrayAppend = 86,
ArrayConcat = 87,
- ArrayDims = 88,
+ /// 88 was ArrayDims
ArrayRepeat = 89,
ArrayLength = 90,
- ArrayNdims = 91,
+ /// 91 was ArrayNdims
ArrayPosition = 92,
ArrayPositions = 93,
ArrayPrepend = 94,
ArrayRemove = 95,
ArrayReplace = 96,
/// 97 was ArrayToString
- Cardinality = 98,
+ /// 98 was Cardinality
ArrayElement = 99,
ArraySlice = 100,
Cot = 103,
@@ -2861,16 +2861,13 @@ impl ScalarFunction {
ScalarFunction::Gcd => "Gcd",
ScalarFunction::ArrayAppend => "ArrayAppend",
ScalarFunction::ArrayConcat => "ArrayConcat",
- ScalarFunction::ArrayDims => "ArrayDims",
ScalarFunction::ArrayRepeat => "ArrayRepeat",
ScalarFunction::ArrayLength => "ArrayLength",
- ScalarFunction::ArrayNdims => "ArrayNdims",
ScalarFunction::ArrayPosition => "ArrayPosition",
ScalarFunction::ArrayPositions => "ArrayPositions",
ScalarFunction::ArrayPrepend => "ArrayPrepend",
ScalarFunction::ArrayRemove => "ArrayRemove",
ScalarFunction::ArrayReplace => "ArrayReplace",
- ScalarFunction::Cardinality => "Cardinality",
ScalarFunction::ArrayElement => "ArrayElement",
ScalarFunction::ArraySlice => "ArraySlice",
ScalarFunction::Cot => "Cot",
@@ -2989,16 +2986,13 @@ impl ScalarFunction {
"Gcd" => Some(Self::Gcd),
"ArrayAppend" => Some(Self::ArrayAppend),
"ArrayConcat" => Some(Self::ArrayConcat),
- "ArrayDims" => Some(Self::ArrayDims),
"ArrayRepeat" => Some(Self::ArrayRepeat),
"ArrayLength" => Some(Self::ArrayLength),
- "ArrayNdims" => Some(Self::ArrayNdims),
"ArrayPosition" => Some(Self::ArrayPosition),
"ArrayPositions" => Some(Self::ArrayPositions),
"ArrayPrepend" => Some(Self::ArrayPrepend),
"ArrayRemove" => Some(Self::ArrayRemove),
"ArrayReplace" => Some(Self::ArrayReplace),
- "Cardinality" => Some(Self::Cardinality),
"ArrayElement" => Some(Self::ArrayElement),
"ArraySlice" => Some(Self::ArraySlice),
"Cot" => Some(Self::Cot),
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs
b/datafusion/proto/src/logical_plan/from_proto.rs
index b1fd128d08..29e51e0373 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -47,15 +47,15 @@ use datafusion_common::{
use datafusion_expr::expr::Unnest;
use datafusion_expr::window_frame::{check_window_frame,
regularize_window_order_by};
use datafusion_expr::{
- acosh, array, array_append, array_concat, array_dims, array_distinct,
array_element,
- array_empty, array_except, array_has, array_has_all, array_has_any,
array_intersect,
- array_length, array_ndims, array_pop_back, array_pop_front, array_position,
- array_positions, array_prepend, array_remove, array_remove_all,
array_remove_n,
- array_repeat, array_replace, array_replace_all, array_replace_n,
array_resize,
- array_slice, array_sort, array_union, arrow_typeof, ascii, asinh, atan,
atan2, atanh,
- bit_length, btrim, cardinality, cbrt, ceil, character_length, chr,
coalesce,
- concat_expr, concat_ws_expr, cos, cosh, cot, current_date, current_time,
date_bin,
- date_part, date_trunc, degrees, digest, ends_with, exp,
+ acosh, array, array_append, array_concat, array_distinct, array_element,
array_empty,
+ array_except, array_has, array_has_all, array_has_any, array_intersect,
array_length,
+ array_pop_back, array_pop_front, array_position, array_positions,
array_prepend,
+ array_remove, array_remove_all, array_remove_n, array_repeat,
array_replace,
+ array_replace_all, array_replace_n, array_resize, array_slice, array_sort,
+ array_union, arrow_typeof, ascii, asinh, atan, atan2, atanh, bit_length,
btrim, cbrt,
+ ceil, character_length, chr, coalesce, concat_expr, concat_ws_expr, cos,
cosh, cot,
+ current_date, current_time, date_bin, date_part, date_trunc, degrees,
digest,
+ ends_with, exp,
expr::{self, InList, Sort, WindowFunction},
factorial, find_in_set, flatten, floor, from_unixtime, gcd, initcap,
instr, iszero,
lcm, left, levenshtein, ln, log, log10, log2,
@@ -486,12 +486,10 @@ impl From<&protobuf::ScalarFunction> for
BuiltinScalarFunction {
ScalarFunction::ArrayHasAll => Self::ArrayHasAll,
ScalarFunction::ArrayHasAny => Self::ArrayHasAny,
ScalarFunction::ArrayHas => Self::ArrayHas,
- ScalarFunction::ArrayDims => Self::ArrayDims,
ScalarFunction::ArrayDistinct => Self::ArrayDistinct,
ScalarFunction::ArrayElement => Self::ArrayElement,
ScalarFunction::Flatten => Self::Flatten,
ScalarFunction::ArrayLength => Self::ArrayLength,
- ScalarFunction::ArrayNdims => Self::ArrayNdims,
ScalarFunction::ArrayPopFront => Self::ArrayPopFront,
ScalarFunction::ArrayPopBack => Self::ArrayPopBack,
ScalarFunction::ArrayPosition => Self::ArrayPosition,
@@ -509,7 +507,6 @@ impl From<&protobuf::ScalarFunction> for
BuiltinScalarFunction {
ScalarFunction::ArrayIntersect => Self::ArrayIntersect,
ScalarFunction::ArrayUnion => Self::ArrayUnion,
ScalarFunction::ArrayResize => Self::ArrayResize,
- ScalarFunction::Cardinality => Self::Cardinality,
ScalarFunction::Array => Self::MakeArray,
ScalarFunction::DatePart => Self::DatePart,
ScalarFunction::DateTrunc => Self::DateTrunc,
@@ -1526,16 +1523,10 @@ pub fn parse_expr(
parse_expr(&args[2], registry, codec)?,
parse_expr(&args[3], registry, codec)?,
)),
- ScalarFunction::Cardinality => {
- Ok(cardinality(parse_expr(&args[0], registry, codec)?))
- }
ScalarFunction::ArrayLength => Ok(array_length(
parse_expr(&args[0], registry, codec)?,
parse_expr(&args[1], registry, codec)?,
)),
- ScalarFunction::ArrayDims => {
- Ok(array_dims(parse_expr(&args[0], registry, codec)?))
- }
ScalarFunction::ArrayDistinct => {
Ok(array_distinct(parse_expr(&args[0], registry, codec)?))
}
@@ -1546,9 +1537,6 @@ pub fn parse_expr(
ScalarFunction::ArrayEmpty => {
Ok(array_empty(parse_expr(&args[0], registry, codec)?))
}
- ScalarFunction::ArrayNdims => {
- Ok(array_ndims(parse_expr(&args[0], registry, codec)?))
- }
ScalarFunction::ArrayUnion => Ok(array_union(
parse_expr(&args[0], registry, codec)?,
parse_expr(&args[1], registry, codec)?,
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs
b/datafusion/proto/src/logical_plan/to_proto.rs
index d238884374..424e1414af 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1463,12 +1463,10 @@ impl TryFrom<&BuiltinScalarFunction> for
protobuf::ScalarFunction {
BuiltinScalarFunction::ArrayHasAll => Self::ArrayHasAll,
BuiltinScalarFunction::ArrayHasAny => Self::ArrayHasAny,
BuiltinScalarFunction::ArrayHas => Self::ArrayHas,
- BuiltinScalarFunction::ArrayDims => Self::ArrayDims,
BuiltinScalarFunction::ArrayDistinct => Self::ArrayDistinct,
BuiltinScalarFunction::ArrayElement => Self::ArrayElement,
BuiltinScalarFunction::Flatten => Self::Flatten,
BuiltinScalarFunction::ArrayLength => Self::ArrayLength,
- BuiltinScalarFunction::ArrayNdims => Self::ArrayNdims,
BuiltinScalarFunction::ArrayPopFront => Self::ArrayPopFront,
BuiltinScalarFunction::ArrayPopBack => Self::ArrayPopBack,
BuiltinScalarFunction::ArrayPosition => Self::ArrayPosition,
@@ -1486,7 +1484,6 @@ impl TryFrom<&BuiltinScalarFunction> for
protobuf::ScalarFunction {
BuiltinScalarFunction::ArraySlice => Self::ArraySlice,
BuiltinScalarFunction::ArrayIntersect => Self::ArrayIntersect,
BuiltinScalarFunction::ArrayUnion => Self::ArrayUnion,
- BuiltinScalarFunction::Cardinality => Self::Cardinality,
BuiltinScalarFunction::MakeArray => Self::Array,
BuiltinScalarFunction::DatePart => Self::DatePart,
BuiltinScalarFunction::DateTrunc => Self::DateTrunc,
diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
index 0ec44190ef..702ae99bab 100644
--- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
+++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
@@ -583,6 +583,11 @@ async fn roundtrip_expr_api() -> Result<()> {
encode(col("a").cast_to(&DataType::Utf8, &schema)?, lit("hex")),
decode(lit("1234"), lit("hex")),
array_to_string(array(vec![lit(1), lit(2), lit(3)]), lit(",")),
+ array_dims(array(vec![lit(1), lit(2), lit(3)])),
+ array_ndims(array(vec![lit(1), lit(2), lit(3)])),
+ cardinality(array(vec![lit(1), lit(2), lit(3)])),
+ range(lit(1), lit(10), lit(2)),
+ gen_series(lit(1), lit(10), lit(2)),
];
// ensure expressions created with the expr api can be round tripped