This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 8ebff9e9fb refactor: Apply minor refactorings to `functions-array`
crate (#9788)
8ebff9e9fb is described below
commit 8ebff9e9fb7ac365cc3be687f42f6315c2303fe4
Author: Eren Avsarogullari <[email protected]>
AuthorDate: Mon Mar 25 04:19:04 2024 -0700
refactor: Apply minor refactorings to `functions-array` crate (#9788)
* Issue-9787 - Apply minor refactorings to functions-array crate
* Issue-9787 - Clean-up redundant datafusion_common::Result definitions
* Issue-9787 - Addressed review comment
---
datafusion/functions-array/src/array_has.rs | 14 +++---
datafusion/functions-array/src/cardinality.rs | 11 +++--
datafusion/functions-array/src/concat.rs | 10 ++--
datafusion/functions-array/src/empty.rs | 12 ++---
datafusion/functions-array/src/except.rs | 15 +++---
datafusion/functions-array/src/extract.rs | 37 +++++++--------
datafusion/functions-array/src/flatten.rs | 12 ++---
datafusion/functions-array/src/length.rs | 16 +++----
datafusion/functions-array/src/lib.rs | 6 +--
.../functions-array/src/{core.rs => make_array.rs} | 23 ++++------
datafusion/functions-array/src/position.rs | 42 +++++++----------
datafusion/functions-array/src/range.rs | 25 +++-------
datafusion/functions-array/src/remove.rs | 37 +++++++--------
datafusion/functions-array/src/repeat.rs | 13 +++---
datafusion/functions-array/src/replace.rs | 23 ++++++----
datafusion/functions-array/src/resize.rs | 15 +++---
datafusion/functions-array/src/reverse.rs | 18 ++++----
datafusion/functions-array/src/rewrite.rs | 3 +-
datafusion/functions-array/src/set_ops.rs | 53 +++++++++++-----------
datafusion/functions-array/src/sort.rs | 13 +++---
datafusion/functions-array/src/string.rs | 16 +++----
21 files changed, 195 insertions(+), 219 deletions(-)
diff --git a/datafusion/functions-array/src/array_has.rs
b/datafusion/functions-array/src/array_has.rs
index 17c0ad1619..4e4ebaf035 100644
--- a/datafusion/functions-array/src/array_has.rs
+++ b/datafusion/functions-array/src/array_has.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-//! [`ScalarUDFImpl`] definitions for array functions.
+//! [`ScalarUDFImpl`] definitions for array_has, array_has_all and
array_has_any functions.
use arrow::array::{Array, ArrayRef, BooleanArray, OffsetSizeTrait};
use arrow::datatypes::DataType;
@@ -85,11 +85,11 @@ impl ScalarUDFImpl for ArrayHas {
&self.signature
}
- fn return_type(&self, _: &[DataType]) ->
datafusion_common::Result<DataType> {
+ fn return_type(&self, _: &[DataType]) -> Result<DataType> {
Ok(DataType::Boolean)
}
- fn invoke(&self, args: &[ColumnarValue]) ->
datafusion_common::Result<ColumnarValue> {
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
let args = ColumnarValue::values_to_arrays(args)?;
if args.len() != 2 {
@@ -147,11 +147,11 @@ impl ScalarUDFImpl for ArrayHasAll {
&self.signature
}
- fn return_type(&self, _: &[DataType]) ->
datafusion_common::Result<DataType> {
+ fn return_type(&self, _: &[DataType]) -> Result<DataType> {
Ok(DataType::Boolean)
}
- fn invoke(&self, args: &[ColumnarValue]) ->
datafusion_common::Result<ColumnarValue> {
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
let args = ColumnarValue::values_to_arrays(args)?;
if args.len() != 2 {
return exec_err!("array_has_all needs two arguments");
@@ -204,11 +204,11 @@ impl ScalarUDFImpl for ArrayHasAny {
&self.signature
}
- fn return_type(&self, _: &[DataType]) ->
datafusion_common::Result<DataType> {
+ fn return_type(&self, _: &[DataType]) -> Result<DataType> {
Ok(DataType::Boolean)
}
- fn invoke(&self, args: &[ColumnarValue]) ->
datafusion_common::Result<ColumnarValue> {
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
let args = ColumnarValue::values_to_arrays(args)?;
if args.len() != 2 {
diff --git a/datafusion/functions-array/src/cardinality.rs
b/datafusion/functions-array/src/cardinality.rs
index 483336fe08..ed9f8d01f9 100644
--- a/datafusion/functions-array/src/cardinality.rs
+++ b/datafusion/functions-array/src/cardinality.rs
@@ -22,6 +22,7 @@ use arrow_array::{ArrayRef, GenericListArray,
OffsetSizeTrait, UInt64Array};
use arrow_schema::DataType;
use arrow_schema::DataType::{FixedSizeList, LargeList, List, UInt64};
use datafusion_common::cast::{as_large_list_array, as_list_array};
+use datafusion_common::Result;
use datafusion_common::{exec_err, plan_err};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature,
Volatility};
@@ -62,7 +63,7 @@ impl ScalarUDFImpl for Cardinality {
&self.signature
}
- fn return_type(&self, arg_types: &[DataType]) ->
datafusion_common::Result<DataType> {
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
Ok(match arg_types[0] {
List(_) | LargeList(_) | FixedSizeList(_, _) => UInt64,
_ => {
@@ -71,7 +72,7 @@ impl ScalarUDFImpl for Cardinality {
})
}
- fn invoke(&self, args: &[ColumnarValue]) ->
datafusion_common::Result<ColumnarValue> {
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
make_scalar_function(cardinality_inner)(args)
}
@@ -81,7 +82,7 @@ impl ScalarUDFImpl for Cardinality {
}
/// Cardinality SQL function
-pub fn cardinality_inner(args: &[ArrayRef]) ->
datafusion_common::Result<ArrayRef> {
+pub fn cardinality_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() != 1 {
return exec_err!("cardinality expects one argument");
}
@@ -103,13 +104,13 @@ pub fn cardinality_inner(args: &[ArrayRef]) ->
datafusion_common::Result<ArrayRe
fn generic_list_cardinality<O: OffsetSizeTrait>(
array: &GenericListArray<O>,
-) -> datafusion_common::Result<ArrayRef> {
+) -> Result<ArrayRef> {
let result = array
.iter()
.map(|arr| match crate::utils::compute_array_dims(arr)? {
Some(vector) => Ok(Some(vector.iter().map(|x|
x.unwrap()).product::<u64>())),
None => Ok(None),
})
- .collect::<datafusion_common::Result<UInt64Array>>()?;
+ .collect::<Result<UInt64Array>>()?;
Ok(Arc::new(result) as ArrayRef)
}
diff --git a/datafusion/functions-array/src/concat.rs
b/datafusion/functions-array/src/concat.rs
index a8e7d1008f..cb76192e29 100644
--- a/datafusion/functions-array/src/concat.rs
+++ b/datafusion/functions-array/src/concat.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-// Includes `array append`, `array prepend`, and `array concat` functions
+//! [`ScalarUDFImpl`] definitions for `array_append`, `array_prepend` and
`array_concat` functions.
use std::{any::Any, cmp::Ordering, sync::Arc};
@@ -39,7 +39,7 @@ use crate::utils::{align_array_dimensions, check_datatypes,
make_scalar_function
make_udf_function!(
ArrayAppend,
array_append,
- array element, // arg name
+ array element, // arg name
"appends an element to the end of an array.", // doc
array_append_udf // internal function name
);
@@ -283,9 +283,9 @@ fn concat_internal<O: OffsetSizeTrait>(args: &[ArrayRef])
-> Result<ArrayRef> {
.collect::<Vec<&dyn Array>>();
// Concatenated array on i-th row
- let concated_array = arrow::compute::concat(elements.as_slice())?;
- array_lengths.push(concated_array.len());
- arrays.push(concated_array);
+ let concatenated_array =
arrow::compute::concat(elements.as_slice())?;
+ array_lengths.push(concatenated_array.len());
+ arrays.push(concatenated_array);
valid.append(true);
}
}
diff --git a/datafusion/functions-array/src/empty.rs
b/datafusion/functions-array/src/empty.rs
index 37b247deb4..f11a6f07cf 100644
--- a/datafusion/functions-array/src/empty.rs
+++ b/datafusion/functions-array/src/empty.rs
@@ -22,7 +22,7 @@ use arrow_array::{ArrayRef, BooleanArray, OffsetSizeTrait};
use arrow_schema::DataType;
use arrow_schema::DataType::{Boolean, FixedSizeList, LargeList, List};
use datafusion_common::cast::{as_generic_list_array, as_null_array};
-use datafusion_common::{exec_err, plan_err};
+use datafusion_common::{exec_err, plan_err, Result};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature,
Volatility};
use std::any::Any;
@@ -62,7 +62,7 @@ impl ScalarUDFImpl for ArrayEmpty {
&self.signature
}
- fn return_type(&self, arg_types: &[DataType]) ->
datafusion_common::Result<DataType> {
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
Ok(match arg_types[0] {
List(_) | LargeList(_) | FixedSizeList(_, _) => Boolean,
_ => {
@@ -71,7 +71,7 @@ impl ScalarUDFImpl for ArrayEmpty {
})
}
- fn invoke(&self, args: &[ColumnarValue]) ->
datafusion_common::Result<ColumnarValue> {
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
make_scalar_function(array_empty_inner)(args)
}
@@ -81,7 +81,7 @@ impl ScalarUDFImpl for ArrayEmpty {
}
/// Array_empty SQL function
-pub fn array_empty_inner(args: &[ArrayRef]) ->
datafusion_common::Result<ArrayRef> {
+pub fn array_empty_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() != 1 {
return exec_err!("array_empty expects one argument");
}
@@ -99,9 +99,7 @@ pub fn array_empty_inner(args: &[ArrayRef]) ->
datafusion_common::Result<ArrayRe
}
}
-fn general_array_empty<O: OffsetSizeTrait>(
- array: &ArrayRef,
-) -> datafusion_common::Result<ArrayRef> {
+fn general_array_empty<O: OffsetSizeTrait>(array: &ArrayRef) ->
Result<ArrayRef> {
let array = as_generic_list_array::<O>(array)?;
let builder = array
.iter()
diff --git a/datafusion/functions-array/src/except.rs
b/datafusion/functions-array/src/except.rs
index 72932d530a..444c7c7587 100644
--- a/datafusion/functions-array/src/except.rs
+++ b/datafusion/functions-array/src/except.rs
@@ -17,13 +17,13 @@
//! [`ScalarUDFImpl`] definitions for array_except function.
-use crate::utils::check_datatypes;
+use crate::utils::{check_datatypes, make_scalar_function};
use arrow::row::{RowConverter, SortField};
use arrow_array::cast::AsArray;
use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
use arrow_buffer::OffsetBuffer;
use arrow_schema::{DataType, FieldRef};
-use datafusion_common::{exec_err, internal_err};
+use datafusion_common::{exec_err, internal_err, Result};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::Expr;
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
@@ -66,16 +66,15 @@ impl ScalarUDFImpl for ArrayExcept {
&self.signature
}
- fn return_type(&self, arg_types: &[DataType]) ->
datafusion_common::Result<DataType> {
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
match (&arg_types[0].clone(), &arg_types[1].clone()) {
(DataType::Null, _) | (_, DataType::Null) =>
Ok(arg_types[0].clone()),
(dt, _) => Ok(dt.clone()),
}
}
- fn invoke(&self, args: &[ColumnarValue]) ->
datafusion_common::Result<ColumnarValue> {
- let args = ColumnarValue::values_to_arrays(args)?;
- array_except_inner(&args).map(ColumnarValue::Array)
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ make_scalar_function(array_except_inner)(args)
}
fn aliases(&self) -> &[String] {
@@ -84,7 +83,7 @@ impl ScalarUDFImpl for ArrayExcept {
}
/// Array_except SQL function
-pub fn array_except_inner(args: &[ArrayRef]) ->
datafusion_common::Result<ArrayRef> {
+pub fn array_except_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() != 2 {
return exec_err!("array_except needs two arguments");
}
@@ -118,7 +117,7 @@ fn general_except<OffsetSize: OffsetSizeTrait>(
l: &GenericListArray<OffsetSize>,
r: &GenericListArray<OffsetSize>,
field: &FieldRef,
-) -> datafusion_common::Result<GenericListArray<OffsetSize>> {
+) -> Result<GenericListArray<OffsetSize>> {
let converter = RowConverter::new(vec![SortField::new(l.value_type())])?;
let l_values = l.values().to_owned();
diff --git a/datafusion/functions-array/src/extract.rs
b/datafusion/functions-array/src/extract.rs
index 86eeaea3c9..0dbd106b6f 100644
--- a/datafusion/functions-array/src/extract.rs
+++ b/datafusion/functions-array/src/extract.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-// Array Element and Array Slice
+//! [`ScalarUDFImpl`] definitions for array_element, array_slice,
array_pop_front and array_pop_back functions.
use arrow::array::Array;
use arrow::array::ArrayRef;
@@ -27,15 +27,14 @@ use arrow::array::MutableArrayData;
use arrow::array::OffsetSizeTrait;
use arrow::buffer::OffsetBuffer;
use arrow::datatypes::DataType;
+use arrow_schema::DataType::{FixedSizeList, LargeList, List};
use arrow_schema::Field;
use datafusion_common::cast::as_int64_array;
use datafusion_common::cast::as_large_list_array;
use datafusion_common::cast::as_list_array;
-use datafusion_common::exec_err;
-use datafusion_common::internal_datafusion_err;
-use datafusion_common::plan_err;
-use datafusion_common::DataFusionError;
-use datafusion_common::Result;
+use datafusion_common::{
+ exec_err, internal_datafusion_err, plan_err, DataFusionError, Result,
+};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::Expr;
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
@@ -110,7 +109,6 @@ impl ScalarUDFImpl for ArrayElement {
}
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
- use DataType::*;
match &arg_types[0] {
List(field)
| LargeList(field)
@@ -137,18 +135,18 @@ impl ScalarUDFImpl for ArrayElement {
///
/// For example:
/// > array_element(\[1, 2, 3], 2) -> 2
-fn array_element_inner(args: &[ArrayRef]) ->
datafusion_common::Result<ArrayRef> {
+fn array_element_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() != 2 {
return exec_err!("array_element needs two arguments");
}
match &args[0].data_type() {
- DataType::List(_) => {
+ List(_) => {
let array = as_list_array(&args[0])?;
let indexes = as_int64_array(&args[1])?;
general_array_element::<i32>(array, indexes)
}
- DataType::LargeList(_) => {
+ LargeList(_) => {
let array = as_large_list_array(&args[0])?;
let indexes = as_int64_array(&args[1])?;
general_array_element::<i64>(array, indexes)
@@ -163,7 +161,7 @@ fn array_element_inner(args: &[ArrayRef]) ->
datafusion_common::Result<ArrayRef>
fn general_array_element<O: OffsetSizeTrait>(
array: &GenericListArray<O>,
indexes: &Int64Array,
-) -> datafusion_common::Result<ArrayRef>
+) -> Result<ArrayRef>
where
i64: TryInto<O>,
{
@@ -175,10 +173,7 @@ where
let mut mutable =
MutableArrayData::with_capacities(vec![&original_data], true,
capacity);
- fn adjusted_array_index<O: OffsetSizeTrait>(
- index: i64,
- len: O,
- ) -> datafusion_common::Result<Option<O>>
+ fn adjusted_array_index<O: OffsetSizeTrait>(index: i64, len: O) ->
Result<Option<O>>
where
i64: TryInto<O>,
{
@@ -302,11 +297,11 @@ fn array_slice_inner(args: &[ArrayRef]) ->
Result<ArrayRef> {
let array_data_type = args[0].data_type();
match array_data_type {
- DataType::List(_) => {
+ List(_) => {
let array = as_list_array(&args[0])?;
general_array_slice::<i32>(array, from_array, to_array, stride)
}
- DataType::LargeList(_) => {
+ LargeList(_) => {
let array = as_large_list_array(&args[0])?;
let from_array = as_int64_array(&args[1])?;
let to_array = as_int64_array(&args[2])?;
@@ -545,11 +540,11 @@ impl ScalarUDFImpl for ArrayPopFront {
fn array_pop_front_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let array_data_type = args[0].data_type();
match array_data_type {
- DataType::List(_) => {
+ List(_) => {
let array = as_list_array(&args[0])?;
general_pop_front_list::<i32>(array)
}
- DataType::LargeList(_) => {
+ LargeList(_) => {
let array = as_large_list_array(&args[0])?;
general_pop_front_list::<i64>(array)
}
@@ -627,11 +622,11 @@ fn array_pop_back_inner(args: &[ArrayRef]) ->
Result<ArrayRef> {
let array_data_type = args[0].data_type();
match array_data_type {
- DataType::List(_) => {
+ List(_) => {
let array = as_list_array(&args[0])?;
general_pop_back_list::<i32>(array)
}
- DataType::LargeList(_) => {
+ LargeList(_) => {
let array = as_large_list_array(&args[0])?;
general_pop_back_list::<i64>(array)
}
diff --git a/datafusion/functions-array/src/flatten.rs
b/datafusion/functions-array/src/flatten.rs
index 27d4b1d5f9..e2b50c6c02 100644
--- a/datafusion/functions-array/src/flatten.rs
+++ b/datafusion/functions-array/src/flatten.rs
@@ -25,7 +25,7 @@ use arrow_schema::DataType::{FixedSizeList, LargeList, List,
Null};
use datafusion_common::cast::{
as_generic_list_array, as_large_list_array, as_list_array,
};
-use datafusion_common::exec_err;
+use datafusion_common::{exec_err, Result};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature,
Volatility};
use std::any::Any;
@@ -66,8 +66,8 @@ impl ScalarUDFImpl for Flatten {
&self.signature
}
- fn return_type(&self, arg_types: &[DataType]) ->
datafusion_common::Result<DataType> {
- fn get_base_type(data_type: &DataType) ->
datafusion_common::Result<DataType> {
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+ fn get_base_type(data_type: &DataType) -> Result<DataType> {
match data_type {
List(field) | FixedSizeList(field, _)
if matches!(field.data_type(), List(_) | FixedSizeList(_,
_)) =>
@@ -89,7 +89,7 @@ impl ScalarUDFImpl for Flatten {
Ok(data_type)
}
- fn invoke(&self, args: &[ColumnarValue]) ->
datafusion_common::Result<ColumnarValue> {
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
make_scalar_function(flatten_inner)(args)
}
@@ -99,7 +99,7 @@ impl ScalarUDFImpl for Flatten {
}
/// Flatten SQL function
-pub fn flatten_inner(args: &[ArrayRef]) -> datafusion_common::Result<ArrayRef>
{
+pub fn flatten_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() != 1 {
return exec_err!("flatten expects one argument");
}
@@ -126,7 +126,7 @@ pub fn flatten_inner(args: &[ArrayRef]) ->
datafusion_common::Result<ArrayRef> {
fn flatten_internal<O: OffsetSizeTrait>(
list_arr: GenericListArray<O>,
indexes: Option<OffsetBuffer<O>>,
-) -> datafusion_common::Result<GenericListArray<O>> {
+) -> Result<GenericListArray<O>> {
let (field, offsets, values, _) = list_arr.clone().into_parts();
let data_type = field.data_type();
diff --git a/datafusion/functions-array/src/length.rs
b/datafusion/functions-array/src/length.rs
index e8e3611317..9bbd11950d 100644
--- a/datafusion/functions-array/src/length.rs
+++ b/datafusion/functions-array/src/length.rs
@@ -26,7 +26,7 @@ use arrow_schema::DataType::{FixedSizeList, LargeList, List,
UInt64};
use core::any::type_name;
use datafusion_common::cast::{as_generic_list_array, as_int64_array};
use datafusion_common::DataFusionError;
-use datafusion_common::{exec_err, plan_err};
+use datafusion_common::{exec_err, plan_err, Result};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature,
Volatility};
use std::any::Any;
@@ -66,7 +66,7 @@ impl ScalarUDFImpl for ArrayLength {
&self.signature
}
- fn return_type(&self, arg_types: &[DataType]) ->
datafusion_common::Result<DataType> {
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
Ok(match arg_types[0] {
List(_) | LargeList(_) | FixedSizeList(_, _) => UInt64,
_ => {
@@ -75,7 +75,7 @@ impl ScalarUDFImpl for ArrayLength {
})
}
- fn invoke(&self, args: &[ColumnarValue]) ->
datafusion_common::Result<ColumnarValue> {
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
make_scalar_function(array_length_inner)(args)
}
@@ -85,7 +85,7 @@ impl ScalarUDFImpl for ArrayLength {
}
/// Array_length SQL function
-pub fn array_length_inner(args: &[ArrayRef]) ->
datafusion_common::Result<ArrayRef> {
+pub fn array_length_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() != 1 && args.len() != 2 {
return exec_err!("array_length expects one or two arguments");
}
@@ -98,9 +98,7 @@ pub fn array_length_inner(args: &[ArrayRef]) ->
datafusion_common::Result<ArrayR
}
/// Dispatch array length computation based on the offset type.
-fn general_array_length<O: OffsetSizeTrait>(
- array: &[ArrayRef],
-) -> datafusion_common::Result<ArrayRef> {
+fn general_array_length<O: OffsetSizeTrait>(array: &[ArrayRef]) ->
Result<ArrayRef> {
let list_array = as_generic_list_array::<O>(&array[0])?;
let dimension = if array.len() == 2 {
as_int64_array(&array[1])?.clone()
@@ -112,7 +110,7 @@ fn general_array_length<O: OffsetSizeTrait>(
.iter()
.zip(dimension.iter())
.map(|(arr, dim)| compute_array_length(arr, dim))
- .collect::<datafusion_common::Result<UInt64Array>>()?;
+ .collect::<Result<UInt64Array>>()?;
Ok(Arc::new(result) as ArrayRef)
}
@@ -121,7 +119,7 @@ fn general_array_length<O: OffsetSizeTrait>(
fn compute_array_length(
arr: Option<ArrayRef>,
dimension: Option<i64>,
-) -> datafusion_common::Result<Option<u64>> {
+) -> Result<Option<u64>> {
let mut current_dimension: i64 = 1;
let mut value = match arr {
Some(arr) => arr,
diff --git a/datafusion/functions-array/src/lib.rs
b/datafusion/functions-array/src/lib.rs
index 30a63deee0..7c261f958b 100644
--- a/datafusion/functions-array/src/lib.rs
+++ b/datafusion/functions-array/src/lib.rs
@@ -31,13 +31,13 @@ pub mod macros;
mod array_has;
mod cardinality;
mod concat;
-mod core;
mod dimension;
mod empty;
mod except;
mod extract;
mod flatten;
mod length;
+mod make_array;
mod position;
mod range;
mod remove;
@@ -66,7 +66,6 @@ pub mod expr_fn {
pub use super::concat::array_append;
pub use super::concat::array_concat;
pub use super::concat::array_prepend;
- pub use super::core::make_array;
pub use super::dimension::array_dims;
pub use super::dimension::array_ndims;
pub use super::empty::array_empty;
@@ -77,6 +76,7 @@ pub mod expr_fn {
pub use super::extract::array_slice;
pub use super::flatten::flatten;
pub use super::length::array_length;
+ pub use super::make_array::make_array;
pub use super::position::array_position;
pub use super::position::array_positions;
pub use super::range::gen_series;
@@ -116,7 +116,7 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) ->
Result<()> {
extract::array_pop_back_udf(),
extract::array_pop_front_udf(),
extract::array_slice_udf(),
- core::make_array_udf(),
+ make_array::make_array_udf(),
array_has::array_has_udf(),
array_has::array_has_all_udf(),
array_has::array_has_any_udf(),
diff --git a/datafusion/functions-array/src/core.rs
b/datafusion/functions-array/src/make_array.rs
similarity index 92%
rename from datafusion/functions-array/src/core.rs
rename to datafusion/functions-array/src/make_array.rs
index fdd127cc3f..8eaae09f28 100644
--- a/datafusion/functions-array/src/core.rs
+++ b/datafusion/functions-array/src/make_array.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-// core array function like `make_array`
+//! [`ScalarUDFImpl`] definitions for `make_array` function.
use std::{any::Any, sync::Arc};
@@ -24,9 +24,9 @@ use arrow_array::{
new_null_array, Array, ArrayRef, GenericListArray, NullArray,
OffsetSizeTrait,
};
use arrow_buffer::OffsetBuffer;
+use arrow_schema::DataType::{LargeList, List, Null};
use arrow_schema::{DataType, Field};
-use datafusion_common::Result;
-use datafusion_common::{plan_err, utils::array_into_list_array};
+use datafusion_common::{plan_err, utils::array_into_list_array, Result};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::Expr;
use datafusion_expr::{
@@ -73,7 +73,7 @@ impl ScalarUDFImpl for MakeArray {
&self.signature
}
- fn return_type(&self, arg_types: &[DataType]) ->
datafusion_common::Result<DataType> {
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
match arg_types.len() {
0 => Ok(DataType::List(Arc::new(Field::new(
"item",
@@ -89,9 +89,7 @@ impl ScalarUDFImpl for MakeArray {
}
}
- Ok(DataType::List(Arc::new(Field::new(
- "item", expr_type, true,
- ))))
+ Ok(List(Arc::new(Field::new("item", expr_type, true))))
}
}
}
@@ -109,10 +107,10 @@ impl ScalarUDFImpl for MakeArray {
/// Constructs an array using the input `data` as `ArrayRef`.
/// Returns a reference-counted `Array` instance result.
pub(crate) fn make_array_inner(arrays: &[ArrayRef]) -> Result<ArrayRef> {
- let mut data_type = DataType::Null;
+ let mut data_type = Null;
for arg in arrays {
let arg_data_type = arg.data_type();
- if !arg_data_type.equals_datatype(&DataType::Null) {
+ if !arg_data_type.equals_datatype(&Null) {
data_type = arg_data_type.clone();
break;
}
@@ -120,12 +118,11 @@ pub(crate) fn make_array_inner(arrays: &[ArrayRef]) ->
Result<ArrayRef> {
match data_type {
// Either an empty array or all nulls:
- DataType::Null => {
- let array =
- new_null_array(&DataType::Null, arrays.iter().map(|a|
a.len()).sum());
+ Null => {
+ let array = new_null_array(&Null, arrays.iter().map(|a|
a.len()).sum());
Ok(Arc::new(array_into_list_array(array)))
}
- DataType::LargeList(..) => array_array::<i64>(arrays, data_type),
+ LargeList(..) => array_array::<i64>(arrays, data_type),
_ => array_array::<i32>(arrays, data_type),
}
}
diff --git a/datafusion/functions-array/src/position.rs
b/datafusion/functions-array/src/position.rs
index 627cf3cb0c..a5a7a7405a 100644
--- a/datafusion/functions-array/src/position.rs
+++ b/datafusion/functions-array/src/position.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-//! [`ScalarUDFImpl`] definitions for array_position function.
+//! [`ScalarUDFImpl`] definitions for array_position and array_positions
functions.
use arrow_schema::DataType::{LargeList, List, UInt64};
use arrow_schema::{DataType, Field};
@@ -32,10 +32,10 @@ use arrow_array::{
use datafusion_common::cast::{
as_generic_list_array, as_int64_array, as_large_list_array, as_list_array,
};
-use datafusion_common::{exec_err, internal_err};
+use datafusion_common::{exec_err, internal_err, Result};
use itertools::Itertools;
-use crate::utils::compare_element_to_list;
+use crate::utils::{compare_element_to_list, make_scalar_function};
make_udf_function!(
ArrayPosition,
@@ -78,16 +78,12 @@ impl ScalarUDFImpl for ArrayPosition {
&self.signature
}
- fn return_type(
- &self,
- _arg_types: &[DataType],
- ) -> datafusion_common::Result<DataType> {
+ fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
Ok(UInt64)
}
- fn invoke(&self, args: &[ColumnarValue]) ->
datafusion_common::Result<ColumnarValue> {
- let args = ColumnarValue::values_to_arrays(args)?;
- array_position_inner(&args).map(ColumnarValue::Array)
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ make_scalar_function(array_position_inner)(args)
}
fn aliases(&self) -> &[String] {
@@ -96,7 +92,7 @@ impl ScalarUDFImpl for ArrayPosition {
}
/// Array_position SQL function
-pub fn array_position_inner(args: &[ArrayRef]) ->
datafusion_common::Result<ArrayRef> {
+pub fn array_position_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() < 2 || args.len() > 3 {
return exec_err!("array_position expects two or three arguments");
}
@@ -106,9 +102,7 @@ pub fn array_position_inner(args: &[ArrayRef]) ->
datafusion_common::Result<Arra
array_type => exec_err!("array_position does not support type
'{array_type:?}'."),
}
}
-fn general_position_dispatch<O: OffsetSizeTrait>(
- args: &[ArrayRef],
-) -> datafusion_common::Result<ArrayRef> {
+fn general_position_dispatch<O: OffsetSizeTrait>(args: &[ArrayRef]) ->
Result<ArrayRef> {
let list_array = as_generic_list_array::<O>(&args[0])?;
let element_array = &args[1];
@@ -146,7 +140,7 @@ fn generic_position<OffsetSize: OffsetSizeTrait>(
list_array: &GenericListArray<OffsetSize>,
element_array: &ArrayRef,
arr_from: Vec<i64>, // 0-indexed
-) -> datafusion_common::Result<ArrayRef> {
+) -> Result<ArrayRef> {
let mut data = Vec::with_capacity(list_array.len());
for (row_index, (list_array_row, &from)) in
@@ -211,16 +205,12 @@ impl ScalarUDFImpl for ArrayPositions {
&self.signature
}
- fn return_type(
- &self,
- _arg_types: &[DataType],
- ) -> datafusion_common::Result<DataType> {
+ fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
Ok(List(Arc::new(Field::new("item", UInt64, true))))
}
- fn invoke(&self, args: &[ColumnarValue]) ->
datafusion_common::Result<ColumnarValue> {
- let args = ColumnarValue::values_to_arrays(args)?;
- array_positions_inner(&args).map(ColumnarValue::Array)
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ make_scalar_function(array_positions_inner)(args)
}
fn aliases(&self) -> &[String] {
@@ -229,7 +219,7 @@ impl ScalarUDFImpl for ArrayPositions {
}
/// Array_positions SQL function
-pub fn array_positions_inner(args: &[ArrayRef]) ->
datafusion_common::Result<ArrayRef> {
+pub fn array_positions_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() != 2 {
return exec_err!("array_positions expects two arguments");
}
@@ -237,12 +227,12 @@ pub fn array_positions_inner(args: &[ArrayRef]) ->
datafusion_common::Result<Arr
let element = &args[1];
match &args[0].data_type() {
- DataType::List(_) => {
+ List(_) => {
let arr = as_list_array(&args[0])?;
crate::utils::check_datatypes("array_positions", &[arr.values(),
element])?;
general_positions::<i32>(arr, element)
}
- DataType::LargeList(_) => {
+ LargeList(_) => {
let arr = as_large_list_array(&args[0])?;
crate::utils::check_datatypes("array_positions", &[arr.values(),
element])?;
general_positions::<i64>(arr, element)
@@ -256,7 +246,7 @@ pub fn array_positions_inner(args: &[ArrayRef]) ->
datafusion_common::Result<Arr
fn general_positions<OffsetSize: OffsetSizeTrait>(
list_array: &GenericListArray<OffsetSize>,
element_array: &ArrayRef,
-) -> datafusion_common::Result<ArrayRef> {
+) -> Result<ArrayRef> {
let mut data = Vec::with_capacity(list_array.len());
for (row_index, list_array_row) in list_array.iter().enumerate() {
diff --git a/datafusion/functions-array/src/range.rs
b/datafusion/functions-array/src/range.rs
index 176a5617d5..1c9e0c878e 100644
--- a/datafusion/functions-array/src/range.rs
+++ b/datafusion/functions-array/src/range.rs
@@ -25,6 +25,7 @@ use std::any::Any;
use crate::utils::make_scalar_function;
use arrow_array::types::{Date32Type, IntervalMonthDayNanoType};
use arrow_array::Date32Array;
+use arrow_schema::DataType::{Date32, Int64, Interval, List};
use arrow_schema::IntervalUnit::MonthDayNano;
use datafusion_common::cast::{as_date32_array, as_int64_array,
as_interval_mdn_array};
use datafusion_common::{exec_err, not_impl_datafusion_err, Result};
@@ -49,7 +50,6 @@ pub(super) struct Range {
}
impl Range {
pub fn new() -> Self {
- use DataType::*;
Self {
signature: Signature::one_of(
vec![
@@ -77,7 +77,6 @@ impl ScalarUDFImpl for Range {
}
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
- use DataType::*;
Ok(List(Arc::new(Field::new(
"item",
arg_types[0].clone(),
@@ -87,12 +86,8 @@ impl ScalarUDFImpl for Range {
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
match args[0].data_type() {
- DataType::Int64 => {
- make_scalar_function(|args| gen_range_inner(args, false))(args)
- }
- DataType::Date32 => {
- make_scalar_function(|args| gen_range_date(args, false))(args)
- }
+ Int64 => make_scalar_function(|args| gen_range_inner(args,
false))(args),
+ Date32 => make_scalar_function(|args| gen_range_date(args,
false))(args),
_ => {
exec_err!("unsupported type for range")
}
@@ -118,7 +113,6 @@ pub(super) struct GenSeries {
}
impl GenSeries {
pub fn new() -> Self {
- use DataType::*;
Self {
signature: Signature::one_of(
vec![
@@ -146,7 +140,6 @@ impl ScalarUDFImpl for GenSeries {
}
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
- use DataType::*;
Ok(List(Arc::new(Field::new(
"item",
arg_types[0].clone(),
@@ -156,12 +149,8 @@ impl ScalarUDFImpl for GenSeries {
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
match args[0].data_type() {
- DataType::Int64 => {
- make_scalar_function(|args| gen_range_inner(args, true))(args)
- }
- DataType::Date32 => {
- make_scalar_function(|args| gen_range_date(args, true))(args)
- }
+ Int64 => make_scalar_function(|args| gen_range_inner(args,
true))(args),
+ Date32 => make_scalar_function(|args| gen_range_date(args,
true))(args),
_ => {
exec_err!("unsupported type for range")
}
@@ -242,7 +231,7 @@ pub(super) fn gen_range_inner(
};
}
let arr = Arc::new(ListArray::try_new(
- Arc::new(Field::new("item", DataType::Int64, true)),
+ Arc::new(Field::new("item", Int64, true)),
OffsetBuffer::new(offsets.into()),
Arc::new(Int64Array::from(values)),
Some(NullBuffer::new(valid.finish())),
@@ -330,7 +319,7 @@ fn gen_range_date(args: &[ArrayRef], include_upper: bool) -> Result<ArrayRef> {
}
let arr = Arc::new(ListArray::try_new(
- Arc::new(Field::new("item", DataType::Date32, true)),
+ Arc::new(Field::new("item", Date32, true)),
OffsetBuffer::new(offsets.into()),
Arc::new(Date32Array::from(values)),
None,
diff --git a/datafusion/functions-array/src/remove.rs b/datafusion/functions-array/src/remove.rs
index 91c76a6708..21e3730810 100644
--- a/datafusion/functions-array/src/remove.rs
+++ b/datafusion/functions-array/src/remove.rs
@@ -18,6 +18,7 @@
//! [`ScalarUDFImpl`] definitions for array_remove, array_remove_n, array_remove_all functions.
use crate::utils;
+use crate::utils::make_scalar_function;
use arrow_array::cast::AsArray;
use arrow_array::{
new_empty_array, Array, ArrayRef, BooleanArray, GenericListArray, OffsetSizeTrait,
@@ -25,7 +26,7 @@ use arrow_array::{
use arrow_buffer::OffsetBuffer;
use arrow_schema::{DataType, Field};
use datafusion_common::cast::as_int64_array;
-use datafusion_common::exec_err;
+use datafusion_common::{exec_err, Result};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility};
use std::any::Any;
@@ -58,6 +59,7 @@ impl ScalarUDFImpl for ArrayRemove {
fn as_any(&self) -> &dyn Any {
self
}
+
fn name(&self) -> &str {
"array_remove"
}
@@ -66,13 +68,12 @@ impl ScalarUDFImpl for ArrayRemove {
&self.signature
}
- fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
Ok(arg_types[0].clone())
}
- fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result<ColumnarValue> {
- let args = ColumnarValue::values_to_arrays(args)?;
- array_remove_inner(&args).map(ColumnarValue::Array)
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ make_scalar_function(array_remove_inner)(args)
}
fn aliases(&self) -> &[String] {
@@ -107,6 +108,7 @@ impl ScalarUDFImpl for ArrayRemoveN {
fn as_any(&self) -> &dyn Any {
self
}
+
fn name(&self) -> &str {
"array_remove_n"
}
@@ -115,13 +117,12 @@ impl ScalarUDFImpl for ArrayRemoveN {
&self.signature
}
- fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
Ok(arg_types[0].clone())
}
- fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result<ColumnarValue> {
- let args = ColumnarValue::values_to_arrays(args)?;
- array_remove_n_inner(&args).map(ColumnarValue::Array)
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ make_scalar_function(array_remove_n_inner)(args)
}
fn aliases(&self) -> &[String] {
@@ -159,6 +160,7 @@ impl ScalarUDFImpl for ArrayRemoveAll {
fn as_any(&self) -> &dyn Any {
self
}
+
fn name(&self) -> &str {
"array_remove_all"
}
@@ -167,13 +169,12 @@ impl ScalarUDFImpl for ArrayRemoveAll {
&self.signature
}
- fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
Ok(arg_types[0].clone())
}
- fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result<ColumnarValue> {
- let args = ColumnarValue::values_to_arrays(args)?;
- array_remove_all_inner(&args).map(ColumnarValue::Array)
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ make_scalar_function(array_remove_all_inner)(args)
}
fn aliases(&self) -> &[String] {
@@ -182,7 +183,7 @@ impl ScalarUDFImpl for ArrayRemoveAll {
}
/// Array_remove SQL function
-pub fn array_remove_inner(args: &[ArrayRef]) -> datafusion_common::Result<ArrayRef> {
+pub fn array_remove_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() != 2 {
return exec_err!("array_remove expects two arguments");
}
@@ -192,7 +193,7 @@ pub fn array_remove_inner(args: &[ArrayRef]) -> datafusion_common::Result<ArrayR
}
/// Array_remove_n SQL function
-pub fn array_remove_n_inner(args: &[ArrayRef]) -> datafusion_common::Result<ArrayRef> {
+pub fn array_remove_n_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() != 3 {
return exec_err!("array_remove_n expects three arguments");
}
@@ -202,7 +203,7 @@ pub fn array_remove_n_inner(args: &[ArrayRef]) -> datafusion_common::Result<Arra
}
/// Array_remove_all SQL function
-pub fn array_remove_all_inner(args: &[ArrayRef]) -> datafusion_common::Result<ArrayRef> {
+pub fn array_remove_all_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() != 2 {
return exec_err!("array_remove_all expects two arguments");
}
@@ -215,7 +216,7 @@ fn array_remove_internal(
array: &ArrayRef,
element_array: &ArrayRef,
arr_n: Vec<i64>,
-) -> datafusion_common::Result<ArrayRef> {
+) -> Result<ArrayRef> {
match array.data_type() {
DataType::List(_) => {
let list_array = array.as_list::<i32>();
@@ -252,7 +253,7 @@ fn general_remove<OffsetSize: OffsetSizeTrait>(
list_array: &GenericListArray<OffsetSize>,
element_array: &ArrayRef,
arr_n: Vec<i64>,
-) -> datafusion_common::Result<ArrayRef> {
+) -> Result<ArrayRef> {
let data_type = list_array.value_type();
let mut new_values = vec![];
// Build up the offsets for the final output array
diff --git a/datafusion/functions-array/src/repeat.rs b/datafusion/functions-array/src/repeat.rs
index bf967f6572..89b766bdcd 100644
--- a/datafusion/functions-array/src/repeat.rs
+++ b/datafusion/functions-array/src/repeat.rs
@@ -28,7 +28,7 @@ use arrow_buffer::OffsetBuffer;
use arrow_schema::DataType::{LargeList, List};
use arrow_schema::{DataType, Field};
use datafusion_common::cast::{as_int64_array, as_large_list_array, as_list_array};
-use datafusion_common::exec_err;
+use datafusion_common::{exec_err, Result};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility};
use std::any::Any;
@@ -60,6 +60,7 @@ impl ScalarUDFImpl for ArrayRepeat {
fn as_any(&self) -> &dyn Any {
self
}
+
fn name(&self) -> &str {
"array_repeat"
}
@@ -68,7 +69,7 @@ impl ScalarUDFImpl for ArrayRepeat {
&self.signature
}
- fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
Ok(List(Arc::new(Field::new(
"item",
arg_types[0].clone(),
@@ -76,7 +77,7 @@ impl ScalarUDFImpl for ArrayRepeat {
))))
}
- fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result<ColumnarValue> {
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
make_scalar_function(array_repeat_inner)(args)
}
@@ -86,7 +87,7 @@ impl ScalarUDFImpl for ArrayRepeat {
}
/// Array_repeat SQL function
-pub fn array_repeat_inner(args: &[ArrayRef]) -> datafusion_common::Result<ArrayRef> {
+pub fn array_repeat_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() != 2 {
return exec_err!("array_repeat expects two arguments");
}
@@ -122,7 +123,7 @@ pub fn array_repeat_inner(args: &[ArrayRef]) -> datafusion_common::Result<ArrayR
fn general_repeat<O: OffsetSizeTrait>(
array: &ArrayRef,
count_array: &Int64Array,
-) -> datafusion_common::Result<ArrayRef> {
+) -> Result<ArrayRef> {
let data_type = array.data_type();
let mut new_values = vec![];
@@ -176,7 +177,7 @@ fn general_repeat<O: OffsetSizeTrait>(
fn general_list_repeat<O: OffsetSizeTrait>(
list_array: &GenericListArray<O>,
count_array: &Int64Array,
-) -> datafusion_common::Result<ArrayRef> {
+) -> Result<ArrayRef> {
let data_type = list_array.data_type();
let value_type = list_array.value_type();
let mut new_values = vec![];
diff --git a/datafusion/functions-array/src/replace.rs b/datafusion/functions-array/src/replace.rs
index 8ff65d3154..c32305bb45 100644
--- a/datafusion/functions-array/src/replace.rs
+++ b/datafusion/functions-array/src/replace.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-//! [`ScalarUDFImpl`] definitions for array functions.
+//! [`ScalarUDFImpl`] definitions for array_replace, array_replace_n and array_replace_all functions.
use arrow::array::{
Array, ArrayRef, AsArray, Capacities, MutableArrayData, OffsetSizeTrait,
@@ -76,6 +76,7 @@ impl ScalarUDFImpl for ArrayReplace {
fn as_any(&self) -> &dyn Any {
self
}
+
fn name(&self) -> &str {
"array_replace"
}
@@ -84,11 +85,11 @@ impl ScalarUDFImpl for ArrayReplace {
&self.signature
}
- fn return_type(&self, args: &[DataType]) -> datafusion_common::Result<DataType> {
+ fn return_type(&self, args: &[DataType]) -> Result<DataType> {
Ok(args[0].clone())
}
- fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result<ColumnarValue> {
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
make_scalar_function(array_replace_inner)(args)
}
@@ -119,6 +120,7 @@ impl ScalarUDFImpl for ArrayReplaceN {
fn as_any(&self) -> &dyn Any {
self
}
+
fn name(&self) -> &str {
"array_replace_n"
}
@@ -127,11 +129,11 @@ impl ScalarUDFImpl for ArrayReplaceN {
&self.signature
}
- fn return_type(&self, args: &[DataType]) -> datafusion_common::Result<DataType> {
+ fn return_type(&self, args: &[DataType]) -> Result<DataType> {
Ok(args[0].clone())
}
- fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result<ColumnarValue> {
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
make_scalar_function(array_replace_n_inner)(args)
}
@@ -162,6 +164,7 @@ impl ScalarUDFImpl for ArrayReplaceAll {
fn as_any(&self) -> &dyn Any {
self
}
+
fn name(&self) -> &str {
"array_replace_all"
}
@@ -170,11 +173,11 @@ impl ScalarUDFImpl for ArrayReplaceAll {
&self.signature
}
- fn return_type(&self, args: &[DataType]) -> datafusion_common::Result<DataType> {
+ fn return_type(&self, args: &[DataType]) -> Result<DataType> {
Ok(args[0].clone())
}
- fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result<ColumnarValue> {
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
make_scalar_function(array_replace_all_inner)(args)
}
@@ -183,7 +186,7 @@ impl ScalarUDFImpl for ArrayReplaceAll {
}
}
-/// For each element of `list_array[i]`, replaces up to `arr_n[i]` occurences
+/// For each element of `list_array[i]`, replaces up to `arr_n[i]` occurrences
/// of `from_array[i]`, `to_array[i]`.
///
/// The type of each **element** in `list_array` must be the same as the type of
@@ -299,7 +302,7 @@ pub(crate) fn array_replace_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
return exec_err!("array_replace expects three arguments");
}
- // replace at most one occurence for each element
+ // replace at most one occurrence for each element
let arr_n = vec![1; args[0].len()];
let array = &args[0];
match array.data_type() {
@@ -320,7 +323,7 @@ pub(crate) fn array_replace_n_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
return exec_err!("array_replace_n expects four arguments");
}
- // replace the specified number of occurences
+ // replace the specified number of occurrences
let arr_n = as_int64_array(&args[3])?.values().to_vec();
let array = &args[0];
match array.data_type() {
diff --git a/datafusion/functions-array/src/resize.rs b/datafusion/functions-array/src/resize.rs
index f3996110f9..c5855d0544 100644
--- a/datafusion/functions-array/src/resize.rs
+++ b/datafusion/functions-array/src/resize.rs
@@ -24,7 +24,7 @@ use arrow_buffer::{ArrowNativeType, OffsetBuffer};
use arrow_schema::DataType::{FixedSizeList, LargeList, List};
use arrow_schema::{DataType, FieldRef};
use datafusion_common::cast::{as_int64_array, as_large_list_array, as_list_array};
-use datafusion_common::{exec_err, internal_datafusion_err, ScalarValue};
+use datafusion_common::{exec_err, internal_datafusion_err, Result, ScalarValue};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility};
use std::any::Any;
@@ -57,6 +57,7 @@ impl ScalarUDFImpl for ArrayResize {
fn as_any(&self) -> &dyn Any {
self
}
+
fn name(&self) -> &str {
"array_resize"
}
@@ -65,7 +66,7 @@ impl ScalarUDFImpl for ArrayResize {
&self.signature
}
- fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
match &arg_types[0] {
List(field) | FixedSizeList(field, _) => Ok(List(field.clone())),
LargeList(field) => Ok(LargeList(field.clone())),
@@ -75,7 +76,7 @@ impl ScalarUDFImpl for ArrayResize {
}
}
- fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result<ColumnarValue> {
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
make_scalar_function(array_resize_inner)(args)
}
@@ -85,7 +86,7 @@ impl ScalarUDFImpl for ArrayResize {
}
/// array_resize SQL function
-pub fn array_resize_inner(arg: &[ArrayRef]) -> datafusion_common::Result<ArrayRef> {
+pub(crate) fn array_resize_inner(arg: &[ArrayRef]) -> Result<ArrayRef> {
if arg.len() < 2 || arg.len() > 3 {
return exec_err!("array_resize needs two or three arguments");
}
@@ -98,11 +99,11 @@ pub fn array_resize_inner(arg: &[ArrayRef]) -> datafusion_common::Result<ArrayRe
};
match &arg[0].data_type() {
- DataType::List(field) => {
+ List(field) => {
let array = as_list_array(&arg[0])?;
general_list_resize::<i32>(array, new_len, field, new_element)
}
- DataType::LargeList(field) => {
+ LargeList(field) => {
let array = as_large_list_array(&arg[0])?;
general_list_resize::<i64>(array, new_len, field, new_element)
}
@@ -116,7 +117,7 @@ fn general_list_resize<O: OffsetSizeTrait>(
count_array: &Int64Array,
field: &FieldRef,
default_element: Option<ArrayRef>,
-) -> datafusion_common::Result<ArrayRef>
+) -> Result<ArrayRef>
where
O: TryInto<i64>,
{
diff --git a/datafusion/functions-array/src/reverse.rs b/datafusion/functions-array/src/reverse.rs
index 7eb9e53dee..8324c407bd 100644
--- a/datafusion/functions-array/src/reverse.rs
+++ b/datafusion/functions-array/src/reverse.rs
@@ -21,9 +21,10 @@ use crate::utils::make_scalar_function;
use arrow::array::{Capacities, MutableArrayData};
use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
use arrow_buffer::OffsetBuffer;
+use arrow_schema::DataType::{LargeList, List, Null};
use arrow_schema::{DataType, FieldRef};
use datafusion_common::cast::{as_large_list_array, as_list_array};
-use datafusion_common::exec_err;
+use datafusion_common::{exec_err, Result};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility};
use std::any::Any;
@@ -56,6 +57,7 @@ impl ScalarUDFImpl for ArrayReverse {
fn as_any(&self) -> &dyn Any {
self
}
+
fn name(&self) -> &str {
"array_reverse"
}
@@ -64,11 +66,11 @@ impl ScalarUDFImpl for ArrayReverse {
&self.signature
}
- fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
Ok(arg_types[0].clone())
}
- fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result<ColumnarValue> {
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
make_scalar_function(array_reverse_inner)(args)
}
@@ -78,21 +80,21 @@ impl ScalarUDFImpl for ArrayReverse {
}
/// array_reverse SQL function
-pub fn array_reverse_inner(arg: &[ArrayRef]) -> datafusion_common::Result<ArrayRef> {
+pub fn array_reverse_inner(arg: &[ArrayRef]) -> Result<ArrayRef> {
if arg.len() != 1 {
return exec_err!("array_reverse needs one argument");
}
match &arg[0].data_type() {
- DataType::List(field) => {
+ List(field) => {
let array = as_list_array(&arg[0])?;
general_array_reverse::<i32>(array, field)
}
- DataType::LargeList(field) => {
+ LargeList(field) => {
let array = as_large_list_array(&arg[0])?;
general_array_reverse::<i64>(array, field)
}
- DataType::Null => Ok(arg[0].clone()),
+ Null => Ok(arg[0].clone()),
array_type => exec_err!("array_reverse does not support type '{array_type:?}'."),
}
}
@@ -100,7 +102,7 @@ pub fn array_reverse_inner(arg: &[ArrayRef]) -> datafusion_common::Result<ArrayR
fn general_array_reverse<O: OffsetSizeTrait>(
array: &GenericListArray<O>,
field: &FieldRef,
-) -> datafusion_common::Result<ArrayRef>
+) -> Result<ArrayRef>
where
O: TryFrom<i64>,
{
diff --git a/datafusion/functions-array/src/rewrite.rs b/datafusion/functions-array/src/rewrite.rs
index 6a91e90782..d231dce4cb 100644
--- a/datafusion/functions-array/src/rewrite.rs
+++ b/datafusion/functions-array/src/rewrite.rs
@@ -23,6 +23,7 @@ use crate::extract::{array_element, array_slice};
use datafusion_common::config::ConfigOptions;
use datafusion_common::tree_node::Transformed;
use datafusion_common::utils::list_ndims;
+use datafusion_common::Result;
use datafusion_common::{Column, DFSchema};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::expr_rewriter::FunctionRewrite;
@@ -42,7 +43,7 @@ impl FunctionRewrite for ArrayFunctionRewriter {
expr: Expr,
schema: &DFSchema,
_config: &ConfigOptions,
- ) -> datafusion_common::Result<Transformed<Expr>> {
+ ) -> Result<Transformed<Expr>> {
let transformed = match expr {
// array1 @> array2 -> array_has_all(array1, array2)
Expr::BinaryExpr(BinaryExpr { left, op, right })
diff --git a/datafusion/functions-array/src/set_ops.rs b/datafusion/functions-array/src/set_ops.rs
index df5bc91a26..5f3087fafd 100644
--- a/datafusion/functions-array/src/set_ops.rs
+++ b/datafusion/functions-array/src/set_ops.rs
@@ -15,15 +15,16 @@
// specific language governing permissions and limitations
// under the License.
-//! Array Intersection, Union, and Distinct functions
+//! [`ScalarUDFImpl`] definitions for array_union, array_intersect and array_distinct functions.
-use crate::core::make_array_inner;
+use crate::make_array::make_array_inner;
use crate::utils::make_scalar_function;
use arrow::array::{new_empty_array, Array, ArrayRef, GenericListArray, OffsetSizeTrait};
use arrow::buffer::OffsetBuffer;
use arrow::compute;
use arrow::datatypes::{DataType, Field, FieldRef};
use arrow::row::{RowConverter, SortField};
+use arrow_schema::DataType::{FixedSizeList, LargeList, List, Null};
use datafusion_common::cast::{as_large_list_array, as_list_array};
use datafusion_common::{exec_err, internal_err, Result};
use datafusion_expr::expr::ScalarFunction;
@@ -48,7 +49,7 @@ make_udf_function!(
ArrayIntersect,
array_intersect,
first_array second_array,
- "Returns an array of the elements in the intersection of array1 and
array2.",
+ "returns an array of the elements in the intersection of array1 and
array2.",
array_intersect_udf
);
@@ -56,7 +57,7 @@ make_udf_function!(
ArrayDistinct,
array_distinct,
array,
- "return distinct values from the array after removing duplicates.",
+ "returns distinct values from the array after removing duplicates.",
array_distinct_udf
);
@@ -79,6 +80,7 @@ impl ScalarUDFImpl for ArrayUnion {
fn as_any(&self) -> &dyn Any {
self
}
+
fn name(&self) -> &str {
"array_union"
}
@@ -89,8 +91,8 @@ impl ScalarUDFImpl for ArrayUnion {
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
match (&arg_types[0], &arg_types[1]) {
- (&DataType::Null, dt) => Ok(dt.clone()),
- (dt, DataType::Null) => Ok(dt.clone()),
+ (&Null, dt) => Ok(dt.clone()),
+ (dt, Null) => Ok(dt.clone()),
(dt, _) => Ok(dt.clone()),
}
}
@@ -126,6 +128,7 @@ impl ScalarUDFImpl for ArrayIntersect {
fn as_any(&self) -> &dyn Any {
self
}
+
fn name(&self) -> &str {
"array_intersect"
}
@@ -136,12 +139,8 @@ impl ScalarUDFImpl for ArrayIntersect {
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
match (arg_types[0].clone(), arg_types[1].clone()) {
- (DataType::Null, DataType::Null) | (DataType::Null, _) => Ok(DataType::Null),
- (_, DataType::Null) => Ok(DataType::List(Arc::new(Field::new(
- "item",
- DataType::Null,
- true,
- )))),
+ (Null, Null) | (Null, _) => Ok(Null),
+ (_, Null) => Ok(List(Arc::new(Field::new("item", Null, true)))),
(dt, _) => Ok(dt),
}
}
@@ -174,6 +173,7 @@ impl ScalarUDFImpl for ArrayDistinct {
fn as_any(&self) -> &dyn Any {
self
}
+
fn name(&self) -> &str {
"array_distinct"
}
@@ -183,7 +183,6 @@ impl ScalarUDFImpl for ArrayDistinct {
}
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
- use DataType::*;
match &arg_types[0] {
List(field) | FixedSizeList(field, _) => Ok(List(Arc::new(Field::new(
"item",
@@ -218,17 +217,17 @@ fn array_distinct_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
}
// handle null
- if args[0].data_type() == &DataType::Null {
+ if args[0].data_type() == &Null {
return Ok(args[0].clone());
}
// handle for list & largelist
match args[0].data_type() {
- DataType::List(field) => {
+ List(field) => {
let array = as_list_array(&args[0])?;
general_array_distinct(array, field)
}
- DataType::LargeList(field) => {
+ LargeList(field) => {
let array = as_large_list_array(&args[0])?;
general_array_distinct(array, field)
}
@@ -257,10 +256,10 @@ fn generic_set_lists<OffsetSize: OffsetSizeTrait>(
field: Arc<Field>,
set_op: SetOp,
) -> Result<ArrayRef> {
- if matches!(l.value_type(), DataType::Null) {
+ if matches!(l.value_type(), Null) {
let field = Arc::new(Field::new("item", r.value_type(), true));
return general_array_distinct::<OffsetSize>(r, &field);
- } else if matches!(r.value_type(), DataType::Null) {
+ } else if matches!(r.value_type(), Null) {
let field = Arc::new(Field::new("item", l.value_type(), true));
return general_array_distinct::<OffsetSize>(l, &field);
}
@@ -331,43 +330,43 @@ fn general_set_op(
set_op: SetOp,
) -> Result<ArrayRef> {
match (array1.data_type(), array2.data_type()) {
- (DataType::Null, DataType::List(field)) => {
+ (Null, List(field)) => {
if set_op == SetOp::Intersect {
- return Ok(new_empty_array(&DataType::Null));
+ return Ok(new_empty_array(&Null));
}
let array = as_list_array(&array2)?;
general_array_distinct::<i32>(array, field)
}
- (DataType::List(field), DataType::Null) => {
+ (List(field), Null) => {
if set_op == SetOp::Intersect {
return make_array_inner(&[]);
}
let array = as_list_array(&array1)?;
general_array_distinct::<i32>(array, field)
}
- (DataType::Null, DataType::LargeList(field)) => {
+ (Null, LargeList(field)) => {
if set_op == SetOp::Intersect {
- return Ok(new_empty_array(&DataType::Null));
+ return Ok(new_empty_array(&Null));
}
let array = as_large_list_array(&array2)?;
general_array_distinct::<i64>(array, field)
}
- (DataType::LargeList(field), DataType::Null) => {
+ (LargeList(field), Null) => {
if set_op == SetOp::Intersect {
return make_array_inner(&[]);
}
let array = as_large_list_array(&array1)?;
general_array_distinct::<i64>(array, field)
}
- (DataType::Null, DataType::Null) => Ok(new_empty_array(&DataType::Null)),
+ (Null, Null) => Ok(new_empty_array(&Null)),
- (DataType::List(field), DataType::List(_)) => {
+ (List(field), List(_)) => {
let array1 = as_list_array(&array1)?;
let array2 = as_list_array(&array2)?;
generic_set_lists::<i32>(array1, array2, field.clone(), set_op)
}
- (DataType::LargeList(field), DataType::LargeList(_)) => {
+ (LargeList(field), LargeList(_)) => {
let array1 = as_large_list_array(&array1)?;
let array2 = as_large_list_array(&array2)?;
generic_set_lists::<i64>(array1, array2, field.clone(), set_op)
diff --git a/datafusion/functions-array/src/sort.rs b/datafusion/functions-array/src/sort.rs
index 2f3fa33e68..af78712065 100644
--- a/datafusion/functions-array/src/sort.rs
+++ b/datafusion/functions-array/src/sort.rs
@@ -24,7 +24,7 @@ use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer};
use arrow_schema::DataType::{FixedSizeList, LargeList, List};
use arrow_schema::{DataType, Field, SortOptions};
use datafusion_common::cast::{as_list_array, as_string_array};
-use datafusion_common::exec_err;
+use datafusion_common::{exec_err, Result};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility};
use std::any::Any;
@@ -57,6 +57,7 @@ impl ScalarUDFImpl for ArraySort {
fn as_any(&self) -> &dyn Any {
self
}
+
fn name(&self) -> &str {
"array_sort"
}
@@ -65,7 +66,7 @@ impl ScalarUDFImpl for ArraySort {
&self.signature
}
- fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
match &arg_types[0] {
List(field) | FixedSizeList(field, _) => Ok(List(Arc::new(Field::new(
"item",
@@ -83,7 +84,7 @@ impl ScalarUDFImpl for ArraySort {
}
}
- fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result<ColumnarValue> {
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
make_scalar_function(array_sort_inner)(args)
}
@@ -93,7 +94,7 @@ impl ScalarUDFImpl for ArraySort {
}
/// Array_sort SQL function
-pub fn array_sort_inner(args: &[ArrayRef]) -> datafusion_common::Result<ArrayRef> {
+pub fn array_sort_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.is_empty() || args.len() > 3 {
return exec_err!("array_sort expects one to three arguments");
}
@@ -157,7 +158,7 @@ pub fn array_sort_inner(args: &[ArrayRef]) -> datafusion_common::Result<ArrayRef
Ok(Arc::new(list_arr))
}
-fn order_desc(modifier: &str) -> datafusion_common::Result<bool> {
+fn order_desc(modifier: &str) -> Result<bool> {
match modifier.to_uppercase().as_str() {
"DESC" => Ok(true),
"ASC" => Ok(false),
@@ -165,7 +166,7 @@ fn order_desc(modifier: &str) -> datafusion_common::Result<bool> {
}
}
-fn order_nulls_first(modifier: &str) -> datafusion_common::Result<bool> {
+fn order_nulls_first(modifier: &str) -> Result<bool> {
match modifier.to_uppercase().as_str() {
"NULLS FIRST" => Ok(true),
"NULLS LAST" => Ok(false),
diff --git a/datafusion/functions-array/src/string.rs b/datafusion/functions-array/src/string.rs
index 3140866f5f..3805903500 100644
--- a/datafusion/functions-array/src/string.rs
+++ b/datafusion/functions-array/src/string.rs
@@ -32,7 +32,7 @@ use datafusion_common::{plan_err, DataFusionError, Result};
use std::any::{type_name, Any};
use crate::utils::{downcast_arg, make_scalar_function};
-use arrow_schema::DataType::{LargeUtf8, Utf8};
+use arrow_schema::DataType::{FixedSizeList, LargeList, LargeUtf8, List, Null, Utf8};
use datafusion_common::cast::{
as_generic_string_array, as_large_list_array, as_list_array, as_string_array,
};
@@ -133,6 +133,7 @@ impl ScalarUDFImpl for ArrayToString {
fn as_any(&self) -> &dyn Any {
self
}
+
fn name(&self) -> &str {
"array_to_string"
}
@@ -142,7 +143,6 @@ impl ScalarUDFImpl for ArrayToString {
}
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
- use DataType::*;
Ok(match arg_types[0] {
List(_) | LargeList(_) | FixedSizeList(_, _) => Utf8,
_ => {
@@ -195,6 +195,7 @@ impl ScalarUDFImpl for StringToArray {
fn as_any(&self) -> &dyn Any {
self
}
+
fn name(&self) -> &str {
"string_to_array"
}
@@ -204,7 +205,6 @@ impl ScalarUDFImpl for StringToArray {
}
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
- use DataType::*;
Ok(match arg_types[0] {
Utf8 | LargeUtf8 => {
List(Arc::new(Field::new("item", arg_types[0].clone(), true)))
@@ -258,7 +258,7 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
with_null_string: bool,
) -> Result<&mut String> {
match arr.data_type() {
- DataType::List(..) => {
+ List(..) => {
let list_array = as_list_array(&arr)?;
for i in 0..list_array.len() {
compute_array_to_string(
@@ -272,7 +272,7 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
Ok(arg)
}
- DataType::LargeList(..) => {
+ LargeList(..) => {
let list_array = as_large_list_array(&arr)?;
for i in 0..list_array.len() {
compute_array_to_string(
@@ -286,7 +286,7 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
Ok(arg)
}
- DataType::Null => Ok(arg),
+ Null => Ok(arg),
data_type => {
macro_rules! array_function {
($ARRAY_TYPE:ident) => {
@@ -339,7 +339,7 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
let arr_type = arr.data_type();
let string_arr = match arr_type {
- DataType::List(_) | DataType::FixedSizeList(_, _) => {
+ List(_) | FixedSizeList(_, _) => {
let list_array = as_list_array(&arr)?;
generate_string_array::<i32>(
list_array,
@@ -348,7 +348,7 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
with_null_string,
)?
}
- DataType::LargeList(_) => {
+ LargeList(_) => {
let list_array = as_large_list_array(&arr)?;
generate_string_array::<i64>(
list_array,