This is an automated email from the ASF dual-hosted git repository.
jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new b728232b91 feat: support `FixedSizeList` Type Coercion (#9108)
b728232b91 is described below
commit b728232b91862826061dfa878cefe90d25576f78
Author: Alex Huang <[email protected]>
AuthorDate: Mon Feb 26 21:10:13 2024 +0800
feat: support `FixedSizeList` Type Coercion (#9108)
* support FixedSizeList Type Coercion
* add allow null type coercion parameter
* support null column in FixedSizeList
* Add test
* Add tests for cardinality with fixed size lists
* chore
* fix ci
* add comment
* Fix array_element function signature
* Remove unused imports and simplify code
* Fix array function signatures and behavior
* fix conflict
* fix conflict
* add tests for FixedSizeList
* remove unreacheable null check
* simplify the code
* remove null checking
* reformat output
* simplify code
* add tests for array_dims
* Refactor type coercion functions in datafusion/expr module
---
datafusion/expr/src/built_in_function.rs | 23 +-
datafusion/expr/src/signature.rs | 17 +-
datafusion/expr/src/type_coercion/functions.rs | 109 ++---
datafusion/sqllogictest/test_files/array.slt | 562 ++++++++++++++++++++++++-
4 files changed, 628 insertions(+), 83 deletions(-)
diff --git a/datafusion/expr/src/built_in_function.rs
b/datafusion/expr/src/built_in_function.rs
index f92ae87d6e..8b4e65121c 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -31,7 +31,7 @@ use crate::{
};
use arrow::datatypes::{DataType, Field, Fields, IntervalUnit, TimeUnit};
-use datafusion_common::{internal_err, plan_err, DataFusionError, Result};
+use datafusion_common::{exec_err, plan_err, DataFusionError, Result};
use strum::IntoEnumIterator;
use strum_macros::EnumIter;
@@ -543,10 +543,11 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Flatten => {
fn get_base_type(data_type: &DataType) -> Result<DataType> {
match data_type {
- DataType::List(field) if matches!(field.data_type(),
DataType::List(_)) => get_base_type(field.data_type()),
+ DataType::List(field) | DataType::FixedSizeList(field,
_) if matches!(field.data_type(), DataType::List(_)|DataType::FixedSizeList(_,_
)) => get_base_type(field.data_type()),
DataType::LargeList(field) if
matches!(field.data_type(), DataType::LargeList(_)) =>
get_base_type(field.data_type()),
DataType::Null | DataType::List(_) |
DataType::LargeList(_) => Ok(data_type.to_owned()),
- _ => internal_err!("Not reachable, data_type should be
List or LargeList"),
+ DataType::FixedSizeList(field,_ ) =>
Ok(DataType::List(field.clone())),
+ _ => exec_err!("Not reachable, data_type should be
List, LargeList or FixedSizeList"),
}
}
@@ -929,18 +930,18 @@ impl BuiltinScalarFunction {
// 0 or more arguments of arbitrary type
Signature::one_of(vec![VariadicEqual, Any(0)],
self.volatility())
}
- BuiltinScalarFunction::ArrayPopFront => Signature::any(1,
self.volatility()),
- BuiltinScalarFunction::ArrayPopBack => Signature::any(1,
self.volatility()),
+ BuiltinScalarFunction::ArrayPopFront =>
Signature::array(self.volatility()),
+ BuiltinScalarFunction::ArrayPopBack =>
Signature::array(self.volatility()),
BuiltinScalarFunction::ArrayConcat => {
Signature::variadic_any(self.volatility())
}
- BuiltinScalarFunction::ArrayDims => Signature::any(1,
self.volatility()),
- BuiltinScalarFunction::ArrayEmpty => Signature::any(1,
self.volatility()),
+ BuiltinScalarFunction::ArrayDims =>
Signature::array(self.volatility()),
+ BuiltinScalarFunction::ArrayEmpty =>
Signature::array(self.volatility()),
BuiltinScalarFunction::ArrayElement => {
Signature::array_and_index(self.volatility())
}
BuiltinScalarFunction::ArrayExcept => Signature::any(2,
self.volatility()),
- BuiltinScalarFunction::Flatten => Signature::any(1,
self.volatility()),
+ BuiltinScalarFunction::Flatten =>
Signature::array(self.volatility()),
BuiltinScalarFunction::ArrayHasAll |
BuiltinScalarFunction::ArrayHasAny => {
Signature::any(2, self.volatility())
}
@@ -950,8 +951,8 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayLength => {
Signature::variadic_any(self.volatility())
}
- BuiltinScalarFunction::ArrayNdims => Signature::any(1,
self.volatility()),
- BuiltinScalarFunction::ArrayDistinct => Signature::any(1,
self.volatility()),
+ BuiltinScalarFunction::ArrayNdims =>
Signature::array(self.volatility()),
+ BuiltinScalarFunction::ArrayDistinct =>
Signature::array(self.volatility()),
BuiltinScalarFunction::ArrayPosition => {
Signature::array_and_element_and_optional_index(self.volatility())
}
@@ -981,7 +982,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayIntersect => Signature::any(2,
self.volatility()),
BuiltinScalarFunction::ArrayUnion => Signature::any(2,
self.volatility()),
- BuiltinScalarFunction::Cardinality => Signature::any(1,
self.volatility()),
+ BuiltinScalarFunction::Cardinality =>
Signature::array(self.volatility()),
BuiltinScalarFunction::ArrayResize => {
Signature::variadic_any(self.volatility())
}
diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs
index e8d9d8fb39..663ecf7b1b 100644
--- a/datafusion/expr/src/signature.rs
+++ b/datafusion/expr/src/signature.rs
@@ -123,7 +123,7 @@ pub enum TypeSignature {
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ArrayFunctionSignature {
/// Specialized Signature for ArrayAppend and similar functions
- /// The first argument should be List/LargeList, and the second argument
should be non-list or list.
+ /// The first argument should be List/LargeList/FixedSizedList, and the
second argument should be non-list or list.
/// The second argument's list dimension should be one dimension less than
the first argument's list dimension.
/// List dimension of the List/LargeList is equivalent to the number of
List.
/// List dimension of the non-list is 0.
@@ -133,9 +133,14 @@ pub enum ArrayFunctionSignature {
/// The first argument's list dimension should be one dimension less than
the second argument's list dimension.
ElementAndArray,
/// Specialized Signature for Array functions of the form (List/LargeList,
Index)
+ /// The first argument should be List/LargeList/FixedSizedList, and the
second argument should be Int64.
ArrayAndIndex,
/// Specialized Signature for Array functions of the form (List/LargeList,
Element, Optional Index)
ArrayAndElementAndOptionalIndex,
+ /// Specialized Signature for ArrayEmpty and similar functions
+ /// The function takes a single argument that must be a
List/LargeList/FixedSizeList
+ /// or something that can be coerced to one of those types.
+ Array,
}
impl std::fmt::Display for ArrayFunctionSignature {
@@ -153,6 +158,9 @@ impl std::fmt::Display for ArrayFunctionSignature {
ArrayFunctionSignature::ArrayAndIndex => {
write!(f, "array, index")
}
+ ArrayFunctionSignature::Array => {
+ write!(f, "array")
+ }
}
}
}
@@ -325,6 +333,13 @@ impl Signature {
volatility,
}
}
+ /// Specialized Signature for ArrayEmpty and similar functions
+ pub fn array(volatility: Volatility) -> Self {
+ Signature {
+ type_signature:
TypeSignature::ArraySignature(ArrayFunctionSignature::Array),
+ volatility,
+ }
+ }
}
/// Monotonicity of the `ScalarFunctionExpr` with respect to its arguments.
diff --git a/datafusion/expr/src/type_coercion/functions.rs
b/datafusion/expr/src/type_coercion/functions.rs
index 9cab04bc76..2022d67879 100644
--- a/datafusion/expr/src/type_coercion/functions.rs
+++ b/datafusion/expr/src/type_coercion/functions.rs
@@ -80,6 +80,36 @@ fn get_valid_types(
signature: &TypeSignature,
current_types: &[DataType],
) -> Result<Vec<Vec<DataType>>> {
+ fn array_element_and_optional_index(
+ current_types: &[DataType],
+ ) -> Result<Vec<Vec<DataType>>> {
+ // make sure there's 2 or 3 arguments
+ if !(current_types.len() == 2 || current_types.len() == 3) {
+ return Ok(vec![vec![]]);
+ }
+
+ let first_two_types = ¤t_types[0..2];
+ let mut valid_types =
array_append_or_prepend_valid_types(first_two_types, true)?;
+
+ // Early return if there are only 2 arguments
+ if current_types.len() == 2 {
+ return Ok(valid_types);
+ }
+
+ let valid_types_with_index = valid_types
+ .iter()
+ .map(|t| {
+ let mut t = t.clone();
+ t.push(DataType::Int64);
+ t
+ })
+ .collect::<Vec<_>>();
+
+ valid_types.extend(valid_types_with_index);
+
+ Ok(valid_types)
+ }
+
fn array_append_or_prepend_valid_types(
current_types: &[DataType],
is_append: bool,
@@ -111,71 +141,37 @@ fn get_valid_types(
)
})?;
- let array_type =
datafusion_common::utils::coerced_type_with_base_type_only(
+ let new_array_type =
datafusion_common::utils::coerced_type_with_base_type_only(
array_type,
&new_base_type,
);
- match array_type {
+ match new_array_type {
DataType::List(ref field)
| DataType::LargeList(ref field)
| DataType::FixedSizeList(ref field, _) => {
- let elem_type = field.data_type();
+ let new_elem_type = field.data_type();
if is_append {
- Ok(vec![vec![array_type.clone(), elem_type.clone()]])
+ Ok(vec![vec![new_array_type.clone(),
new_elem_type.clone()]])
} else {
- Ok(vec![vec![elem_type.to_owned(), array_type.clone()]])
+ Ok(vec![vec![new_elem_type.to_owned(),
new_array_type.clone()]])
}
}
_ => Ok(vec![vec![]]),
}
}
- fn array_element_and_optional_index(
- current_types: &[DataType],
- ) -> Result<Vec<Vec<DataType>>> {
- // make sure there's 2 or 3 arguments
- if !(current_types.len() == 2 || current_types.len() == 3) {
- return Ok(vec![vec![]]);
- }
-
- let first_two_types = ¤t_types[0..2];
- let mut valid_types =
array_append_or_prepend_valid_types(first_two_types, true)?;
-
- // Early return if there are only 2 arguments
- if current_types.len() == 2 {
- return Ok(valid_types);
- }
-
- let valid_types_with_index = valid_types
- .iter()
- .map(|t| {
- let mut t = t.clone();
- t.push(DataType::Int64);
- t
- })
- .collect::<Vec<_>>();
-
- valid_types.extend(valid_types_with_index);
-
- Ok(valid_types)
- }
- fn array_and_index(current_types: &[DataType]) ->
Result<Vec<Vec<DataType>>> {
- if current_types.len() != 2 {
- return Ok(vec![vec![]]);
- }
-
- let array_type = ¤t_types[0];
-
+ fn array(array_type: &DataType) -> Option<DataType> {
match array_type {
DataType::List(_)
| DataType::LargeList(_)
| DataType::FixedSizeList(_, _) => {
let array_type = coerced_fixed_size_list_to_list(array_type);
- Ok(vec![vec![array_type, DataType::Int64]])
+ Some(array_type)
}
- _ => Ok(vec![vec![]]),
+ _ => None,
}
}
+
let valid_types = match signature {
TypeSignature::Variadic(valid_types) => valid_types
.iter()
@@ -211,19 +207,32 @@ fn get_valid_types(
TypeSignature::ArraySignature(ref function_signature) => match
function_signature
{
ArrayFunctionSignature::ArrayAndElement => {
- return array_append_or_prepend_valid_types(current_types, true)
+ array_append_or_prepend_valid_types(current_types, true)?
}
- ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => {
- return array_element_and_optional_index(current_types)
+ ArrayFunctionSignature::ElementAndArray => {
+ array_append_or_prepend_valid_types(current_types, false)?
}
ArrayFunctionSignature::ArrayAndIndex => {
- return array_and_index(current_types)
+ if current_types.len() != 2 {
+ return Ok(vec![vec![]]);
+ }
+ array(¤t_types[0]).map_or_else(
+ || vec![vec![]],
+ |array_type| vec![vec![array_type, DataType::Int64]],
+ )
}
- ArrayFunctionSignature::ElementAndArray => {
- return array_append_or_prepend_valid_types(current_types,
false)
+ ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => {
+ array_element_and_optional_index(current_types)?
}
- },
+ ArrayFunctionSignature::Array => {
+ if current_types.len() != 1 {
+ return Ok(vec![vec![]]);
+ }
+ array(¤t_types[0])
+ .map_or_else(|| vec![vec![]], |array_type|
vec![vec![array_type]])
+ }
+ },
TypeSignature::Any(number) => {
if current_types.len() != *number {
return plan_err!(
diff --git a/datafusion/sqllogictest/test_files/array.slt
b/datafusion/sqllogictest/test_files/array.slt
index 7f263d9048..da02a80a10 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -123,6 +123,13 @@ AS VALUES
(make_array(NULL, 10, 11, 12))
;
+statement ok
+CREATE TABLE large_arrayspop
+AS SELECT
+ arrow_cast(column1, 'LargeList(Int64)') AS column1
+FROM arrayspop
+;
+
statement ok
CREATE TABLE nested_arrays
AS VALUES
@@ -172,6 +179,15 @@ AS SELECT
FROM arrays_values
;
+statement ok
+CREATE TABLE fixed_arrays_values
+AS SELECT
+ arrow_cast(column1, 'FixedSizeList(10, Int64)') AS column1,
+ column2,
+ column3,
+ column4
+FROM arrays_values
+;
statement ok
CREATE TABLE arrays_values_v2
@@ -212,6 +228,22 @@ AS
FROM flatten_table
;
+statement ok
+CREATE TABLE fixed_size_flatten_table
+AS VALUES
+ (arrow_cast(make_array([1], [2], [3]), 'FixedSizeList(3, List(Int64))'),
+ arrow_cast(make_array([[1, 2, 3]], [[4, 5]], [[6]]), 'FixedSizeList(3,
List(List(Int64)))'),
+ arrow_cast(make_array([[[1]]], [[[2, 3]]]), 'FixedSizeList(2,
List(List(List(Int64))))'),
+ arrow_cast(make_array([1.0], [2.1, 2.2], [3.2, 3.3, 3.4]),
'FixedSizeList(3, List(Float64))')
+ ),
+ (
+ arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'FixedSizeList(3,
List(Int64))'),
+ arrow_cast(make_array([[8]], [[9, 10]], [[11, 12, 13]]), 'FixedSizeList(3,
List(List(Int64)))'),
+ arrow_cast(make_array([[[1,2]]], [[[3]]]), 'FixedSizeList(2,
List(List(List(Int64))))'),
+ arrow_cast(make_array([1.0, 2.0], [3.0, 4.0], [5.0, 6.0]),
'FixedSizeList(3, List(Float64))')
+ )
+;
+
statement ok
CREATE TABLE array_has_table_1D
AS VALUES
@@ -346,10 +378,31 @@ AS VALUES
statement ok
CREATE TABLE array_distinct_table_1D_large
+AS SELECT
+ arrow_cast(column1, 'LargeList(Int64)') AS column1
+FROM array_distinct_table_1D
+;
+
+statement ok
+CREATE TABLE array_distinct_table_1D_fixed
+AS SELECT
+ arrow_cast(column1, 'FixedSizeList(5, Int64)') AS column1
+FROM array_distinct_table_1D
+;
+
+statement ok
+CREATE TABLE array_distinct_table_1D_UTF8_fixed
+AS SELECT
+ arrow_cast(column1, 'FixedSizeList(5, Utf8)') AS column1
+FROM array_distinct_table_1D_UTF8
+;
+
+statement ok
+CREATE TABLE array_distinct_table_2D_fixed
AS VALUES
- (arrow_cast(make_array(1, 1, 2, 2, 3), 'LargeList(Int64)')),
- (arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')),
- (arrow_cast(make_array(3, 5, 3, 3, 3), 'LargeList(Int64)'))
+ (arrow_cast(make_array([1,2], [1,2], [3,4], [3,4], [5,6]), 'FixedSizeList(5,
List(Int64))')),
+ (arrow_cast(make_array([1,2], [3,4], [5,6], [7,8], [9,10]),
'FixedSizeList(5, List(Int64))')),
+ (arrow_cast(make_array([5,6], [5,6], NULL, NULL, NULL), 'FixedSizeList(5,
List(Int64))'))
;
statement ok
@@ -1103,7 +1156,7 @@ select array_element(arrow_cast(make_array(1, 2, 3, 4,
5), 'LargeList(Int64)'),
NULL NULL
query IT
-select array_element(arrow_cast(make_array(1, 2, 3, 4, 5),
'LargeList(Int64)'), 7), array_element(arrow_cast(make_array('h', 'e', 'l',
'l', 'o'), 'LargeList(Utf8)'), 11);
+select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5,
Int64)'), 7), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'),
'FixedSizeList(5, Utf8)'), 11);
----
NULL NULL
@@ -1303,6 +1356,14 @@ NULL 43
## array_pop_back (aliases: `list_pop_back`)
+# array_pop_back scalar function with null
+#TODO: https://github.com/apache/arrow-datafusion/issues/7142
+# follow clickhouse and duckdb
+#query ?
+#select array_pop_back(null);
+#----
+#NULL
+
# array_pop_back scalar function #1
query ??
select array_pop_back(make_array(1, 2, 3, 4, 5)),
array_pop_back(make_array('h', 'e', 'l', 'l', 'o'));
@@ -1314,6 +1375,11 @@ select array_pop_back(arrow_cast(make_array(1, 2, 3, 4,
5), 'LargeList(Int64)'))
----
[1, 2, 3, 4] [h, e, l, l]
+query ??
+select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5,
Int64)')), array_pop_back(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'),
'FixedSizeList(5, Utf8)'));
+----
+[1, 2, 3, 4] [h, e, l, l]
+
# array_pop_back scalar function #2 (after array_pop_back, array is empty)
query ?
select array_pop_back(make_array(1));
@@ -1325,6 +1391,11 @@ select array_pop_back(arrow_cast(make_array(1),
'LargeList(Int64)'));
----
[]
+query ?
+select array_pop_back(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'));
+----
+[]
+
# array_pop_back scalar function #3 (array_pop_back the empty array)
query ?
select array_pop_back(array_pop_back(make_array(1)));
@@ -1336,12 +1407,27 @@ select
array_pop_back(array_pop_back(arrow_cast(make_array(1), 'LargeList(Int64)
----
[]
+query ?
+select array_pop_back(array_pop_back(arrow_cast(make_array(1),
'FixedSizeList(1, Int64)')));
+----
+[]
+
# array_pop_back scalar function #4 (array_pop_back the arrays which have NULL)
query ??
select array_pop_back(make_array(1, 2, 3, 4, NULL)),
array_pop_back(make_array(NULL, 'e', 'l', NULL, 'o'));
----
[1, 2, 3, 4] [, e, l, ]
+query ??
+select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, NULL),
'LargeList(Int64)')), array_pop_back(arrow_cast(make_array(NULL, 'e', 'l',
NULL, 'o'), 'LargeList(Utf8)'));
+----
+[1, 2, 3, 4] [, e, l, ]
+
+query ??
+select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, NULL),
'FixedSizeList(5, Int64)')), array_pop_back(arrow_cast(make_array(NULL, 'e',
'l', NULL, 'o'), 'FixedSizeList(5, Utf8)'));
+----
+[1, 2, 3, 4] [, e, l, ]
+
# array_pop_back scalar function #5 (array_pop_back the nested arrays)
query ?
select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1),
make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5,
6)));
@@ -1353,6 +1439,11 @@ select
array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9
----
[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]]
+query ?
+select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2,
9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4),
make_array(4, 5, 6)), 'FixedSizeList(6, List(Int64))'));
+----
+[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]]
+
# array_pop_back scalar function #6 (array_pop_back the nested arrays with
NULL)
query ?
select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1),
make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL));
@@ -1364,6 +1455,11 @@ select
array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9
----
[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]]
+query ?
+select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2,
9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL),
'FixedSizeList(6, List(Int64))'));
+----
+[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]]
+
# array_pop_back scalar function #7 (array_pop_back the nested arrays with
NULL)
query ?
select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1),
make_array(7, 8, 9), NULL, make_array(1, 7, 4)));
@@ -1375,6 +1471,11 @@ select
array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9
----
[[1, 2, 3], [2, 9, 1], [7, 8, 9], ]
+query ?
+select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2,
9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4)), 'FixedSizeList(5,
List(Int64))'));
+----
+[[1, 2, 3], [2, 9, 1], [7, 8, 9], ]
+
# array_pop_back scalar function #8 (after array_pop_back, nested array is
empty)
query ?
select array_pop_back(make_array(make_array(1, 2, 3)));
@@ -1386,6 +1487,11 @@ select
array_pop_back(arrow_cast(make_array(make_array(1, 2, 3)), 'LargeList(Lis
----
[]
+query ?
+select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3)),
'FixedSizeList(1, List(Int64))'));
+----
+[]
+
# array_pop_back with columns
query ?
select array_pop_back(column1) from arrayspop;
@@ -1407,8 +1513,36 @@ select array_pop_back(arrow_cast(column1,
'LargeList(Int64)')) from arrayspop;
[]
[, 10, 11]
+query ?
+select array_pop_back(column1) from large_arrayspop;
+----
+[1, 2]
+[3, 4, 5]
+[6, 7, 8, ]
+[, ]
+[]
+[, 10, 11]
+
+query ?
+select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from
large_arrayspop;
+----
+[1, 2]
+[3, 4, 5]
+[6, 7, 8, ]
+[, ]
+[]
+[, 10, 11]
+
## array_pop_front (aliases: `list_pop_front`)
+#TODO:https://github.com/apache/arrow-datafusion/issues/7142
+# array_pop_front scalar function with null
+# follow clickhouse and duckdb
+#query ?
+#select array_pop_front(null);
+#----
+#NULL
+
# array_pop_front scalar function #1
query ??
select array_pop_front(make_array(1, 2, 3, 4, 5)),
array_pop_front(make_array('h', 'e', 'l', 'l', 'o'));
@@ -1420,6 +1554,11 @@ select array_pop_front(arrow_cast(make_array(1, 2, 3, 4,
5), 'LargeList(Int64)')
----
[2, 3, 4, 5] [e, l, l, o]
+query ??
+select array_pop_front(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5,
Int64)')), array_pop_front(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'),
'FixedSizeList(5, Utf8)'));
+----
+[2, 3, 4, 5] [e, l, l, o]
+
# array_pop_front scalar function #2 (after array_pop_front, array is empty)
query ?
select array_pop_front(make_array(1));
@@ -1431,6 +1570,11 @@ select array_pop_front(arrow_cast(make_array(1),
'LargeList(Int64)'));
----
[]
+query ?
+select array_pop_front(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'));
+----
+[]
+
# array_pop_front scalar function #3 (array_pop_front the empty array)
query ?
select array_pop_front(array_pop_front(make_array(1)));
@@ -1442,6 +1586,11 @@ select
array_pop_front(array_pop_front(arrow_cast(make_array(1), 'LargeList(Int6
----
[]
+query ?
+select array_pop_front(array_pop_front(arrow_cast(make_array(1),
'FixedSizeList(1, Int64)')));
+----
+[]
+
# array_pop_front scalar function #5 (array_pop_front the nested arrays)
query ?
select array_pop_front(make_array(make_array(1, 2, 3), make_array(2, 9, 1),
make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5,
6)));
@@ -1453,6 +1602,11 @@ select
array_pop_front(arrow_cast(make_array(make_array(1, 2, 3), make_array(2,
----
[[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]]
+query ?
+select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3),
make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7,
4), make_array(4, 5, 6)), 'FixedSizeList(6, List(Int64))'));
+----
+[[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]]
+
# array_pop_front scalar function #6 (array_pop_front the nested arrays with
NULL)
query ?
select array_pop_front(make_array(NULL, make_array(1, 2, 3), make_array(2, 9,
1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4)));
@@ -1464,6 +1618,11 @@ select array_pop_front(arrow_cast(make_array(NULL,
make_array(1, 2, 3), make_arr
----
[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]]
+query ?
+select array_pop_front(arrow_cast(make_array(NULL, make_array(1, 2, 3),
make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7,
4)), 'FixedSizeList(6, List(Int64))'));
+----
+[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]]
+
# array_pop_front scalar function #8 (after array_pop_front, nested array is
empty)
query ?
select array_pop_front(make_array(make_array(1, 2, 3)));
@@ -1475,6 +1634,11 @@ select
array_pop_front(arrow_cast(make_array(make_array(1, 2, 3)), 'LargeList(Li
----
[]
+query ?
+select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3)),
'FixedSizeList(1, List(Int64))'));
+----
+[]
+
## array_slice (aliases: list_slice)
# array_slice scalar function #1 (with positive indexes)
@@ -1874,6 +2038,14 @@ select
----
[4] [] [1, , 3, 4] [, , 1]
+query ??
+select
+ array_append(arrow_cast(make_array(1, null, 3), 'FixedSizeList(3, Int64)'),
4),
+ array_append(arrow_cast(make_array(null, null), 'FixedSizeList(2, Int64)'),
1)
+;
+----
+[1, , 3, 4] [, , 1]
+
# test invalid (non-null)
query error
select array_append(1, 2);
@@ -1898,6 +2070,13 @@ select
----
[[1, , 3], []] [[1, , 3], ]
+query ??
+select
+ array_append(arrow_cast(make_array(make_array(1, null, 3)),
'FixedSizeList(1, List(Int64))'), [null]),
+ array_append(arrow_cast(make_array(make_array(1, null, 3)),
'FixedSizeList(1, List(Int64))'), null);
+----
+[[1, , 3], []] [[1, , 3], ]
+
# array_append scalar function #3
query ???
select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0,
3.0), 4.0), array_append(make_array('h', 'e', 'l', 'l'), 'o');
@@ -1905,7 +2084,12 @@ select array_append(make_array(1, 2, 3), 4),
array_append(make_array(1.0, 2.0, 3
[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
query ???
-select array_append(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4),
array_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0),
array_append(make_array('h', 'e', 'l', 'l'), 'o');
+select array_append(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4),
array_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0),
array_append(arrow_cast(make_array('h', 'e', 'l', 'l'), 'LargeList(Utf8)'),
'o');
+----
+[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
+
+query ???
+select array_append(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3,
Int64)'), 4), array_append(arrow_cast(make_array(1.0, 2.0, 3.0),
'FixedSizeList(3, Float64)'), 4.0), array_append(arrow_cast(make_array('h',
'e', 'l', 'l'), 'FixedSizeList(4, Utf8)'), 'o');
----
[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
@@ -1920,6 +2104,11 @@ select array_append(arrow_cast(make_array([1], [2],
[3]), 'LargeList(LargeList(I
----
[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]]
+query ???
+select array_append(arrow_cast(make_array([1], [2], [3]), 'FixedSizeList(3,
List(Int64))'), [4]), array_append(arrow_cast(make_array([1.0], [2.0], [3.0]),
'FixedSizeList(3, List(Float64))'), [4.0]),
array_append(arrow_cast(make_array(['h'], ['e'], ['l'], ['l']),
'FixedSizeList(4, List(Utf8))'), ['o']);
+----
+[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]]
+
# list_append scalar function #5 (function alias `array_append`)
query ???
select list_append(make_array(1, 2, 3), 4), list_append(make_array(1.0, 2.0,
3.0), 4.0), list_append(make_array('h', 'e', 'l', 'l'), 'o');
@@ -1978,6 +2167,18 @@ select array_append(column1, column2) from
large_arrays_values;
[51, 52, , 54, 55, 56, 57, 58, 59, 60, 55]
[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66]
+query ?
+select array_append(column1, column2) from fixed_arrays_values;
+----
+[, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1]
+[11, 12, 13, 14, 15, 16, 17, 18, , 20, 12]
+[21, 22, 23, , 25, 26, 27, 28, 29, 30, 23]
+[31, 32, 33, 34, 35, , 37, 38, 39, 40, 34]
+[, , , , , , , , , , 44]
+[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, ]
+[51, 52, , 54, 55, 56, 57, 58, 59, 60, 55]
+[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66]
+
# array_append with columns #2 (element is list)
query ?
select array_append(column1, column2) from nested_arrays;
@@ -1991,6 +2192,12 @@ select array_append(column1, column2) from
large_nested_arrays;
[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [7, 8, 9]]
[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [10,
11, 12]]
+query ?
+select array_append(column1, column2) from fixed_size_nested_arrays;
+----
+[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [7, 8, 9]]
+[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [10,
11, 12]]
+
# array_append with columns and scalars #1
query ??
select array_append(column2, 100.1), array_append(column3, '.') from arrays;
@@ -2014,6 +2221,17 @@ select array_append(column2, 100.1),
array_append(column3, '.') from large_array
[100.1] [,, .]
[16.6, 17.7, 18.8, 100.1] [.]
+query ??
+select array_append(column2, 100.1), array_append(column3, '.') from
fixed_size_arrays;
+----
+[1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .]
+[, 5.5, 6.6, 100.1] [i, p, , u, m, .]
+[7.7, 8.8, 9.9, 100.1] [d, , l, o, r, .]
+[10.1, , 12.2, 100.1] [s, i, t, a, b, .]
+[13.3, 14.4, 15.5, 100.1] [a, m, e, t, x, .]
+[, , , 100.1] [,, a, b, c, d, .]
+[16.6, 17.7, 18.8, 100.1] [, , , , , .]
+
# array_append with columns and scalars #2
query ??
select array_append(column1, make_array(1, 11, 111)),
array_append(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), column2)
from nested_arrays;
@@ -2027,6 +2245,12 @@ select array_append(column1, arrow_cast(make_array(1,
11, 111), 'LargeList(Int64
[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [1, 11,
111]] [[1, 2, 3], [11, 12, 13], [7, 8, 9]]
[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [1,
11, 111]] [[1, 2, 3], [11, 12, 13], [10, 11, 12]]
+query ??
+select array_append(column1, arrow_cast(make_array(1, 11, 111),
'FixedSizeList(3, Int64)')), array_append(arrow_cast(make_array(make_array(1,
2, 3), make_array(11, 12, 13)), 'FixedSizeList(2, List(Int64))'), column2) from
fixed_size_nested_arrays;
+----
+[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [1, 11,
111]] [[1, 2, 3], [11, 12, 13], [7, 8, 9]]
+[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [1,
11, 111]] [[1, 2, 3], [11, 12, 13], [10, 11, 12]]
+
## array_prepend (aliases: `list_prepend`, `array_push_front`,
`list_push_front`)
# array_prepend with NULLs
@@ -2093,6 +2317,11 @@ select array_prepend(1, arrow_cast(make_array(2, 3, 4),
'LargeList(Int64)')), ar
----
[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
+query ???
+select array_prepend(1, arrow_cast([2, 3, 4], 'FixedSizeList(3, Int64)')),
array_prepend(1.0, arrow_cast([2.0, 3.0, 4.0], 'FixedSizeList(3, Float64)')),
array_prepend('h', arrow_cast(['e', 'l', 'l', 'o'], 'FixedSizeList(4, Utf8)'));
+----
+[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
+
# array_prepend scalar function #4 (element is list)
query ???
select array_prepend(make_array(1), make_array(make_array(2), make_array(3),
make_array(4))), array_prepend(make_array(1.0), make_array([2.0], [3.0],
[4.0])), array_prepend(make_array('h'), make_array(['e'], ['l'], ['l'], ['o']));
@@ -2106,6 +2335,13 @@ select array_prepend(arrow_cast(make_array(1),
'LargeList(Int64)'), arrow_cast(m
----
[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]]
+query ???
+select array_prepend(arrow_cast([1], 'FixedSizeList(1, Int64)'),
arrow_cast([[1], [2], [3]], 'FixedSizeList(3, List(Int64))')),
+ array_prepend(arrow_cast([1.0], 'FixedSizeList(1, Float64)'),
arrow_cast([[2.0], [3.0], [4.0]], 'FixedSizeList(3, List(Float64))')),
+ array_prepend(arrow_cast(['h'], 'FixedSizeList(1, Utf8)'),
arrow_cast([['e'], ['l'], ['l'], ['o']], 'FixedSizeList(4, List(Utf8))'));
+----
+[[1], [1], [2], [3]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]]
+
# list_prepend scalar function #5 (function alias `array_prepend`)
query ???
select list_prepend(1, make_array(2, 3, 4)), list_prepend(1.0, make_array(2.0,
3.0, 4.0)), list_prepend('h', make_array('e', 'l', 'l', 'o'));
@@ -2164,6 +2400,18 @@ select array_prepend(column2, column1) from
large_arrays_values;
[55, 51, 52, , 54, 55, 56, 57, 58, 59, 60]
[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70]
+query ?
+select array_prepend(column2, column1) from fixed_arrays_values;
+----
+[1, , 2, 3, 4, 5, 6, 7, 8, 9, 10]
+[12, 11, 12, 13, 14, 15, 16, 17, 18, , 20]
+[23, 21, 22, 23, , 25, 26, 27, 28, 29, 30]
+[34, 31, 32, 33, 34, 35, , 37, 38, 39, 40]
+[44, , , , , , , , , , ]
+[, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50]
+[55, 51, 52, , 54, 55, 56, 57, 58, 59, 60]
+[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70]
+
# array_prepend with columns #2 (element is list)
query ?
select array_prepend(column2, column1) from nested_arrays;
@@ -2177,6 +2425,12 @@ select array_prepend(column2, column1) from
large_nested_arrays;
[[7, 8, 9], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]]
[[10, 11, 12], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12],
[1, 8, 7]]
+query ?
+select array_prepend(column2, column1) from fixed_size_nested_arrays;
+----
+[[7, 8, 9], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]]
+[[10, 11, 12], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12],
[1, 8, 7]]
+
# array_prepend with columns and scalars #1
query ??
select array_prepend(100.1, column2), array_prepend('.', column3) from arrays;
@@ -2200,6 +2454,17 @@ select array_prepend(100.1, column2), array_prepend('.',
column3) from large_arr
[100.1] [., ,]
[100.1, 16.6, 17.7, 18.8] [.]
+query ??
+select array_prepend(100.1, column2), array_prepend('.', column3) from
fixed_size_arrays;
+----
+[100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m]
+[100.1, , 5.5, 6.6] [., i, p, , u, m]
+[100.1, 7.7, 8.8, 9.9] [., d, , l, o, r]
+[100.1, 10.1, , 12.2] [., s, i, t, a, b]
+[100.1, 13.3, 14.4, 15.5] [., a, m, e, t, x]
+[100.1, , , ] [., ,, a, b, c, d]
+[100.1, 16.6, 17.7, 18.8] [., , , , , ]
+
# array_prepend with columns and scalars #2 (element is list)
query ??
select array_prepend(make_array(1, 11, 111), column1), array_prepend(column2,
make_array(make_array(1, 2, 3), make_array(11, 12, 13))) from nested_arrays;
@@ -2213,6 +2478,12 @@ select array_prepend(arrow_cast(make_array(1, 11, 111),
'LargeList(Int64)'), col
[[1, 11, 111], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5,
6]] [[7, 8, 9], [1, 2, 3], [11, 12, 13]]
[[1, 11, 111], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12],
[1, 8, 7]] [[10, 11, 12], [1, 2, 3], [11, 12, 13]]
+query ??
+select array_prepend(arrow_cast(make_array(1, 11, 111), 'FixedSizeList(3,
Int64)'), column1), array_prepend(column2, arrow_cast(make_array(make_array(1,
2, 3), make_array(11, 12, 13)), 'FixedSizeList(2, List(Int64))')) from
fixed_size_nested_arrays;
+----
+[[1, 11, 111], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5,
6]] [[7, 8, 9], [1, 2, 3], [11, 12, 13]]
+[[1, 11, 111], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12],
[1, 8, 7]] [[10, 11, 12], [1, 2, 3], [11, 12, 13]]
+
## array_repeat (aliases: `list_repeat`)
# array_repeat scalar function #1
@@ -2723,12 +2994,18 @@ NULL 1 NULL
## array_positions (aliases: `list_positions`)
-# array_position with NULL (follow PostgreSQL)
query ?
select array_positions([1, 2, 3, 4, 5], null);
----
[]
+#TODO: https://github.com/apache/arrow-datafusion/issues/7142
+# array_positions with NULL (follow PostgreSQL)
+#query ?
+#select array_positions(null, 1);
+#----
+#NULL
+
# array_positions scalar function #1
query ???
select array_positions(['h', 'e', 'l', 'l', 'o'], 'l'), array_positions([1, 2,
3, 4, 5], 5), array_positions([1, 1, 1], 1);
@@ -3748,6 +4025,11 @@ select cardinality(arrow_cast(make_array(1, 2, 3, 4, 5),
'LargeList(Int64)')), c
----
5 3 5
+query III
+select cardinality(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)')),
cardinality(arrow_cast([1, 3, 5], 'FixedSizeList(3, Int64)')),
cardinality(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5,
Utf8)'));
+----
+5 3 5
+
# cardinality scalar function #2
query II
select cardinality(make_array([1, 2], [3, 4], [5, 6])),
cardinality(array_repeat(array_repeat(array_repeat(3, 3), 2), 3));
@@ -3759,6 +4041,11 @@ select cardinality(arrow_cast(make_array([1, 2], [3, 4],
[5, 6]), 'LargeList(Lis
----
6
+query I
+select cardinality(arrow_cast([[1, 2], [3, 4], [5, 6]], 'FixedSizeList(3,
List(Int64))'));
+----
+6
+
# cardinality scalar function #3
query II
select cardinality(make_array()), cardinality(make_array(make_array()))
@@ -3770,6 +4057,13 @@ select cardinality(arrow_cast(make_array(),
'LargeList(Null)')), cardinality(arr
----
NULL 0
+#TODO
+#https://github.com/apache/arrow-datafusion/issues/9158
+#query II
+#select cardinality(arrow_cast(make_array(), 'FixedSizeList(1, Null)')),
cardinality(arrow_cast(make_array(make_array()), 'FixedSizeList(1,
List(Null))'))
+#----
+#NULL 0
+
# cardinality with columns
query III
select cardinality(column1), cardinality(column2), cardinality(column3) from
arrays;
@@ -3793,6 +4087,17 @@ NULL 3 4
4 NULL 1
4 3 NULL
+query III
+select cardinality(column1), cardinality(column2), cardinality(column3) from
fixed_size_arrays;
+----
+4 3 5
+4 3 5
+4 3 5
+4 3 5
+NULL 3 5
+4 NULL 5
+4 3 NULL
+
## array_remove (aliases: `list_remove`)
# array_remove scalar function #1
@@ -3801,6 +4106,13 @@ select array_remove(make_array(1, 2, 2, 1, 1), 2),
array_remove(make_array(1.0,
----
[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o]
+query ???
+select array_remove(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'),
2),
+ array_remove(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0),
'LargeList(Float64)'), 1.0),
+ array_remove(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'),
'LargeList(Utf8)'), 'l');
+----
+[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o]
+
query ???
select array_remove(arrow_cast(make_array(1, 2, 2, 1, 1), 'FixedSizeList(5,
Int64)'), 2),
array_remove(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0),
'FixedSizeList(5, Float64)'), 1.0),
@@ -3816,6 +4128,14 @@ select
----
[1, , 3] [, 2.2, 3.3] [, bc]
+query ???
+select
+ array_remove(arrow_cast(make_array(1, null, 2, 3), 'LargeList(Int64)'), 2),
+ array_remove(arrow_cast(make_array(1.1, null, 2.2, 3.3),
'LargeList(Float64)'), 1.1),
+ array_remove(arrow_cast(make_array('a', null, 'bc'), 'LargeList(Utf8)'),
'a');
+----
+[1, , 3] [, 2.2, 3.3] [, bc]
+
query ???
select
array_remove(arrow_cast(make_array(1, null, 2, 3), 'FixedSizeList(4,
Int64)'), 2),
@@ -3824,6 +4144,14 @@ select
----
[1, , 3] [, 2.2, 3.3] [, bc]
+#TODO: https://github.com/apache/arrow-datafusion/issues/7142
+# follow PostgreSQL behavior
+#query ?
+#select
+# array_remove(NULL, 1)
+#----
+#NULL
+
query ??
select
array_remove(make_array(1, null, 2), null),
@@ -3831,12 +4159,32 @@ select
----
[1, 2] [1, 2, ]
+query ??
+select
+ array_remove(arrow_cast(make_array(1, null, 2), 'LargeList(Int64)'), null),
+ array_remove(arrow_cast(make_array(1, null, 2, null), 'LargeList(Int64)'),
null);
+----
+[1, 2] [1, 2, ]
+
+query ??
+select
+ array_remove(arrow_cast(make_array(1, null, 2), 'FixedSizeList(3, Int64)'),
null),
+ array_remove(arrow_cast(make_array(1, null, 2, null), 'FixedSizeList(4,
Int64)'), null);
+----
+[1, 2] [1, 2, ]
+
# array_remove scalar function #2 (element is list)
query ??
select array_remove(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7,
8, 9]), [4, 5, 6]), array_remove(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4],
[5, 3, 1], [1, 3, 2]), [2, 3, 4]);
----
[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1],
[1, 3, 2]]
+query ??
+select array_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4,
5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), [4, 5, 6]),
+ array_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5,
3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), [2, 3, 4]);
+----
+[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1],
[1, 3, 2]]
+
query ??
select array_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4,
5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]),
array_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5,
3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]);
@@ -3864,6 +4212,14 @@ select array_remove(column1, column2) from
arrays_with_repeating_elements;
[7, 7, 8, 7, 9, 7, 8, 7, 7]
[11, 12, 10, 11, 12, 10, 11, 12, 10]
+query ?
+select array_remove(column1, column2) from
large_arrays_with_repeating_elements;
+----
+[1, 1, 3, 2, 2, 1, 3, 2, 3]
+[4, 5, 5, 6, 5, 5, 5, 4, 4]
+[7, 7, 8, 7, 9, 7, 8, 7, 7]
+[11, 12, 10, 11, 12, 10, 11, 12, 10]
+
query ?
select array_remove(column1, column2) from
fixed_arrays_with_repeating_elements;
----
@@ -3881,6 +4237,14 @@ select array_remove(column1, column2) from
nested_arrays_with_repeating_elements
[[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19,
20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]]
[[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28,
29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]]
+query ?
+select array_remove(column1, column2) from
large_nested_arrays_with_repeating_elements;
+----
+[[1, 2, 3], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9],
[4, 5, 6], [7, 8, 9]]
+[[10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13,
14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]]
+[[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19,
20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]]
+[[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28,
29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]]
+
query ?
select array_remove(column1, column2) from
fixed_size_nested_arrays_with_repeating_elements;
----
@@ -3898,6 +4262,14 @@ select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7,
7, 10, 7, 8), column2), a
[1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7]
[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10]
+query ??
+select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2),
array_remove(column1, 1) from large_arrays_with_repeating_elements;
+----
+[1, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 1, 3, 2, 2, 1, 3, 2, 3]
+[1, 2, 2, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4]
+[1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7]
+[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10]
+
query ??
select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2),
array_remove(column1, 1) from fixed_arrays_with_repeating_elements;
----
@@ -3916,6 +4288,15 @@ select array_remove(make_array([1, 2, 3], [4, 5, 6], [4,
5, 6], [10, 11, 12], [1
[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12],
[10, 11, 12], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20,
21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19,
20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]]
[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12],
[10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29,
30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28,
29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]]
+query ??
+select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12],
[13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29,
30], [19, 20, 21], [22, 23, 24]), column2),
+ array_remove(column1, make_array(1, 2, 3)) from
large_nested_arrays_with_repeating_elements;
+----
+[[1, 2, 3], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12],
[19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5,
6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5,
6], [7, 8, 9]]
+[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [10, 11, 12],
[19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11,
12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13,
14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]]
+[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12],
[10, 11, 12], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20,
21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19,
20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]]
+[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12],
[10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29,
30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28,
29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]]
+
query ??
select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12],
[13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29,
30], [19, 20, 21], [22, 23, 24]), column2),
array_remove(column1, make_array(1, 2, 3)) from
fixed_size_nested_arrays_with_repeating_elements;
@@ -3983,7 +4364,13 @@ select array_remove_n(make_array([1, 2, 3], [4, 5, 6],
[4, 5, 6], [10, 11, 12],
## array_remove_all (aliases: `list_removes`)
+#TODO: https://github.com/apache/arrow-datafusion/issues/7142
# array_remove_all with NULL elements
+#query ?
+#select array_remove_all(NULL, 1);
+#----
+#NULL
+
query ?
select array_remove_all(make_array(1, 2, 2, 1, 1), NULL);
----
@@ -4217,7 +4604,7 @@ NULL 10
## array_dims (aliases: `list_dims`)
# array dims error
-query error Execution error: array_dims does not support type 'Int64'
+query error DataFusion error: Error during planning: No function matches the
given name and argument types 'array_dims\(Int64\)'. You might need to add
explicit type casts.\n\tCandidate functions:\n\tarray_dims\(array\)
select array_dims(1);
# array_dims scalar function
@@ -4231,12 +4618,27 @@ select array_dims(arrow_cast(make_array(1, 2, 3),
'LargeList(Int64)')), array_di
----
[3] [2, 2] [1, 1, 1, 2, 1]
+query ???
+select array_dims(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')),
array_dims(arrow_cast(make_array([1, 2], [3, 4]), 'FixedSizeList(2,
List(Int64))')), array_dims(arrow_cast(make_array([[[[1], [2]]]]),
'FixedSizeList(1, List(List(List(List(Int64)))))'));
+----
+[3] [2, 2] [1, 1, 1, 2, 1]
+
# array_dims scalar function #2
query ??
select array_dims(array_repeat(array_repeat(array_repeat(2, 3), 2), 1)),
array_dims(array_repeat(array_repeat(array_repeat(3, 4), 5), 2));
----
[1, 2, 3] [2, 5, 4]
+query ??
+select array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(2, 3), 2),
1), 'LargeList(List(List(Int64)))')),
array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(3, 4), 5), 2),
'LargeList(List(List(Int64)))'));
+----
+[1, 2, 3] [2, 5, 4]
+
+query ??
+select array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(2, 3), 2),
1), 'FixedSizeList(1, List(List(Int64)))')),
array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(3, 4), 5), 2),
'FixedSizeList(2, List(List(Int64)))'));
+----
+[1, 2, 3] [2, 5, 4]
+
# array_dims scalar function #3
query ??
select array_dims(make_array()), array_dims(make_array(make_array()))
@@ -4259,6 +4661,11 @@ select list_dims(arrow_cast(make_array(1, 2, 3),
'LargeList(Int64)')), list_dims
----
[3] [2, 2] [1, 1, 1, 2, 1]
+query ???
+select list_dims(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')),
list_dims(arrow_cast(make_array([1, 2], [3, 4]), 'FixedSizeList(2,
List(Int64))')), list_dims(arrow_cast(make_array([[[[1], [2]]]]),
'FixedSizeList(1, List(List(List(List(Int64)))))'));
+----
+[3] [2, 2] [1, 1, 1, 2, 1]
+
# array_dims with columns
query ???
select array_dims(column1), array_dims(column2), array_dims(column3) from
arrays;
@@ -4282,14 +4689,22 @@ NULL [3] [4]
[2, 2] NULL [1]
[2, 2] [3] NULL
+query ???
+select array_dims(column1), array_dims(column2), array_dims(column3) from
fixed_size_arrays;
+----
+[2, 2] [3] [5]
+[2, 2] [3] [5]
+[2, 2] [3] [5]
+[2, 2] [3] [5]
+NULL [3] [5]
+[2, 2] NULL [5]
+[2, 2] [3] NULL
+
## array_ndims (aliases: `list_ndims`)
# array_ndims scalar function #1
-query error
-select array_ndims(1);
-
#follow PostgreSQL
query error
select
@@ -4306,6 +4721,7 @@ CREATE TABLE array_ndims_table
AS VALUES
([1], [1, 2, 3], [[7]], [[[[[10]]]]]),
([2], [4, 5], [[8]], [[[[[10]]]]]),
+ (NUll, [6, 7], [[9]], [[[[[10]]]]]),
([3], [6], [[9]], [[[[[10]]]]])
;
@@ -4318,6 +4734,15 @@ AS SELECT
arrow_cast(column4, 'LargeList(List(List(List(List(Int64)))))') as column4
FROM array_ndims_table;
+statement ok
+CREATE TABLE fixed_array_ndims_table
+AS VALUES
+ (arrow_cast([1], 'FixedSizeList(1, Int64)'), arrow_cast([1, 2, 3],
'FixedSizeList(3, Int64)'), arrow_cast([[7]], 'FixedSizeList(1, List(Int64))'),
arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')),
+ (arrow_cast([2], 'FixedSizeList(1, Int64)'), arrow_cast([4, 5, 6],
'FixedSizeList(3, Int64)'), arrow_cast([[8]], 'FixedSizeList(1, List(Int64))'),
arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')),
+ (null, arrow_cast([6, 7, 8], 'FixedSizeList(3, Int64)'), arrow_cast([[9]],
'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1,
List(List(List(List(Int64)))))')),
+ (arrow_cast([3], 'FixedSizeList(1, Int64)'), arrow_cast([6, 7, 8],
'FixedSizeList(3, Int64)'), arrow_cast([[9]], 'FixedSizeList(1, List(Int64))'),
arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))'))
+;
+
query IIII
select
array_ndims(column1),
@@ -4328,6 +4753,7 @@ from array_ndims_table;
----
1 1 2 5
1 1 2 5
+NULL 1 2 5
1 1 2 5
query IIII
@@ -4340,8 +4766,24 @@ from large_array_ndims_table;
----
1 1 2 5
1 1 2 5
+NULL 1 2 5
+1 1 2 5
+
+query IIII
+select
+ array_ndims(column1),
+ array_ndims(column2),
+ array_ndims(column3),
+ array_ndims(column4)
+from fixed_array_ndims_table;
+----
+1 1 2 5
+1 1 2 5
+NULL 1 2 5
1 1 2 5
+
+
statement ok
drop table array_ndims_table;
@@ -4794,10 +5236,11 @@ true false true false false false true true false false
true false true
## array_distinct
-query ?
-select array_distinct(null);
-----
-NULL
+#TODO: https://github.com/apache/arrow-datafusion/issues/7142
+#query ?
+#select array_distinct(null);
+#----
+#NULL
query ?
select array_distinct([]);
@@ -4841,6 +5284,30 @@ from array_distinct_table_1D_large;
[1, 2, 3, 4, 5]
[3, 5]
+query ?
+select array_distinct(column1)
+from array_distinct_table_1D_fixed;
+----
+[1, 2, 3]
+[1, 2, 3, 4, 5]
+[3, 5]
+
+query ?
+select array_distinct(column1)
+from array_distinct_table_1D_UTF8_fixed;
+----
+[a, bc, def]
+[a, bc, def, defg]
+[defg]
+
+query ?
+select array_distinct(column1)
+from array_distinct_table_2D_fixed;
+----
+[[1, 2], [3, 4], [5, 6]]
+[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
+[, [5, 6]]
+
query ???
select array_intersect(column1, column2),
array_intersect(column3, column4),
@@ -5382,11 +5849,13 @@ select array_concat(column1, [7]) from arrays_values_v2;
[7]
# flatten
+
+#TODO: https://github.com/apache/arrow-datafusion/issues/7142
# follow DuckDB
-query ?
-select flatten(NULL);
-----
-NULL
+#query ?
+#select flatten(NULL);
+#----
+#NULL
# flatten with scalar values #1
query ???
@@ -5403,6 +5872,13 @@ select flatten(arrow_cast(make_array(1, 2, 1, 3, 2),
'LargeList(Int64)')),
----
[1, 2, 1, 3, 2] [1, 2, 3, , 4, , 5] [1.1, 2.2, 3.3, 4.4]
+query ???
+select flatten(arrow_cast(make_array(1, 2, 1, 3, 2), 'FixedSizeList(5,
Int64)')),
+ flatten(arrow_cast(make_array([1], [2, 3], [null], make_array(4, null,
5)), 'FixedSizeList(4, List(Int64))')),
+ flatten(arrow_cast(make_array([[1.1], [2.2]], [[3.3], [4.4]]),
'FixedSizeList(2, List(List(Float64)))'));
+----
+[1, 2, 1, 3, 2] [1, 2, 3, , 4, , 5] [1.1, 2.2, 3.3, 4.4]
+
# flatten with column values
query ????
select flatten(column1),
@@ -5424,6 +5900,16 @@ from large_flatten_table;
[1, 2, 3] [1, 2, 3, 4, 5, 6] [1, 2, 3] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4]
[1, 2, 3, 4, 5, 6] [8] [1, 2, 3] [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
+query ????
+select flatten(column1),
+ flatten(column2),
+ flatten(column3),
+ flatten(column4)
+from fixed_size_flatten_table;
+----
+[1, 2, 3] [1, 2, 3, 4, 5, 6] [1, 2, 3] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4]
+[1, 2, 3, 4, 5, 6] [8, 9, 10, 11, 12, 13] [1, 2, 3] [1.0, 2.0, 3.0, 4.0, 5.0,
6.0]
+
## empty
# empty scalar function #1
query B
@@ -5436,6 +5922,11 @@ select empty(arrow_cast(make_array(1),
'LargeList(Int64)'));
----
false
+query B
+select empty(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'));
+----
+false
+
# empty scalar function #2
query B
select empty(make_array());
@@ -5447,6 +5938,12 @@ select empty(arrow_cast(make_array(),
'LargeList(Null)'));
----
true
+#TODO: https://github.com/apache/arrow-datafusion/issues/9158
+#query B
+#select empty(arrow_cast(make_array(), 'FixedSizeList(0, Null)'));
+#----
+#true
+
# empty scalar function #3
query B
select empty(make_array(NULL));
@@ -5458,11 +5955,17 @@ select empty(arrow_cast(make_array(NULL),
'LargeList(Null)'));
----
false
-# empty scalar function #4
query B
-select empty(NULL);
+select empty(arrow_cast(make_array(NULL), 'FixedSizeList(1, Null)'));
----
-NULL
+false
+
+#TODO: https://github.com/apache/arrow-datafusion/issues/7142
+# empty scalar function #4
+#query B
+#select empty(NULL);
+#----
+#NULL
# empty scalar function #5
query B
@@ -5487,6 +5990,17 @@ NULL
false
false
+query B
+select empty(column1) from fixed_size_arrays;
+----
+false
+false
+false
+false
+NULL
+false
+false
+
query ?
SELECT string_to_array('abcxxxdef', 'xxx')
----
@@ -5686,6 +6200,9 @@ drop table fixed_slices;
statement ok
drop table arrayspop;
+statement ok
+drop table large_arrayspop;
+
statement ok
drop table arrays_values;
@@ -5806,6 +6323,9 @@ drop table flatten_table;
statement ok
drop table large_flatten_table;
+statement ok
+drop table fixed_size_flatten_table;
+
statement ok
drop table arrays_values_without_nulls;