This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 3ffeb52c1c feat: support `array_pop_back` function (#7348)
3ffeb52c1c is described below
commit 3ffeb52c1c9891e63fcb17db41283d7299af6f18
Author: Ruixiang Tan <[email protected]>
AuthorDate: Thu Aug 24 00:35:42 2023 +0800
feat: support `array_pop_back` function (#7348)
---
datafusion/expr/src/built_in_function.rs | 6 +
datafusion/expr/src/expr_fn.rs | 9 ++
datafusion/physical-expr/src/array_expressions.rs | 161 ++++++++++++++++++++++
datafusion/physical-expr/src/functions.rs | 4 +-
datafusion/proto/proto/datafusion.proto | 1 +
datafusion/proto/src/generated/pbjson.rs | 3 +
datafusion/proto/src/generated/prost.rs | 3 +
datafusion/proto/src/logical_plan/from_proto.rs | 6 +-
datafusion/proto/src/logical_plan/to_proto.rs | 1 +
datafusion/sqllogictest/test_files/array.slt | 75 ++++++++++
docs/source/user-guide/expressions.md | 1 +
docs/source/user-guide/sql/scalar_functions.md | 25 ++++
12 files changed, 293 insertions(+), 2 deletions(-)
diff --git a/datafusion/expr/src/built_in_function.rs
b/datafusion/expr/src/built_in_function.rs
index e97269e474..de046cde89 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -134,6 +134,8 @@ pub enum BuiltinScalarFunction {
ArrayHasAll,
/// array_has_any
ArrayHasAny,
+ /// array_pop_back
+ ArrayPopBack,
/// array_dims
ArrayDims,
/// array_element
@@ -370,6 +372,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayElement => Volatility::Immutable,
BuiltinScalarFunction::ArrayLength => Volatility::Immutable,
BuiltinScalarFunction::ArrayNdims => Volatility::Immutable,
+ BuiltinScalarFunction::ArrayPopBack => Volatility::Immutable,
BuiltinScalarFunction::ArrayPosition => Volatility::Immutable,
BuiltinScalarFunction::ArrayPositions => Volatility::Immutable,
BuiltinScalarFunction::ArrayPrepend => Volatility::Immutable,
@@ -552,6 +555,7 @@ impl BuiltinScalarFunction {
},
BuiltinScalarFunction::ArrayLength => Ok(UInt64),
BuiltinScalarFunction::ArrayNdims => Ok(UInt64),
+ BuiltinScalarFunction::ArrayPopBack =>
Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayPosition => Ok(UInt64),
BuiltinScalarFunction::ArrayPositions => {
Ok(List(Arc::new(Field::new("item", UInt64, true))))
@@ -823,6 +827,7 @@ impl BuiltinScalarFunction {
// for now, the list is small, as we do not have many built-in
functions.
match self {
BuiltinScalarFunction::ArrayAppend => Signature::any(2,
self.volatility()),
+ BuiltinScalarFunction::ArrayPopBack => Signature::any(1,
self.volatility()),
BuiltinScalarFunction::ArrayConcat => {
Signature::variadic_any(self.volatility())
}
@@ -1333,6 +1338,7 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static
[&'static str] {
}
BuiltinScalarFunction::ArrayLength => &["array_length", "list_length"],
BuiltinScalarFunction::ArrayNdims => &["array_ndims", "list_ndims"],
+ BuiltinScalarFunction::ArrayPopBack => &["array_pop_back",
"list_pop_back"],
BuiltinScalarFunction::ArrayPosition => &[
"array_position",
"list_position",
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index e3fd5ceb20..a7f7d9b6e6 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -545,6 +545,14 @@ scalar_expr!(
array element,
"appends an element to the end of an array."
);
+
+// Declares the `array_pop_back(array)` expression helper for the
+// `ArrayPopBack` built-in, with the description string given below.
+scalar_expr!(
+ ArrayPopBack,
+ array_pop_back,
+ array,
+ "returns the array without the last element."
+);
+
nary_scalar_expr!(ArrayConcat, array_concat, "concatenates arrays.");
scalar_expr!(
ArrayHas,
@@ -1087,6 +1095,7 @@ mod test {
test_scalar_expr!(FromUnixtime, from_unixtime, unixtime);
test_scalar_expr!(ArrayAppend, array_append, array, element);
+ test_scalar_expr!(ArrayPopBack, array_pop_back, array);
test_unary_scalar_expr!(ArrayDims, array_dims);
test_scalar_expr!(ArrayLength, array_length, array, dimension);
test_unary_scalar_expr!(ArrayNdims, array_ndims);
diff --git a/datafusion/physical-expr/src/array_expressions.rs
b/datafusion/physical-expr/src/array_expressions.rs
index 97d7ee4610..98b14fdbc3 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -599,6 +599,22 @@ pub fn array_slice(args: &[ArrayRef]) -> Result<ArrayRef> {
define_array_slice(list_array, key, extra_key, false)
}
+/// Returns each list in `args[0]` without its last element.
+///
+/// The work is delegated to `define_array_slice`: every row is sliced with a
+/// start key of `0` and an end key of `len - 1`, where `len` is that row's
+/// element count. A NULL row gets an end key of `0`.
+///
+/// Any error from `as_list_array` or `define_array_slice` is propagated.
+pub fn array_pop_back(args: &[ArrayRef]) -> Result<ArrayRef> {
+    let list_array = as_list_array(&args[0])?;
+
+    // One start key per row, all zero.
+    let from = Int64Array::from(vec![0; list_array.len()]);
+
+    // One end key per row: the row's length minus one, or 0 for a NULL row.
+    let to_keys: Vec<_> = list_array
+        .iter()
+        .map(|row| match row {
+            Some(elements) => elements.len() as i64 - 1,
+            None => 0,
+        })
+        .collect();
+    let to = Int64Array::from(to_keys);
+
+    define_array_slice(list_array, &from, &to, false)
+}
+
macro_rules! append {
($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{
let mut offsets: Vec<i32> = vec![0];
@@ -2005,6 +2021,151 @@ mod tests {
);
}
+ // Pops the last element of a one-row Int64 list again and again, feeding each
+ // result back in as the next input, until the row is empty; then checks a row
+ // that contains NULLs.
+ #[test]
+ fn test_array_pop_back() {
+     // array_pop_back([1, 2, 3, 4]) = [1, 2, 3]
+     let list_array = return_array().into_array(1);
+     let arr = array_pop_back(&[list_array])
+         .expect("failed to initialize function array_pop_back");
+     let result =
+         as_list_array(&arr).expect("failed to initialize function array_pop_back");
+     assert_eq!(
+         &[1, 2, 3],
+         result
+             .value(0)
+             .as_any()
+             .downcast_ref::<Int64Array>()
+             .unwrap()
+             .values()
+     );
+
+     // array_pop_back([1, 2, 3]) = [1, 2]
+     let list_array = Arc::new(result.clone());
+     let arr = array_pop_back(&[list_array])
+         .expect("failed to initialize function array_pop_back");
+     let result =
+         as_list_array(&arr).expect("failed to initialize function array_pop_back");
+     assert_eq!(
+         &[1, 2],
+         result
+             .value(0)
+             .as_any()
+             .downcast_ref::<Int64Array>()
+             .unwrap()
+             .values()
+     );
+
+     // array_pop_back([1, 2]) = [1]
+     let list_array = Arc::new(result.clone());
+     let arr = array_pop_back(&[list_array])
+         .expect("failed to initialize function array_pop_back");
+     let result =
+         as_list_array(&arr).expect("failed to initialize function array_pop_back");
+     assert_eq!(
+         &[1],
+         result
+             .value(0)
+             .as_any()
+             .downcast_ref::<Int64Array>()
+             .unwrap()
+             .values()
+     );
+
+     // array_pop_back([1]) = []
+     let list_array = Arc::new(result.clone());
+     let arr = array_pop_back(&[list_array])
+         .expect("failed to initialize function array_pop_back");
+     let result =
+         as_list_array(&arr).expect("failed to initialize function array_pop_back");
+     assert_eq!(
+         &[],
+         result
+             .value(0)
+             .as_any()
+             .downcast_ref::<Int64Array>()
+             .unwrap()
+             .values()
+     );
+     // array_pop_back([]) = []
+     let list_array = Arc::new(result.clone());
+     let arr = array_pop_back(&[list_array])
+         .expect("failed to initialize function array_pop_back");
+     let result =
+         as_list_array(&arr).expect("failed to initialize function array_pop_back");
+     assert_eq!(
+         &[],
+         result
+             .value(0)
+             .as_any()
+             .downcast_ref::<Int64Array>()
+             .unwrap()
+             .values()
+     );
+
+     // array_pop_back([1, NULL, 3, NULL]) = [1, NULL, 3]
+     let list_array = return_array_with_nulls().into_array(1);
+     let arr = array_pop_back(&[list_array])
+         .expect("failed to initialize function array_pop_back");
+     let result =
+         as_list_array(&arr).expect("failed to initialize function array_pop_back");
+     // Three child values remain; only the middle one is NULL.
+     assert_eq!(3, result.values().len());
+     assert_eq!(
+         &[false, true, false],
+         &[
+             result.values().is_null(0),
+             result.values().is_null(1),
+             result.values().is_null(2)
+         ]
+     );
+ }
+ // Pops the last element of a one-row nested (list-of-lists) array, feeding
+ // each result back in as the next input, until the row is empty.
+ #[test]
+ fn test_nested_array_pop_back() {
+     // array_pop_back([[1, 2, 3, 4], [5, 6, 7, 8]]) = [[1, 2, 3, 4]]
+     let list_array = return_nested_array().into_array(1);
+     let arr = array_pop_back(&[list_array])
+         .expect("failed to initialize function array_pop_back");
+     let result =
+         as_list_array(&arr).expect("failed to initialize function array_pop_back");
+     assert_eq!(
+         &[1, 2, 3, 4],
+         result
+             .value(0)
+             .as_any()
+             .downcast_ref::<ListArray>()
+             .unwrap()
+             .value(0)
+             .as_any()
+             .downcast_ref::<Int64Array>()
+             .unwrap()
+             .values()
+     );
+
+     // array_pop_back([[1, 2, 3, 4]]) = []
+     let list_array = Arc::new(result.clone());
+     let arr = array_pop_back(&[list_array])
+         .expect("failed to initialize function array_pop_back");
+     let result =
+         as_list_array(&arr).expect("failed to initialize function array_pop_back");
+     assert!(result
+         .value(0)
+         .as_any()
+         .downcast_ref::<ListArray>()
+         .unwrap()
+         .is_empty());
+     // array_pop_back([]) = []
+     let list_array = Arc::new(result.clone());
+     let arr = array_pop_back(&[list_array])
+         .expect("failed to initialize function array_pop_back");
+     let result =
+         as_list_array(&arr).expect("failed to initialize function array_pop_back");
+     assert!(result
+         .value(0)
+         .as_any()
+         .downcast_ref::<ListArray>()
+         .unwrap()
+         .is_empty());
+ }
+
#[test]
fn test_array_slice() {
// array_slice([1, 2, 3, 4], 1, 3) = [1, 2, 3]
diff --git a/datafusion/physical-expr/src/functions.rs
b/datafusion/physical-expr/src/functions.rs
index 2d6dbfdf52..420e40e9c4 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -449,10 +449,12 @@ pub fn create_physical_fun(
BuiltinScalarFunction::Flatten => {
Arc::new(|args|
make_scalar_function(array_expressions::flatten)(args))
}
-
BuiltinScalarFunction::ArrayNdims => {
Arc::new(|args|
make_scalar_function(array_expressions::array_ndims)(args))
}
+ BuiltinScalarFunction::ArrayPopBack => {
+ Arc::new(|args|
make_scalar_function(array_expressions::array_pop_back)(args))
+ }
BuiltinScalarFunction::ArrayPosition => {
Arc::new(|args|
make_scalar_function(array_expressions::array_position)(args))
}
diff --git a/datafusion/proto/proto/datafusion.proto
b/datafusion/proto/proto/datafusion.proto
index f31a593ad5..a0148c8af0 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -598,6 +598,7 @@ enum ScalarFunction {
Isnan = 113;
Iszero = 114;
ArrayEmpty = 115;
+ ArrayPopBack = 116;
}
message ScalarFunctionNode {
diff --git a/datafusion/proto/src/generated/pbjson.rs
b/datafusion/proto/src/generated/pbjson.rs
index 7d1a18349c..9485161174 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -18948,6 +18948,7 @@ impl serde::Serialize for ScalarFunction {
Self::Isnan => "Isnan",
Self::Iszero => "Iszero",
Self::ArrayEmpty => "ArrayEmpty",
+ Self::ArrayPopBack => "ArrayPopBack",
};
serializer.serialize_str(variant)
}
@@ -19075,6 +19076,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"Isnan",
"Iszero",
"ArrayEmpty",
+ "ArrayPopBack",
];
struct GeneratedVisitor;
@@ -19233,6 +19235,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"Isnan" => Ok(ScalarFunction::Isnan),
"Iszero" => Ok(ScalarFunction::Iszero),
"ArrayEmpty" => Ok(ScalarFunction::ArrayEmpty),
+ "ArrayPopBack" => Ok(ScalarFunction::ArrayPopBack),
_ => Err(serde::de::Error::unknown_variant(value, FIELDS)),
}
}
diff --git a/datafusion/proto/src/generated/prost.rs
b/datafusion/proto/src/generated/prost.rs
index fc55b7e23a..22a4beadb8 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2378,6 +2378,7 @@ pub enum ScalarFunction {
Isnan = 113,
Iszero = 114,
ArrayEmpty = 115,
+ ArrayPopBack = 116,
}
impl ScalarFunction {
/// String value of the enum field names used in the ProtoBuf definition.
@@ -2502,6 +2503,7 @@ impl ScalarFunction {
ScalarFunction::Isnan => "Isnan",
ScalarFunction::Iszero => "Iszero",
ScalarFunction::ArrayEmpty => "ArrayEmpty",
+ ScalarFunction::ArrayPopBack => "ArrayPopBack",
}
}
/// Creates an enum from field names used in the ProtoBuf definition.
@@ -2623,6 +2625,7 @@ impl ScalarFunction {
"Isnan" => Some(Self::Isnan),
"Iszero" => Some(Self::Iszero),
"ArrayEmpty" => Some(Self::ArrayEmpty),
+ "ArrayPopBack" => Some(Self::ArrayPopBack),
_ => None,
}
}
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs
b/datafusion/proto/src/logical_plan/from_proto.rs
index c5ab0c25f6..7fbf2ff52c 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -59,7 +59,7 @@ use datafusion_expr::{
WindowFrameUnits,
};
use datafusion_expr::{
- array_empty,
+ array_empty, array_pop_back,
expr::{Alias, Placeholder},
};
use std::sync::Arc;
@@ -464,6 +464,7 @@ impl From<&protobuf::ScalarFunction> for
BuiltinScalarFunction {
ScalarFunction::Flatten => Self::Flatten,
ScalarFunction::ArrayLength => Self::ArrayLength,
ScalarFunction::ArrayNdims => Self::ArrayNdims,
+ ScalarFunction::ArrayPopBack => Self::ArrayPopBack,
ScalarFunction::ArrayPosition => Self::ArrayPosition,
ScalarFunction::ArrayPositions => Self::ArrayPositions,
ScalarFunction::ArrayPrepend => Self::ArrayPrepend,
@@ -1272,6 +1273,9 @@ pub fn parse_expr(
parse_expr(&args[0], registry)?,
parse_expr(&args[1], registry)?,
)),
+ ScalarFunction::ArrayPopBack => {
+ Ok(array_pop_back(parse_expr(&args[0], registry)?))
+ }
ScalarFunction::ArrayPrepend => Ok(array_prepend(
parse_expr(&args[0], registry)?,
parse_expr(&args[1], registry)?,
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs
b/datafusion/proto/src/logical_plan/to_proto.rs
index 82df53af92..b90e3b0e4b 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1460,6 +1460,7 @@ impl TryFrom<&BuiltinScalarFunction> for
protobuf::ScalarFunction {
BuiltinScalarFunction::Flatten => Self::Flatten,
BuiltinScalarFunction::ArrayLength => Self::ArrayLength,
BuiltinScalarFunction::ArrayNdims => Self::ArrayNdims,
+ BuiltinScalarFunction::ArrayPopBack => Self::ArrayPopBack,
BuiltinScalarFunction::ArrayPosition => Self::ArrayPosition,
BuiltinScalarFunction::ArrayPositions => Self::ArrayPositions,
BuiltinScalarFunction::ArrayPrepend => Self::ArrayPrepend,
diff --git a/datafusion/sqllogictest/test_files/array.slt
b/datafusion/sqllogictest/test_files/array.slt
index a4969b1e20..f54c2f7171 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -79,6 +79,17 @@ AS VALUES
(make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60), 5, NULL)
;
+statement ok
+CREATE TABLE arrayspop
+AS VALUES
+ (make_array(1, 2, NULL)),
+ (make_array(3, 4, 5, NULL)),
+ (make_array(6, 7, 8, NULL, 9)),
+ (make_array(NULL, NULL, 100)),
+ (NULL),
+ (make_array(NULL, 10, 11, 12))
+;
+
statement ok
CREATE TABLE nested_arrays
AS VALUES
@@ -687,6 +698,67 @@ NULL 23
NULL 43
5 NULL
+## array_pop_back (aliases: `list_pop_back`)
+
+# array_pop_back scalar function #1
+query ??
+select array_pop_back(make_array(1, 2, 3, 4, 5)),
array_pop_back(make_array('h', 'e', 'l', 'l', 'o'));
+----
+[1, 2, 3, 4] [h, e, l, l]
+
+# array_pop_back scalar function #2 (after array_pop_back, array is empty)
+query ?
+select array_pop_back(make_array(1));
+----
+[]
+
+# array_pop_back scalar function #3 (array_pop_back the empty array)
+query ?
+select array_pop_back(array_pop_back(make_array(1)));
+----
+[]
+
+# array_pop_back scalar function #4 (array_pop_back the arrays which have NULL)
+query ??
+select array_pop_back(make_array(1, 2, 3, 4, NULL)),
array_pop_back(make_array(NULL, 'e', 'l', NULL, 'o'));
+----
+[1, 2, 3, 4] [, e, l, ]
+
+# array_pop_back scalar function #5 (array_pop_back the nested arrays)
+query ?
+select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1),
make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5,
6)));
+----
+[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]]
+
+# array_pop_back scalar function #6 (nested array whose last element is NULL)
+query ?
+select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1),
make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL));
+----
+[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]]
+
+# array_pop_back scalar function #7 (nested array with a non-trailing NULL)
+query ?
+select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1),
make_array(7, 8, 9), NULL, make_array(1, 7, 4)));
+----
+[[1, 2, 3], [2, 9, 1], [7, 8, 9], ]
+
+# array_pop_back scalar function #8 (after array_pop_back, nested array is
empty)
+query ?
+select array_pop_back(make_array(make_array(1, 2, 3)));
+----
+[]
+
+# array_pop_back with columns
+query ?
+select array_pop_back(column1) from arrayspop;
+----
+[1, 2]
+[3, 4, 5]
+[6, 7, 8, ]
+[, ]
+[]
+[, 10, 11]
+
## array_slice (aliases: list_slice)
# array_slice scalar function #1 (with positive indexes)
@@ -2430,6 +2502,9 @@ drop table arrays;
statement ok
drop table slices;
+statement ok
+drop table arrayspop;
+
statement ok
drop table arrays_values;
diff --git a/docs/source/user-guide/expressions.md
b/docs/source/user-guide/expressions.md
index d8dfa7af53..a481e525fe 100644
--- a/docs/source/user-guide/expressions.md
+++ b/docs/source/user-guide/expressions.md
@@ -194,6 +194,7 @@ Unlike to some databases the math functions in Datafusion
works the same way as
| flatten(array) | Converts an array of arrays to a
flat array `flatten([[1], [2, 3], [4, 5, 6]]) -> [1, 2, 3, 4, 5, 6]`
|
| array_length(array, dimension) | Returns the length of the array
dimension. `array_length([1, 2, 3, 4, 5]) -> 5`
|
| array_ndims(array) | Returns the number of dimensions of
the array. `array_ndims([[1, 2, 3], [4, 5, 6]]) -> 2`
|
+| array_pop_back(array) | Returns the array without the last
element. `array_pop_back([1, 2, 3]) -> [1, 2]`
|
| array_position(array, element) | Searches for an element in the
array, returns first occurrence. `array_position([1, 2, 2, 3, 4], 2) -> 2`
|
| array_positions(array, element) | Searches for an element in the
array, returns all occurrences. `array_positions([1, 2, 2, 3, 4], 2) -> [2, 3]`
|
| array_prepend(array, element) | Prepends an element to the beginning
of an array. `array_prepend(1, [2, 3, 4]) -> [1, 2, 3, 4]`
|
diff --git a/docs/source/user-guide/sql/scalar_functions.md
b/docs/source/user-guide/sql/scalar_functions.md
index c6e2f5ddd8..fc62938502 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -1481,6 +1481,7 @@ from_unixtime(expression)
- [array_length](#array_length)
- [array_ndims](#array_ndims)
- [array_prepend](#array_prepend)
+- [array_pop_back](#array_pop_back)
- [array_position](#array_position)
- [array_positions](#array_positions)
- [array_push_back](#array_push_back)
@@ -1830,6 +1831,30 @@ array_prepend(element, array)
- list_prepend
- list_push_front
+### `array_pop_back`
+
+Returns the array without the last element.
+
+```
+array_pop_back(array)
+```
+
+#### Arguments
+
+- **array**: Array expression.
+ Can be a constant, column, or function, and any combination of array
operators.
+
+#### Example
+
+```
+❯ select array_pop_back([1, 2, 3]);
++-------------------------------+
+| array_pop_back(List([1,2,3])) |
++-------------------------------+
+| [1, 2] |
++-------------------------------+
+```
+
### `array_position`
Returns a string with an input string repeated a specified number.