This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 6b945a4409 Implement func `array_pop_front` (#8142)
6b945a4409 is described below
commit 6b945a4409e1c8e9c50124e30a0996b65e9d31c6
Author: 谭巍 <[email protected]>
AuthorDate: Thu Nov 16 05:17:05 2023 +0800
Implement func `array_pop_front` (#8142)
* implement array_pop_front
Signed-off-by: veeupup <[email protected]>
* abstract array_pop
* fix cargo check
* add docs for array_pop_front
Signed-off-by: veeupup <[email protected]>
* fix comments
---------
Signed-off-by: veeupup <[email protected]>
---
datafusion/expr/src/built_in_function.rs | 6 ++++
datafusion/expr/src/expr_fn.rs | 8 +++++
datafusion/physical-expr/src/array_expressions.rs | 42 ++++++++++++++++++++---
datafusion/physical-expr/src/functions.rs | 3 ++
datafusion/proto/proto/datafusion.proto | 1 +
datafusion/proto/src/generated/pbjson.rs | 3 ++
datafusion/proto/src/generated/prost.rs | 3 ++
datafusion/proto/src/logical_plan/from_proto.rs | 6 +++-
datafusion/proto/src/logical_plan/to_proto.rs | 1 +
datafusion/sqllogictest/test_files/array.slt | 38 ++++++++++++++++++++
docs/source/user-guide/expressions.md | 1 +
docs/source/user-guide/sql/scalar_functions.md | 25 ++++++++++++++
12 files changed, 131 insertions(+), 6 deletions(-)
diff --git a/datafusion/expr/src/built_in_function.rs
b/datafusion/expr/src/built_in_function.rs
index 473094c001..1b48c37406 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -138,6 +138,8 @@ pub enum BuiltinScalarFunction {
ArrayHasAll,
/// array_has_any
ArrayHasAny,
+ /// array_pop_front
+ ArrayPopFront,
/// array_pop_back
ArrayPopBack,
/// array_dims
@@ -392,6 +394,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayElement => Volatility::Immutable,
BuiltinScalarFunction::ArrayLength => Volatility::Immutable,
BuiltinScalarFunction::ArrayNdims => Volatility::Immutable,
+ BuiltinScalarFunction::ArrayPopFront => Volatility::Immutable,
BuiltinScalarFunction::ArrayPopBack => Volatility::Immutable,
BuiltinScalarFunction::ArrayPosition => Volatility::Immutable,
BuiltinScalarFunction::ArrayPositions => Volatility::Immutable,
@@ -570,6 +573,7 @@ impl BuiltinScalarFunction {
},
BuiltinScalarFunction::ArrayLength => Ok(UInt64),
BuiltinScalarFunction::ArrayNdims => Ok(UInt64),
+ BuiltinScalarFunction::ArrayPopFront =>
Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayPopBack =>
Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayPosition => Ok(UInt64),
BuiltinScalarFunction::ArrayPositions => {
@@ -868,6 +872,7 @@ impl BuiltinScalarFunction {
// for now, the list is small, as we do not have many built-in
functions.
match self {
BuiltinScalarFunction::ArrayAppend => Signature::any(2,
self.volatility()),
+ BuiltinScalarFunction::ArrayPopFront => Signature::any(1,
self.volatility()),
BuiltinScalarFunction::ArrayPopBack => Signature::any(1,
self.volatility()),
BuiltinScalarFunction::ArrayConcat => {
Signature::variadic_any(self.volatility())
@@ -1512,6 +1517,7 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static
[&'static str] {
}
BuiltinScalarFunction::ArrayLength => &["array_length", "list_length"],
BuiltinScalarFunction::ArrayNdims => &["array_ndims", "list_ndims"],
+ BuiltinScalarFunction::ArrayPopFront => &["array_pop_front",
"list_pop_front"],
BuiltinScalarFunction::ArrayPopBack => &["array_pop_back",
"list_pop_back"],
BuiltinScalarFunction::ArrayPosition => &[
"array_position",
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index e70a4a90f7..bcf1aa0ca7 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -590,6 +590,13 @@ scalar_expr!(
"returns the array without the last element."
);
+scalar_expr!(
+ ArrayPopFront,
+ array_pop_front,
+ array,
+ "returns the array without the first element."
+);
+
nary_scalar_expr!(ArrayConcat, array_concat, "concatenates arrays.");
scalar_expr!(
ArrayHas,
@@ -1166,6 +1173,7 @@ mod test {
test_scalar_expr!(FromUnixtime, from_unixtime, unixtime);
test_scalar_expr!(ArrayAppend, array_append, array, element);
+ test_scalar_expr!(ArrayPopFront, array_pop_front, array);
test_scalar_expr!(ArrayPopBack, array_pop_back, array);
test_unary_scalar_expr!(ArrayDims, array_dims);
test_scalar_expr!(ArrayLength, array_length, array, dimension);
diff --git a/datafusion/physical-expr/src/array_expressions.rs
b/datafusion/physical-expr/src/array_expressions.rs
index 515df2a970..ded606c3b7 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -636,13 +636,33 @@ pub fn array_slice(args: &[ArrayRef]) -> Result<ArrayRef>
{
define_array_slice(list_array, key, extra_key, false)
}
+fn general_array_pop(
+ list_array: &GenericListArray<i32>,
+ from_back: bool,
+) -> Result<(Vec<i64>, Vec<i64>)> {
+ if from_back {
+ let key = vec![0; list_array.len()];
+ // Attention: `arr.len() - 1` in extra key defines the last element
position (position = index + 1, not inclusive) we want in the new array.
+ let extra_key: Vec<_> = list_array
+ .iter()
+ .map(|x| x.map_or(0, |arr| arr.len() as i64 - 1))
+ .collect();
+ Ok((key, extra_key))
+ } else {
+ // Attention: 2 in the `key` defines the first element position
(position = index + 1) we want in the new array.
+ // We only handle two cases of the first element index: if the old
array has any elements, it starts from 2 (index + 1); otherwise it starts from the initial position.
+ let key: Vec<_> = list_array.iter().map(|x| x.map_or(0, |_|
2)).collect();
+ let extra_key: Vec<_> = list_array
+ .iter()
+ .map(|x| x.map_or(0, |arr| arr.len() as i64))
+ .collect();
+ Ok((key, extra_key))
+ }
+}
+
pub fn array_pop_back(args: &[ArrayRef]) -> Result<ArrayRef> {
let list_array = as_list_array(&args[0])?;
- let key = vec![0; list_array.len()];
- let extra_key: Vec<_> = list_array
- .iter()
- .map(|x| x.map_or(0, |arr| arr.len() as i64 - 1))
- .collect();
+ let (key, extra_key) = general_array_pop(list_array, true)?;
define_array_slice(
list_array,
@@ -767,6 +787,18 @@ pub fn gen_range(args: &[ArrayRef]) -> Result<ArrayRef> {
Ok(arr)
}
+pub fn array_pop_front(args: &[ArrayRef]) -> Result<ArrayRef> {
+ let list_array = as_list_array(&args[0])?;
+ let (key, extra_key) = general_array_pop(list_array, false)?;
+
+ define_array_slice(
+ list_array,
+ &Int64Array::from(key),
+ &Int64Array::from(extra_key),
+ false,
+ )
+}
+
/// Array_append SQL function
pub fn array_append(args: &[ArrayRef]) -> Result<ArrayRef> {
let list_array = as_list_array(&args[0])?;
diff --git a/datafusion/physical-expr/src/functions.rs
b/datafusion/physical-expr/src/functions.rs
index 543d7eb654..1e8500079f 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -359,6 +359,9 @@ pub fn create_physical_fun(
BuiltinScalarFunction::ArrayNdims => {
Arc::new(|args|
make_scalar_function(array_expressions::array_ndims)(args))
}
+ BuiltinScalarFunction::ArrayPopFront => Arc::new(|args| {
+ make_scalar_function(array_expressions::array_pop_front)(args)
+ }),
BuiltinScalarFunction::ArrayPopBack => {
Arc::new(|args|
make_scalar_function(array_expressions::array_pop_back)(args))
}
diff --git a/datafusion/proto/proto/datafusion.proto
b/datafusion/proto/proto/datafusion.proto
index fa080518d5..66c34c7a12 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -638,6 +638,7 @@ enum ScalarFunction {
ArrayUnion = 120;
OverLay = 121;
Range = 122;
+ ArrayPopFront = 123;
}
message ScalarFunctionNode {
diff --git a/datafusion/proto/src/generated/pbjson.rs
b/datafusion/proto/src/generated/pbjson.rs
index 08e7413102..628adcc411 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -20937,6 +20937,7 @@ impl serde::Serialize for ScalarFunction {
Self::ArrayUnion => "ArrayUnion",
Self::OverLay => "OverLay",
Self::Range => "Range",
+ Self::ArrayPopFront => "ArrayPopFront",
};
serializer.serialize_str(variant)
}
@@ -21071,6 +21072,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"ArrayUnion",
"OverLay",
"Range",
+ "ArrayPopFront",
];
struct GeneratedVisitor;
@@ -21234,6 +21236,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"ArrayUnion" => Ok(ScalarFunction::ArrayUnion),
"OverLay" => Ok(ScalarFunction::OverLay),
"Range" => Ok(ScalarFunction::Range),
+ "ArrayPopFront" => Ok(ScalarFunction::ArrayPopFront),
_ => Err(serde::de::Error::unknown_variant(value, FIELDS)),
}
}
diff --git a/datafusion/proto/src/generated/prost.rs
b/datafusion/proto/src/generated/prost.rs
index 15606488b3..317b888447 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2569,6 +2569,7 @@ pub enum ScalarFunction {
ArrayUnion = 120,
OverLay = 121,
Range = 122,
+ ArrayPopFront = 123,
}
impl ScalarFunction {
/// String value of the enum field names used in the ProtoBuf definition.
@@ -2700,6 +2701,7 @@ impl ScalarFunction {
ScalarFunction::ArrayUnion => "ArrayUnion",
ScalarFunction::OverLay => "OverLay",
ScalarFunction::Range => "Range",
+ ScalarFunction::ArrayPopFront => "ArrayPopFront",
}
}
/// Creates an enum from field names used in the ProtoBuf definition.
@@ -2828,6 +2830,7 @@ impl ScalarFunction {
"ArrayUnion" => Some(Self::ArrayUnion),
"OverLay" => Some(Self::OverLay),
"Range" => Some(Self::Range),
+ "ArrayPopFront" => Some(Self::ArrayPopFront),
_ => None,
}
}
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs
b/datafusion/proto/src/logical_plan/from_proto.rs
index b3d6857003..94c9f98066 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -66,7 +66,7 @@ use datafusion_expr::{
WindowFrameUnits,
};
use datafusion_expr::{
- array_empty, array_pop_back,
+ array_empty, array_pop_back, array_pop_front,
expr::{Alias, Placeholder},
};
use std::sync::Arc;
@@ -473,6 +473,7 @@ impl From<&protobuf::ScalarFunction> for
BuiltinScalarFunction {
ScalarFunction::Flatten => Self::Flatten,
ScalarFunction::ArrayLength => Self::ArrayLength,
ScalarFunction::ArrayNdims => Self::ArrayNdims,
+ ScalarFunction::ArrayPopFront => Self::ArrayPopFront,
ScalarFunction::ArrayPopBack => Self::ArrayPopBack,
ScalarFunction::ArrayPosition => Self::ArrayPosition,
ScalarFunction::ArrayPositions => Self::ArrayPositions,
@@ -1330,6 +1331,9 @@ pub fn parse_expr(
parse_expr(&args[0], registry)?,
parse_expr(&args[1], registry)?,
)),
+ ScalarFunction::ArrayPopFront => {
+ Ok(array_pop_front(parse_expr(&args[0], registry)?))
+ }
ScalarFunction::ArrayPopBack => {
Ok(array_pop_back(parse_expr(&args[0], registry)?))
}
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs
b/datafusion/proto/src/logical_plan/to_proto.rs
index 144f285310..53be5f7bd4 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1480,6 +1480,7 @@ impl TryFrom<&BuiltinScalarFunction> for
protobuf::ScalarFunction {
BuiltinScalarFunction::Flatten => Self::Flatten,
BuiltinScalarFunction::ArrayLength => Self::ArrayLength,
BuiltinScalarFunction::ArrayNdims => Self::ArrayNdims,
+ BuiltinScalarFunction::ArrayPopFront => Self::ArrayPopFront,
BuiltinScalarFunction::ArrayPopBack => Self::ArrayPopBack,
BuiltinScalarFunction::ArrayPosition => Self::ArrayPosition,
BuiltinScalarFunction::ArrayPositions => Self::ArrayPositions,
diff --git a/datafusion/sqllogictest/test_files/array.slt
b/datafusion/sqllogictest/test_files/array.slt
index 67cabb0988..99ed948836 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -826,6 +826,44 @@ select array_pop_back(column1) from arrayspop;
[]
[, 10, 11]
+## array_pop_front (aliases: `list_pop_front`)
+
+# array_pop_front scalar function #1
+query ??
+select array_pop_front(make_array(1, 2, 3, 4, 5)),
array_pop_front(make_array('h', 'e', 'l', 'l', 'o'));
+----
+[2, 3, 4, 5] [e, l, l, o]
+
+# array_pop_front scalar function #2 (after array_pop_front, array is empty)
+query ?
+select array_pop_front(make_array(1));
+----
+[]
+
+# array_pop_front scalar function #3 (array_pop_front the empty array)
+query ?
+select array_pop_front(array_pop_front(make_array(1)));
+----
+[]
+
+# array_pop_front scalar function #5 (array_pop_front the nested arrays)
+query ?
+select array_pop_front(make_array(make_array(1, 2, 3), make_array(2, 9, 1),
make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5,
6)));
+----
+[[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]]
+
+# array_pop_front scalar function #6 (array_pop_front the nested arrays with
NULL)
+query ?
+select array_pop_front(make_array(NULL, make_array(1, 2, 3), make_array(2, 9,
1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4)));
+----
+[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]]
+
+# array_pop_front scalar function #8 (after array_pop_front, nested array is
empty)
+query ?
+select array_pop_front(make_array(make_array(1, 2, 3)));
+----
+[]
+
## array_slice (aliases: list_slice)
# array_slice scalar function #1 (with positive indexes)
diff --git a/docs/source/user-guide/expressions.md
b/docs/source/user-guide/expressions.md
index 6b2ab46eb3..191ef6cd91 100644
--- a/docs/source/user-guide/expressions.md
+++ b/docs/source/user-guide/expressions.md
@@ -219,6 +219,7 @@ Unlike to some databases the math functions in Datafusion
works the same way as
| flatten(array) | Converts an array of arrays to a
flat array `flatten([[1], [2, 3], [4, 5, 6]]) -> [1, 2, 3, 4, 5, 6]`
|
| array_length(array, dimension) | Returns the length of the array
dimension. `array_length([1, 2, 3, 4, 5]) -> 5`
|
| array_ndims(array) | Returns the number of dimensions of
the array. `array_ndims([[1, 2, 3], [4, 5, 6]]) -> 2`
|
+| array_pop_front(array) | Returns the array without the first
element. `array_pop_front([1, 2, 3]) -> [2, 3]`
|
| array_pop_back(array) | Returns the array without the last
element. `array_pop_back([1, 2, 3]) -> [1, 2]`
|
| array_position(array, element) | Searches for an element in the
array, returns first occurrence. `array_position([1, 2, 2, 3, 4], 2) -> 2`
|
| array_positions(array, element) | Searches for an element in the
array, returns all occurrences. `array_positions([1, 2, 2, 3, 4], 2) -> [2, 3]`
|
diff --git a/docs/source/user-guide/sql/scalar_functions.md
b/docs/source/user-guide/sql/scalar_functions.md
index 826782e1a0..baaea3926f 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -1515,6 +1515,7 @@ from_unixtime(expression)
- [array_length](#array_length)
- [array_ndims](#array_ndims)
- [array_prepend](#array_prepend)
+- [array_pop_front](#array_pop_front)
- [array_pop_back](#array_pop_back)
- [array_position](#array_position)
- [array_positions](#array_positions)
@@ -1868,6 +1869,30 @@ array_prepend(element, array)
- list_prepend
- list_push_front
+### `array_pop_front`
+
+Returns the array without the first element.
+
+```
+array_pop_front(array)
+```
+
+#### Arguments
+
+- **array**: Array expression.
+ Can be a constant, column, or function, and any combination of array
operators.
+
+#### Example
+
+```
+❯ select array_pop_front([1, 2, 3]);
++-------------------------------+
+| array_pop_front(List([1,2,3])) |
++-------------------------------+
+| [2, 3] |
++-------------------------------+
+```
+
### `array_pop_back`
Returns the array without the last element.