This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 6b945a4409 Implement func `array_pop_front` (#8142)
6b945a4409 is described below

commit 6b945a4409e1c8e9c50124e30a0996b65e9d31c6
Author: 谭巍 <[email protected]>
AuthorDate: Thu Nov 16 05:17:05 2023 +0800

    Implement func `array_pop_front` (#8142)
    
    * implement array_pop_front
    
    Signed-off-by: veeupup <[email protected]>
    
    * abstract array_pop
    
    * fix cargo check
    
    * add docs for array_pop_front
    
    Signed-off-by: veeupup <[email protected]>
    
    * fix comments
    
    ---------
    
    Signed-off-by: veeupup <[email protected]>
---
 datafusion/expr/src/built_in_function.rs          |  6 ++++
 datafusion/expr/src/expr_fn.rs                    |  8 +++++
 datafusion/physical-expr/src/array_expressions.rs | 42 ++++++++++++++++++++---
 datafusion/physical-expr/src/functions.rs         |  3 ++
 datafusion/proto/proto/datafusion.proto           |  1 +
 datafusion/proto/src/generated/pbjson.rs          |  3 ++
 datafusion/proto/src/generated/prost.rs           |  3 ++
 datafusion/proto/src/logical_plan/from_proto.rs   |  6 +++-
 datafusion/proto/src/logical_plan/to_proto.rs     |  1 +
 datafusion/sqllogictest/test_files/array.slt      | 38 ++++++++++++++++++++
 docs/source/user-guide/expressions.md             |  1 +
 docs/source/user-guide/sql/scalar_functions.md    | 25 ++++++++++++++
 12 files changed, 131 insertions(+), 6 deletions(-)

diff --git a/datafusion/expr/src/built_in_function.rs 
b/datafusion/expr/src/built_in_function.rs
index 473094c001..1b48c37406 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -138,6 +138,8 @@ pub enum BuiltinScalarFunction {
     ArrayHasAll,
     /// array_has_any
     ArrayHasAny,
+    /// array_pop_front
+    ArrayPopFront,
     /// array_pop_back
     ArrayPopBack,
     /// array_dims
@@ -392,6 +394,7 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::ArrayElement => Volatility::Immutable,
             BuiltinScalarFunction::ArrayLength => Volatility::Immutable,
             BuiltinScalarFunction::ArrayNdims => Volatility::Immutable,
+            BuiltinScalarFunction::ArrayPopFront => Volatility::Immutable,
             BuiltinScalarFunction::ArrayPopBack => Volatility::Immutable,
             BuiltinScalarFunction::ArrayPosition => Volatility::Immutable,
             BuiltinScalarFunction::ArrayPositions => Volatility::Immutable,
@@ -570,6 +573,7 @@ impl BuiltinScalarFunction {
             },
             BuiltinScalarFunction::ArrayLength => Ok(UInt64),
             BuiltinScalarFunction::ArrayNdims => Ok(UInt64),
+            BuiltinScalarFunction::ArrayPopFront => 
Ok(input_expr_types[0].clone()),
             BuiltinScalarFunction::ArrayPopBack => 
Ok(input_expr_types[0].clone()),
             BuiltinScalarFunction::ArrayPosition => Ok(UInt64),
             BuiltinScalarFunction::ArrayPositions => {
@@ -868,6 +872,7 @@ impl BuiltinScalarFunction {
         // for now, the list is small, as we do not have many built-in 
functions.
         match self {
             BuiltinScalarFunction::ArrayAppend => Signature::any(2, 
self.volatility()),
+            BuiltinScalarFunction::ArrayPopFront => Signature::any(1, 
self.volatility()),
             BuiltinScalarFunction::ArrayPopBack => Signature::any(1, 
self.volatility()),
             BuiltinScalarFunction::ArrayConcat => {
                 Signature::variadic_any(self.volatility())
@@ -1512,6 +1517,7 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static 
[&'static str] {
         }
         BuiltinScalarFunction::ArrayLength => &["array_length", "list_length"],
         BuiltinScalarFunction::ArrayNdims => &["array_ndims", "list_ndims"],
+        BuiltinScalarFunction::ArrayPopFront => &["array_pop_front", 
"list_pop_front"],
         BuiltinScalarFunction::ArrayPopBack => &["array_pop_back", 
"list_pop_back"],
         BuiltinScalarFunction::ArrayPosition => &[
             "array_position",
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index e70a4a90f7..bcf1aa0ca7 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -590,6 +590,13 @@ scalar_expr!(
     "returns the array without the last element."
 );
 
+scalar_expr!(
+    ArrayPopFront,
+    array_pop_front,
+    array,
+    "returns the array without the first element."
+);
+
 nary_scalar_expr!(ArrayConcat, array_concat, "concatenates arrays.");
 scalar_expr!(
     ArrayHas,
@@ -1166,6 +1173,7 @@ mod test {
         test_scalar_expr!(FromUnixtime, from_unixtime, unixtime);
 
         test_scalar_expr!(ArrayAppend, array_append, array, element);
+        test_scalar_expr!(ArrayPopFront, array_pop_front, array);
         test_scalar_expr!(ArrayPopBack, array_pop_back, array);
         test_unary_scalar_expr!(ArrayDims, array_dims);
         test_scalar_expr!(ArrayLength, array_length, array, dimension);
diff --git a/datafusion/physical-expr/src/array_expressions.rs 
b/datafusion/physical-expr/src/array_expressions.rs
index 515df2a970..ded606c3b7 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -636,13 +636,33 @@ pub fn array_slice(args: &[ArrayRef]) -> Result<ArrayRef> 
{
     define_array_slice(list_array, key, extra_key, false)
 }
 
+fn general_array_pop(
+    list_array: &GenericListArray<i32>,
+    from_back: bool,
+) -> Result<(Vec<i64>, Vec<i64>)> {
+    if from_back {
+        let key = vec![0; list_array.len()];
+        // Attention: `arr.len() - 1` in extra key defines the last element 
position (position = index + 1, not inclusive) we want in the new array.
+        let extra_key: Vec<_> = list_array
+            .iter()
+            .map(|x| x.map_or(0, |arr| arr.len() as i64 - 1))
+            .collect();
+        Ok((key, extra_key))
+    } else {
+        // Attention: 2 in the `key` defines the first element position 
(position = index + 1) we want in the new array.
+        // We only handle two cases of the first element index: if the old 
array is non-null, it starts from 2 (index + 1); otherwise it starts from 0.
+        let key: Vec<_> = list_array.iter().map(|x| x.map_or(0, |_| 
2)).collect();
+        let extra_key: Vec<_> = list_array
+            .iter()
+            .map(|x| x.map_or(0, |arr| arr.len() as i64))
+            .collect();
+        Ok((key, extra_key))
+    }
+}
+
 pub fn array_pop_back(args: &[ArrayRef]) -> Result<ArrayRef> {
     let list_array = as_list_array(&args[0])?;
-    let key = vec![0; list_array.len()];
-    let extra_key: Vec<_> = list_array
-        .iter()
-        .map(|x| x.map_or(0, |arr| arr.len() as i64 - 1))
-        .collect();
+    let (key, extra_key) = general_array_pop(list_array, true)?;
 
     define_array_slice(
         list_array,
@@ -767,6 +787,18 @@ pub fn gen_range(args: &[ArrayRef]) -> Result<ArrayRef> {
     Ok(arr)
 }
 
+pub fn array_pop_front(args: &[ArrayRef]) -> Result<ArrayRef> {
+    let list_array = as_list_array(&args[0])?;
+    let (key, extra_key) = general_array_pop(list_array, false)?;
+
+    define_array_slice(
+        list_array,
+        &Int64Array::from(key),
+        &Int64Array::from(extra_key),
+        false,
+    )
+}
+
 /// Array_append SQL function
 pub fn array_append(args: &[ArrayRef]) -> Result<ArrayRef> {
     let list_array = as_list_array(&args[0])?;
diff --git a/datafusion/physical-expr/src/functions.rs 
b/datafusion/physical-expr/src/functions.rs
index 543d7eb654..1e8500079f 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -359,6 +359,9 @@ pub fn create_physical_fun(
         BuiltinScalarFunction::ArrayNdims => {
             Arc::new(|args| 
make_scalar_function(array_expressions::array_ndims)(args))
         }
+        BuiltinScalarFunction::ArrayPopFront => Arc::new(|args| {
+            make_scalar_function(array_expressions::array_pop_front)(args)
+        }),
         BuiltinScalarFunction::ArrayPopBack => {
             Arc::new(|args| 
make_scalar_function(array_expressions::array_pop_back)(args))
         }
diff --git a/datafusion/proto/proto/datafusion.proto 
b/datafusion/proto/proto/datafusion.proto
index fa080518d5..66c34c7a12 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -638,6 +638,7 @@ enum ScalarFunction {
   ArrayUnion = 120;
   OverLay = 121;
   Range = 122;
+  ArrayPopFront = 123;
 }
 
 message ScalarFunctionNode {
diff --git a/datafusion/proto/src/generated/pbjson.rs 
b/datafusion/proto/src/generated/pbjson.rs
index 08e7413102..628adcc411 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -20937,6 +20937,7 @@ impl serde::Serialize for ScalarFunction {
             Self::ArrayUnion => "ArrayUnion",
             Self::OverLay => "OverLay",
             Self::Range => "Range",
+            Self::ArrayPopFront => "ArrayPopFront",
         };
         serializer.serialize_str(variant)
     }
@@ -21071,6 +21072,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
             "ArrayUnion",
             "OverLay",
             "Range",
+            "ArrayPopFront",
         ];
 
         struct GeneratedVisitor;
@@ -21234,6 +21236,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
                     "ArrayUnion" => Ok(ScalarFunction::ArrayUnion),
                     "OverLay" => Ok(ScalarFunction::OverLay),
                     "Range" => Ok(ScalarFunction::Range),
+                    "ArrayPopFront" => Ok(ScalarFunction::ArrayPopFront),
                     _ => Err(serde::de::Error::unknown_variant(value, FIELDS)),
                 }
             }
diff --git a/datafusion/proto/src/generated/prost.rs 
b/datafusion/proto/src/generated/prost.rs
index 15606488b3..317b888447 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2569,6 +2569,7 @@ pub enum ScalarFunction {
     ArrayUnion = 120,
     OverLay = 121,
     Range = 122,
+    ArrayPopFront = 123,
 }
 impl ScalarFunction {
     /// String value of the enum field names used in the ProtoBuf definition.
@@ -2700,6 +2701,7 @@ impl ScalarFunction {
             ScalarFunction::ArrayUnion => "ArrayUnion",
             ScalarFunction::OverLay => "OverLay",
             ScalarFunction::Range => "Range",
+            ScalarFunction::ArrayPopFront => "ArrayPopFront",
         }
     }
     /// Creates an enum from field names used in the ProtoBuf definition.
@@ -2828,6 +2830,7 @@ impl ScalarFunction {
             "ArrayUnion" => Some(Self::ArrayUnion),
             "OverLay" => Some(Self::OverLay),
             "Range" => Some(Self::Range),
+            "ArrayPopFront" => Some(Self::ArrayPopFront),
             _ => None,
         }
     }
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs 
b/datafusion/proto/src/logical_plan/from_proto.rs
index b3d6857003..94c9f98066 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -66,7 +66,7 @@ use datafusion_expr::{
     WindowFrameUnits,
 };
 use datafusion_expr::{
-    array_empty, array_pop_back,
+    array_empty, array_pop_back, array_pop_front,
     expr::{Alias, Placeholder},
 };
 use std::sync::Arc;
@@ -473,6 +473,7 @@ impl From<&protobuf::ScalarFunction> for 
BuiltinScalarFunction {
             ScalarFunction::Flatten => Self::Flatten,
             ScalarFunction::ArrayLength => Self::ArrayLength,
             ScalarFunction::ArrayNdims => Self::ArrayNdims,
+            ScalarFunction::ArrayPopFront => Self::ArrayPopFront,
             ScalarFunction::ArrayPopBack => Self::ArrayPopBack,
             ScalarFunction::ArrayPosition => Self::ArrayPosition,
             ScalarFunction::ArrayPositions => Self::ArrayPositions,
@@ -1330,6 +1331,9 @@ pub fn parse_expr(
                     parse_expr(&args[0], registry)?,
                     parse_expr(&args[1], registry)?,
                 )),
+                ScalarFunction::ArrayPopFront => {
+                    Ok(array_pop_front(parse_expr(&args[0], registry)?))
+                }
                 ScalarFunction::ArrayPopBack => {
                     Ok(array_pop_back(parse_expr(&args[0], registry)?))
                 }
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs 
b/datafusion/proto/src/logical_plan/to_proto.rs
index 144f285310..53be5f7bd4 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1480,6 +1480,7 @@ impl TryFrom<&BuiltinScalarFunction> for 
protobuf::ScalarFunction {
             BuiltinScalarFunction::Flatten => Self::Flatten,
             BuiltinScalarFunction::ArrayLength => Self::ArrayLength,
             BuiltinScalarFunction::ArrayNdims => Self::ArrayNdims,
+            BuiltinScalarFunction::ArrayPopFront => Self::ArrayPopFront,
             BuiltinScalarFunction::ArrayPopBack => Self::ArrayPopBack,
             BuiltinScalarFunction::ArrayPosition => Self::ArrayPosition,
             BuiltinScalarFunction::ArrayPositions => Self::ArrayPositions,
diff --git a/datafusion/sqllogictest/test_files/array.slt 
b/datafusion/sqllogictest/test_files/array.slt
index 67cabb0988..99ed948836 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -826,6 +826,44 @@ select array_pop_back(column1) from arrayspop;
 []
 [, 10, 11]
 
+## array_pop_front (aliases: `list_pop_front`)
+
+# array_pop_front scalar function #1
+query ??
+select array_pop_front(make_array(1, 2, 3, 4, 5)), 
array_pop_front(make_array('h', 'e', 'l', 'l', 'o'));
+----
+[2, 3, 4, 5] [e, l, l, o]
+
+# array_pop_front scalar function #2 (after array_pop_front, array is empty)
+query ?
+select array_pop_front(make_array(1));
+----
+[]
+
+# array_pop_front scalar function #3 (array_pop_front the empty array)
+query ?
+select array_pop_front(array_pop_front(make_array(1)));
+----
+[]
+
+# array_pop_front scalar function #5 (array_pop_front the nested arrays)
+query ?
+select array_pop_front(make_array(make_array(1, 2, 3), make_array(2, 9, 1), 
make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 
6)));
+----
+[[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]]
+
+# array_pop_front scalar function #6 (array_pop_front the nested arrays with 
NULL)
+query ?
+select array_pop_front(make_array(NULL, make_array(1, 2, 3), make_array(2, 9, 
1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4)));
+----
+[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]]
+
+# array_pop_front scalar function #8 (after array_pop_front, nested array is 
empty)
+query ?
+select array_pop_front(make_array(make_array(1, 2, 3)));
+----
+[]
+
 ## array_slice (aliases: list_slice)
 
 # array_slice scalar function #1 (with positive indexes)
diff --git a/docs/source/user-guide/expressions.md 
b/docs/source/user-guide/expressions.md
index 6b2ab46eb3..191ef6cd91 100644
--- a/docs/source/user-guide/expressions.md
+++ b/docs/source/user-guide/expressions.md
@@ -219,6 +219,7 @@ Unlike to some databases the math functions in Datafusion 
works the same way as
 | flatten(array)                        | Converts an array of arrays to a 
flat array `flatten([[1], [2, 3], [4, 5, 6]]) -> [1, 2, 3, 4, 5, 6]`            
                                                        |
 | array_length(array, dimension)        | Returns the length of the array 
dimension. `array_length([1, 2, 3, 4, 5]) -> 5`                                 
                                                         |
 | array_ndims(array)                    | Returns the number of dimensions of 
the array. `array_ndims([[1, 2, 3], [4, 5, 6]]) -> 2`                           
                                                     |
+| array_pop_front(array)                | Returns the array without the first 
element. `array_pop_front([1, 2, 3]) -> [2, 3]`                                 
                                                     |
 | array_pop_back(array)                 | Returns the array without the last 
element. `array_pop_back([1, 2, 3]) -> [1, 2]`                                  
                                                      |
 | array_position(array, element)        | Searches for an element in the 
array, returns first occurrence. `array_position([1, 2, 2, 3, 4], 2) -> 2`      
                                                          |
 | array_positions(array, element)       | Searches for an element in the 
array, returns all occurrences. `array_positions([1, 2, 2, 3, 4], 2) -> [2, 3]` 
                                                          |
diff --git a/docs/source/user-guide/sql/scalar_functions.md 
b/docs/source/user-guide/sql/scalar_functions.md
index 826782e1a0..baaea3926f 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -1515,6 +1515,7 @@ from_unixtime(expression)
 - [array_length](#array_length)
 - [array_ndims](#array_ndims)
 - [array_prepend](#array_prepend)
+- [array_pop_front](#array_pop_front)
 - [array_pop_back](#array_pop_back)
 - [array_position](#array_position)
 - [array_positions](#array_positions)
@@ -1868,6 +1869,30 @@ array_prepend(element, array)
 - list_prepend
 - list_push_front
 
+### `array_pop_front`
+
+Returns the array without the first element.
+
+```
+array_pop_front(array)
+```
+
+#### Arguments
+
+- **array**: Array expression.
+  Can be a constant, column, or function, and any combination of array 
operators.
+
+#### Example
+
+```
+❯ select array_pop_front([1, 2, 3]);
++--------------------------------+
+| array_pop_front(List([1,2,3])) |
++--------------------------------+
+| [2, 3]                         |
++--------------------------------+
+```
+
 ### `array_pop_back`
 
 Returns the array without the last element.

Reply via email to