This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 31e54f00c7 implement range/generate_series func (#8140)
31e54f00c7 is described below

commit 31e54f00c71da3e4441cf59a5deee7fc1d2727f2
Author: 谭巍 <[email protected]>
AuthorDate: Wed Nov 15 00:57:08 2023 +0800

    implement range/generate_series func (#8140)
    
    * implement range/generate_series func
    
    Signed-off-by: veeupup <[email protected]>
    
    * explain details for range func
    
    Signed-off-by: veeupup <[email protected]>
    
    * fix ci
    
    * fix doc fmt
    
    * fix comments
    
    * regenerate proto
    
    * add comment for gen_range usage
    
    Signed-off-by: veeupup <[email protected]>
    
    ---------
    
    Signed-off-by: veeupup <[email protected]>
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/expr/src/built_in_function.rs          | 15 +++++++
 datafusion/expr/src/expr_fn.rs                    |  6 +++
 datafusion/physical-expr/src/array_expressions.rs | 52 +++++++++++++++++++++++
 datafusion/physical-expr/src/functions.rs         |  3 ++
 datafusion/proto/proto/datafusion.proto           |  1 +
 datafusion/proto/src/generated/pbjson.rs          |  3 ++
 datafusion/proto/src/generated/prost.rs           |  3 ++
 datafusion/proto/src/logical_plan/from_proto.rs   | 11 ++++-
 datafusion/proto/src/logical_plan/to_proto.rs     |  1 +
 datafusion/sqllogictest/test_files/array.slt      | 36 ++++++++++++++++
 docs/source/user-guide/expressions.md             |  1 +
 docs/source/user-guide/sql/scalar_functions.md    | 15 +++++++
 12 files changed, 145 insertions(+), 2 deletions(-)

diff --git a/datafusion/expr/src/built_in_function.rs 
b/datafusion/expr/src/built_in_function.rs
index 77c64128e1..473094c001 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -184,6 +184,8 @@ pub enum BuiltinScalarFunction {
     MakeArray,
     /// Flatten
     Flatten,
+    /// Range
+    Range,
 
     // struct functions
     /// struct
@@ -406,6 +408,7 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::ArrayToString => Volatility::Immutable,
             BuiltinScalarFunction::ArrayIntersect => Volatility::Immutable,
             BuiltinScalarFunction::ArrayUnion => Volatility::Immutable,
+            BuiltinScalarFunction::Range => Volatility::Immutable,
             BuiltinScalarFunction::Cardinality => Volatility::Immutable,
             BuiltinScalarFunction::MakeArray => Volatility::Immutable,
             BuiltinScalarFunction::Ascii => Volatility::Immutable,
@@ -588,6 +591,9 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::ArrayToString => Ok(Utf8),
             BuiltinScalarFunction::ArrayIntersect => 
Ok(input_expr_types[0].clone()),
             BuiltinScalarFunction::ArrayUnion => 
Ok(input_expr_types[0].clone()),
+            BuiltinScalarFunction::Range => {
+                Ok(List(Arc::new(Field::new("item", Int64, true))))
+            }
             BuiltinScalarFunction::Cardinality => Ok(UInt64),
             BuiltinScalarFunction::MakeArray => match input_expr_types.len() {
                 0 => Ok(List(Arc::new(Field::new("item", Null, true)))),
@@ -902,6 +908,14 @@ impl BuiltinScalarFunction {
                 // 0 or more arguments of arbitrary type
                 Signature::one_of(vec![VariadicAny, Any(0)], self.volatility())
             }
+            BuiltinScalarFunction::Range => Signature::one_of(
+                vec![
+                    Exact(vec![Int64]),
+                    Exact(vec![Int64, Int64]),
+                    Exact(vec![Int64, Int64, Int64]),
+                ],
+                self.volatility(),
+            ),
             BuiltinScalarFunction::Struct => Signature::variadic(
                 struct_expressions::SUPPORTED_STRUCT_TYPES.to_vec(),
                 self.volatility(),
@@ -1533,6 +1547,7 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static 
[&'static str] {
         BuiltinScalarFunction::MakeArray => &["make_array", "make_list"],
         BuiltinScalarFunction::ArrayIntersect => &["array_intersect", 
"list_intersect"],
         BuiltinScalarFunction::OverLay => &["overlay"],
+        BuiltinScalarFunction::Range => &["range", "generate_series"],
 
         // struct functions
         BuiltinScalarFunction::Struct => &["struct"],
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index 91674cc092..e70a4a90f7 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -737,6 +737,12 @@ scalar_expr!(
     "Returns an array of the elements in the intersection of array1 and 
array2."
 );
 
+nary_scalar_expr!(
+    Range,
+    gen_range,
+    "Returns a list of values in the range between start and stop with step."
+);
+
 // string functions
 scalar_expr!(Ascii, ascii, chr, "ASCII code value of the character");
 scalar_expr!(
diff --git a/datafusion/physical-expr/src/array_expressions.rs 
b/datafusion/physical-expr/src/array_expressions.rs
index 9b074ff0ee..6415bd5391 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -643,6 +643,58 @@ fn general_append_and_prepend(
     )?))
 }
 
+/// Generates an array of integers from start to stop with a given step.
+///
+/// This function takes 1 to 3 ArrayRefs as arguments, representing start, 
stop, and step values.
+/// It returns a `Result<ArrayRef>` representing the resulting ListArray after 
the operation.
+///
+/// # Arguments
+///
+/// * `args` - An array of 1 to 3 ArrayRefs representing start, stop, and 
step(step value can not be zero.) values.
+///    
+/// # Examples
+///
+/// gen_range(3) => [0, 1, 2]
+/// gen_range(1, 4) => [1, 2, 3]
+/// gen_range(1, 7, 2) => [1, 3, 5]
+pub fn gen_range(args: &[ArrayRef]) -> Result<ArrayRef> {
+    let (start_array, stop_array, step_array) = match args.len() {
+        1 => (None, as_int64_array(&args[0])?, None),
+        2 => (
+            Some(as_int64_array(&args[0])?),
+            as_int64_array(&args[1])?,
+            None,
+        ),
+        3 => (
+            Some(as_int64_array(&args[0])?),
+            as_int64_array(&args[1])?,
+            Some(as_int64_array(&args[2])?),
+        ),
+        _ => return internal_err!("gen_range expects 1 to 3 arguments"),
+    };
+
+    let mut values = vec![];
+    let mut offsets = vec![0];
+    for (idx, stop) in stop_array.iter().enumerate() {
+        let stop = stop.unwrap_or(0);
+        let start = start_array.as_ref().map(|arr| 
arr.value(idx)).unwrap_or(0);
+        let step = step_array.as_ref().map(|arr| arr.value(idx)).unwrap_or(1);
+        if step == 0 {
+            return exec_err!("step can't be 0 for function range(start [, 
stop, step]");
+        }
+        let value = (start..stop).step_by(step as usize);
+        values.extend(value);
+        offsets.push(values.len() as i32);
+    }
+    let arr = Arc::new(ListArray::try_new(
+        Arc::new(Field::new("item", DataType::Int64, true)),
+        OffsetBuffer::new(offsets.into()),
+        Arc::new(Int64Array::from(values)),
+        None,
+    )?);
+    Ok(arr)
+}
+
 /// Array_append SQL function
 pub fn array_append(args: &[ArrayRef]) -> Result<ArrayRef> {
     let list_array = as_list_array(&args[0])?;
diff --git a/datafusion/physical-expr/src/functions.rs 
b/datafusion/physical-expr/src/functions.rs
index 7f8921e86c..799127c95c 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -401,6 +401,9 @@ pub fn create_physical_fun(
         BuiltinScalarFunction::ArrayIntersect => Arc::new(|args| {
             make_scalar_function(array_expressions::array_intersect)(args)
         }),
+        BuiltinScalarFunction::Range => {
+            Arc::new(|args| 
make_scalar_function(array_expressions::gen_range)(args))
+        }
         BuiltinScalarFunction::Cardinality => {
             Arc::new(|args| 
make_scalar_function(array_expressions::cardinality)(args))
         }
diff --git a/datafusion/proto/proto/datafusion.proto 
b/datafusion/proto/proto/datafusion.proto
index d85678a76b..fa080518d5 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -637,6 +637,7 @@ enum ScalarFunction {
   ArrayIntersect = 119;
   ArrayUnion = 120;
   OverLay = 121;
+  Range = 122;
 }
 
 message ScalarFunctionNode {
diff --git a/datafusion/proto/src/generated/pbjson.rs 
b/datafusion/proto/src/generated/pbjson.rs
index 64db9137d6..08e7413102 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -20936,6 +20936,7 @@ impl serde::Serialize for ScalarFunction {
             Self::ArrayIntersect => "ArrayIntersect",
             Self::ArrayUnion => "ArrayUnion",
             Self::OverLay => "OverLay",
+            Self::Range => "Range",
         };
         serializer.serialize_str(variant)
     }
@@ -21069,6 +21070,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
             "ArrayIntersect",
             "ArrayUnion",
             "OverLay",
+            "Range",
         ];
 
         struct GeneratedVisitor;
@@ -21231,6 +21233,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
                     "ArrayIntersect" => Ok(ScalarFunction::ArrayIntersect),
                     "ArrayUnion" => Ok(ScalarFunction::ArrayUnion),
                     "OverLay" => Ok(ScalarFunction::OverLay),
+                    "Range" => Ok(ScalarFunction::Range),
                     _ => Err(serde::de::Error::unknown_variant(value, FIELDS)),
                 }
             }
diff --git a/datafusion/proto/src/generated/prost.rs 
b/datafusion/proto/src/generated/prost.rs
index 131ca11993..15606488b3 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2568,6 +2568,7 @@ pub enum ScalarFunction {
     ArrayIntersect = 119,
     ArrayUnion = 120,
     OverLay = 121,
+    Range = 122,
 }
 impl ScalarFunction {
     /// String value of the enum field names used in the ProtoBuf definition.
@@ -2698,6 +2699,7 @@ impl ScalarFunction {
             ScalarFunction::ArrayIntersect => "ArrayIntersect",
             ScalarFunction::ArrayUnion => "ArrayUnion",
             ScalarFunction::OverLay => "OverLay",
+            ScalarFunction::Range => "Range",
         }
     }
     /// Creates an enum from field names used in the ProtoBuf definition.
@@ -2825,6 +2827,7 @@ impl ScalarFunction {
             "ArrayIntersect" => Some(Self::ArrayIntersect),
             "ArrayUnion" => Some(Self::ArrayUnion),
             "OverLay" => Some(Self::OverLay),
+            "Range" => Some(Self::Range),
             _ => None,
         }
     }
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs 
b/datafusion/proto/src/logical_plan/from_proto.rs
index 9ca7bb0e89..b3d6857003 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -49,8 +49,8 @@ use datafusion_expr::{
     concat_expr, concat_ws_expr, cos, cosh, cot, current_date, current_time, 
date_bin,
     date_part, date_trunc, decode, degrees, digest, encode, exp,
     expr::{self, InList, Sort, WindowFunction},
-    factorial, flatten, floor, from_unixtime, gcd, isnan, iszero, lcm, left, 
ln, log,
-    log10, log2,
+    factorial, flatten, floor, from_unixtime, gcd, gen_range, isnan, iszero, 
lcm, left,
+    ln, log, log10, log2,
     logical_plan::{PlanType, StringifiedPlan},
     lower, lpad, ltrim, md5, nanvl, now, nullif, octet_length, overlay, pi, 
power,
     radians, random, regexp_match, regexp_replace, repeat, replace, reverse, 
right,
@@ -488,6 +488,7 @@ impl From<&protobuf::ScalarFunction> for 
BuiltinScalarFunction {
             ScalarFunction::ArrayToString => Self::ArrayToString,
             ScalarFunction::ArrayIntersect => Self::ArrayIntersect,
             ScalarFunction::ArrayUnion => Self::ArrayUnion,
+            ScalarFunction::Range => Self::Range,
             ScalarFunction::Cardinality => Self::Cardinality,
             ScalarFunction::Array => Self::MakeArray,
             ScalarFunction::NullIf => Self::NullIf,
@@ -1409,6 +1410,12 @@ pub fn parse_expr(
                     parse_expr(&args[0], registry)?,
                     parse_expr(&args[1], registry)?,
                 )),
+                ScalarFunction::Range => Ok(gen_range(
+                    args.to_owned()
+                        .iter()
+                        .map(|expr| parse_expr(expr, registry))
+                        .collect::<Result<Vec<_>, _>>()?,
+                )),
                 ScalarFunction::Cardinality => {
                     Ok(cardinality(parse_expr(&args[0], registry)?))
                 }
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs 
b/datafusion/proto/src/logical_plan/to_proto.rs
index 974d6c5aab..491b7f6664 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1495,6 +1495,7 @@ impl TryFrom<&BuiltinScalarFunction> for 
protobuf::ScalarFunction {
             BuiltinScalarFunction::ArrayToString => Self::ArrayToString,
             BuiltinScalarFunction::ArrayIntersect => Self::ArrayIntersect,
             BuiltinScalarFunction::ArrayUnion => Self::ArrayUnion,
+            BuiltinScalarFunction::Range => Self::Range,
             BuiltinScalarFunction::Cardinality => Self::Cardinality,
             BuiltinScalarFunction::MakeArray => Self::Array,
             BuiltinScalarFunction::NullIf => Self::NullIf,
diff --git a/datafusion/sqllogictest/test_files/array.slt 
b/datafusion/sqllogictest/test_files/array.slt
index 54741afdf8..92013f37d3 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -240,6 +240,13 @@ AS VALUES
   (make_array(31, 32, 33, 34, 35, 26, 37, 38, 39, 40), 34, 4, 'ok', [8,9])
 ;
 
+statement ok
+CREATE TABLE arrays_range
+AS VALUES
+  (3, 10, 2),
+  (4, 13, 3)
+;
+
 statement ok
 CREATE TABLE arrays_with_repeating_elements
 AS VALUES
@@ -2662,6 +2669,32 @@ select list_has_all(make_array(1,2,3), 
make_array(4,5,6)),
 ----
 false true false true
 
+query ???
+select range(column2),
+       range(column1, column2),
+       range(column1, column2, column3)
+from arrays_range;
+----
+[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] [3, 4, 5, 6, 7, 8, 9] [3, 5, 7, 9]
+[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] [4, 5, 6, 7, 8, 9, 10, 11, 12] [4, 
7, 10]
+
+query ?????
+select range(5),
+       range(2, 5),
+       range(2, 10, 3),
+       range(1, 5, -1),
+       range(1, -5, 1)
+;
+----
+[0, 1, 2, 3, 4] [2, 3, 4] [2, 5, 8] [1] []
+
+query ???
+select generate_series(5),
+       generate_series(2, 5),
+       generate_series(2, 10, 3)
+;
+----
+[0, 1, 2, 3, 4] [2, 3, 4] [2, 5, 8]
 
 ### Array operators tests
 
@@ -2969,6 +3002,9 @@ drop table array_intersect_table_3D;
 statement ok
 drop table arrays_values_without_nulls;
 
+statement ok
+drop table arrays_range;
+
 statement ok
 drop table arrays_with_repeating_elements;
 
diff --git a/docs/source/user-guide/expressions.md 
b/docs/source/user-guide/expressions.md
index bec3ba9bb2..6b2ab46eb3 100644
--- a/docs/source/user-guide/expressions.md
+++ b/docs/source/user-guide/expressions.md
@@ -236,6 +236,7 @@ Unlike to some databases the math functions in Datafusion 
works the same way as
 | array_union(array1, array2)           | Returns an array of the elements in 
the union of array1 and array2 without duplicates. `array_union([1, 2, 3, 4], 
[5, 6, 3, 4]) -> [1, 2, 3, 4, 5, 6]`                   |
 | cardinality(array)                    | Returns the total number of elements 
in the array. `cardinality([[1, 2, 3], [4, 5, 6]]) -> 6`                        
                                                    |
 | make_array(value1, [value2 [, ...]])  | Returns an Arrow array using the 
specified input expressions. `make_array(1, 2, 3) -> [1, 2, 3]`                 
                                                        |
+| range(start [, stop, step])           | Returns an Arrow array between start 
and stop with step. `SELECT range(2, 10, 3) -> [2, 5, 8]`                       
                                                    |
 | trim_array(array, n)                  | Deprecated                           
                                                                                
                                                    |
 
 ## Regular Expressions
diff --git a/docs/source/user-guide/sql/scalar_functions.md 
b/docs/source/user-guide/sql/scalar_functions.md
index 099c903122..826782e1a0 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -1560,6 +1560,7 @@ from_unixtime(expression)
 - [string_to_array](#string_to_array)
 - [string_to_list](#string_to_list)
 - [trim_array](#trim_array)
+- [range](#range)
 
 ### `array_append`
 
@@ -2481,6 +2482,20 @@ trim_array(array, n)
   Can be a constant, column, or function, and any combination of array 
operators.
 - **n**: Element to trim the array.
 
+### `range`
+
+Returns an Arrow array between start and stop with step. `SELECT range(2, 10, 
3) -> [2, 5, 8]`
+
+The range start..end contains all values with start <= x < end. It is empty if 
start >= end.
+
+Step can not be 0 (then the range will be nonsense.).
+
+#### Arguments
+
+- **start**: start of the range
+- **end**: end of the range (not included)
+- **step**: increase by step (can not be 0)
+
 ## Struct Functions
 
 - [struct](#struct)

Reply via email to