This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 31e54f00c7 implement range/generate_series func (#8140)
31e54f00c7 is described below
commit 31e54f00c71da3e4441cf59a5deee7fc1d2727f2
Author: 谭巍 <[email protected]>
AuthorDate: Wed Nov 15 00:57:08 2023 +0800
implement range/generate_series func (#8140)
* implement range/generate_series func
Signed-off-by: veeupup <[email protected]>
* explain details for range func
Signed-off-by: veeupup <[email protected]>
* fix ci
* fix doc fmt
* fix comments
* regenerate proto
* add comment for gen_range usage
Signed-off-by: veeupup <[email protected]>
---------
Signed-off-by: veeupup <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/expr/src/built_in_function.rs | 15 +++++++
datafusion/expr/src/expr_fn.rs | 6 +++
datafusion/physical-expr/src/array_expressions.rs | 52 +++++++++++++++++++++++
datafusion/physical-expr/src/functions.rs | 3 ++
datafusion/proto/proto/datafusion.proto | 1 +
datafusion/proto/src/generated/pbjson.rs | 3 ++
datafusion/proto/src/generated/prost.rs | 3 ++
datafusion/proto/src/logical_plan/from_proto.rs | 11 ++++-
datafusion/proto/src/logical_plan/to_proto.rs | 1 +
datafusion/sqllogictest/test_files/array.slt | 36 ++++++++++++++++
docs/source/user-guide/expressions.md | 1 +
docs/source/user-guide/sql/scalar_functions.md | 15 +++++++
12 files changed, 145 insertions(+), 2 deletions(-)
diff --git a/datafusion/expr/src/built_in_function.rs
b/datafusion/expr/src/built_in_function.rs
index 77c64128e1..473094c001 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -184,6 +184,8 @@ pub enum BuiltinScalarFunction {
MakeArray,
/// Flatten
Flatten,
+ /// Range
+ Range,
// struct functions
/// struct
@@ -406,6 +408,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayToString => Volatility::Immutable,
BuiltinScalarFunction::ArrayIntersect => Volatility::Immutable,
BuiltinScalarFunction::ArrayUnion => Volatility::Immutable,
+ BuiltinScalarFunction::Range => Volatility::Immutable,
BuiltinScalarFunction::Cardinality => Volatility::Immutable,
BuiltinScalarFunction::MakeArray => Volatility::Immutable,
BuiltinScalarFunction::Ascii => Volatility::Immutable,
@@ -588,6 +591,9 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayToString => Ok(Utf8),
BuiltinScalarFunction::ArrayIntersect =>
Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayUnion =>
Ok(input_expr_types[0].clone()),
+ BuiltinScalarFunction::Range => {
+ Ok(List(Arc::new(Field::new("item", Int64, true))))
+ }
BuiltinScalarFunction::Cardinality => Ok(UInt64),
BuiltinScalarFunction::MakeArray => match input_expr_types.len() {
0 => Ok(List(Arc::new(Field::new("item", Null, true)))),
@@ -902,6 +908,14 @@ impl BuiltinScalarFunction {
// 0 or more arguments of arbitrary type
Signature::one_of(vec![VariadicAny, Any(0)], self.volatility())
}
+ BuiltinScalarFunction::Range => Signature::one_of(
+ vec![
+ Exact(vec![Int64]),
+ Exact(vec![Int64, Int64]),
+ Exact(vec![Int64, Int64, Int64]),
+ ],
+ self.volatility(),
+ ),
BuiltinScalarFunction::Struct => Signature::variadic(
struct_expressions::SUPPORTED_STRUCT_TYPES.to_vec(),
self.volatility(),
@@ -1533,6 +1547,7 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static
[&'static str] {
BuiltinScalarFunction::MakeArray => &["make_array", "make_list"],
BuiltinScalarFunction::ArrayIntersect => &["array_intersect",
"list_intersect"],
BuiltinScalarFunction::OverLay => &["overlay"],
+ BuiltinScalarFunction::Range => &["range", "generate_series"],
// struct functions
BuiltinScalarFunction::Struct => &["struct"],
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index 91674cc092..e70a4a90f7 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -737,6 +737,12 @@ scalar_expr!(
"Returns an array of the elements in the intersection of array1 and
array2."
);
+nary_scalar_expr!(
+ Range,
+ gen_range,
+ "Returns a list of values in the range between start and stop with step."
+);
+
// string functions
scalar_expr!(Ascii, ascii, chr, "ASCII code value of the character");
scalar_expr!(
diff --git a/datafusion/physical-expr/src/array_expressions.rs
b/datafusion/physical-expr/src/array_expressions.rs
index 9b074ff0ee..6415bd5391 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -643,6 +643,58 @@ fn general_append_and_prepend(
)?))
}
+/// Generates an array of integers from start to stop with a given step.
+///
+/// This function takes 1 to 3 ArrayRefs as arguments, representing start,
stop, and step values.
+/// It returns a `Result<ArrayRef>` representing the resulting ListArray after
the operation.
+///
+/// # Arguments
+///
+/// * `args` - An array of 1 to 3 ArrayRefs representing start, stop, and
step(step value can not be zero.) values.
+///
+/// # Examples
+///
+/// gen_range(3) => [0, 1, 2]
+/// gen_range(1, 4) => [1, 2, 3]
+/// gen_range(1, 7, 2) => [1, 3, 5]
+pub fn gen_range(args: &[ArrayRef]) -> Result<ArrayRef> {
+ let (start_array, stop_array, step_array) = match args.len() {
+ 1 => (None, as_int64_array(&args[0])?, None),
+ 2 => (
+ Some(as_int64_array(&args[0])?),
+ as_int64_array(&args[1])?,
+ None,
+ ),
+ 3 => (
+ Some(as_int64_array(&args[0])?),
+ as_int64_array(&args[1])?,
+ Some(as_int64_array(&args[2])?),
+ ),
+ _ => return internal_err!("gen_range expects 1 to 3 arguments"),
+ };
+
+ let mut values = vec![];
+ let mut offsets = vec![0];
+ for (idx, stop) in stop_array.iter().enumerate() {
+ let stop = stop.unwrap_or(0);
+ let start = start_array.as_ref().map(|arr|
arr.value(idx)).unwrap_or(0);
+ let step = step_array.as_ref().map(|arr| arr.value(idx)).unwrap_or(1);
+ if step == 0 {
+ return exec_err!("step can't be 0 for function range(start [,
stop, step]");
+ }
+ let value = (start..stop).step_by(step as usize);
+ values.extend(value);
+ offsets.push(values.len() as i32);
+ }
+ let arr = Arc::new(ListArray::try_new(
+ Arc::new(Field::new("item", DataType::Int64, true)),
+ OffsetBuffer::new(offsets.into()),
+ Arc::new(Int64Array::from(values)),
+ None,
+ )?);
+ Ok(arr)
+}
+
/// Array_append SQL function
pub fn array_append(args: &[ArrayRef]) -> Result<ArrayRef> {
let list_array = as_list_array(&args[0])?;
diff --git a/datafusion/physical-expr/src/functions.rs
b/datafusion/physical-expr/src/functions.rs
index 7f8921e86c..799127c95c 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -401,6 +401,9 @@ pub fn create_physical_fun(
BuiltinScalarFunction::ArrayIntersect => Arc::new(|args| {
make_scalar_function(array_expressions::array_intersect)(args)
}),
+ BuiltinScalarFunction::Range => {
+ Arc::new(|args|
make_scalar_function(array_expressions::gen_range)(args))
+ }
BuiltinScalarFunction::Cardinality => {
Arc::new(|args|
make_scalar_function(array_expressions::cardinality)(args))
}
diff --git a/datafusion/proto/proto/datafusion.proto
b/datafusion/proto/proto/datafusion.proto
index d85678a76b..fa080518d5 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -637,6 +637,7 @@ enum ScalarFunction {
ArrayIntersect = 119;
ArrayUnion = 120;
OverLay = 121;
+ Range = 122;
}
message ScalarFunctionNode {
diff --git a/datafusion/proto/src/generated/pbjson.rs
b/datafusion/proto/src/generated/pbjson.rs
index 64db9137d6..08e7413102 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -20936,6 +20936,7 @@ impl serde::Serialize for ScalarFunction {
Self::ArrayIntersect => "ArrayIntersect",
Self::ArrayUnion => "ArrayUnion",
Self::OverLay => "OverLay",
+ Self::Range => "Range",
};
serializer.serialize_str(variant)
}
@@ -21069,6 +21070,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"ArrayIntersect",
"ArrayUnion",
"OverLay",
+ "Range",
];
struct GeneratedVisitor;
@@ -21231,6 +21233,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"ArrayIntersect" => Ok(ScalarFunction::ArrayIntersect),
"ArrayUnion" => Ok(ScalarFunction::ArrayUnion),
"OverLay" => Ok(ScalarFunction::OverLay),
+ "Range" => Ok(ScalarFunction::Range),
_ => Err(serde::de::Error::unknown_variant(value, FIELDS)),
}
}
diff --git a/datafusion/proto/src/generated/prost.rs
b/datafusion/proto/src/generated/prost.rs
index 131ca11993..15606488b3 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2568,6 +2568,7 @@ pub enum ScalarFunction {
ArrayIntersect = 119,
ArrayUnion = 120,
OverLay = 121,
+ Range = 122,
}
impl ScalarFunction {
/// String value of the enum field names used in the ProtoBuf definition.
@@ -2698,6 +2699,7 @@ impl ScalarFunction {
ScalarFunction::ArrayIntersect => "ArrayIntersect",
ScalarFunction::ArrayUnion => "ArrayUnion",
ScalarFunction::OverLay => "OverLay",
+ ScalarFunction::Range => "Range",
}
}
/// Creates an enum from field names used in the ProtoBuf definition.
@@ -2825,6 +2827,7 @@ impl ScalarFunction {
"ArrayIntersect" => Some(Self::ArrayIntersect),
"ArrayUnion" => Some(Self::ArrayUnion),
"OverLay" => Some(Self::OverLay),
+ "Range" => Some(Self::Range),
_ => None,
}
}
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs
b/datafusion/proto/src/logical_plan/from_proto.rs
index 9ca7bb0e89..b3d6857003 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -49,8 +49,8 @@ use datafusion_expr::{
concat_expr, concat_ws_expr, cos, cosh, cot, current_date, current_time,
date_bin,
date_part, date_trunc, decode, degrees, digest, encode, exp,
expr::{self, InList, Sort, WindowFunction},
- factorial, flatten, floor, from_unixtime, gcd, isnan, iszero, lcm, left,
ln, log,
- log10, log2,
+ factorial, flatten, floor, from_unixtime, gcd, gen_range, isnan, iszero,
lcm, left,
+ ln, log, log10, log2,
logical_plan::{PlanType, StringifiedPlan},
lower, lpad, ltrim, md5, nanvl, now, nullif, octet_length, overlay, pi,
power,
radians, random, regexp_match, regexp_replace, repeat, replace, reverse,
right,
@@ -488,6 +488,7 @@ impl From<&protobuf::ScalarFunction> for
BuiltinScalarFunction {
ScalarFunction::ArrayToString => Self::ArrayToString,
ScalarFunction::ArrayIntersect => Self::ArrayIntersect,
ScalarFunction::ArrayUnion => Self::ArrayUnion,
+ ScalarFunction::Range => Self::Range,
ScalarFunction::Cardinality => Self::Cardinality,
ScalarFunction::Array => Self::MakeArray,
ScalarFunction::NullIf => Self::NullIf,
@@ -1409,6 +1410,12 @@ pub fn parse_expr(
parse_expr(&args[0], registry)?,
parse_expr(&args[1], registry)?,
)),
+ ScalarFunction::Range => Ok(gen_range(
+ args.to_owned()
+ .iter()
+ .map(|expr| parse_expr(expr, registry))
+ .collect::<Result<Vec<_>, _>>()?,
+ )),
ScalarFunction::Cardinality => {
Ok(cardinality(parse_expr(&args[0], registry)?))
}
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs
b/datafusion/proto/src/logical_plan/to_proto.rs
index 974d6c5aab..491b7f6664 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1495,6 +1495,7 @@ impl TryFrom<&BuiltinScalarFunction> for
protobuf::ScalarFunction {
BuiltinScalarFunction::ArrayToString => Self::ArrayToString,
BuiltinScalarFunction::ArrayIntersect => Self::ArrayIntersect,
BuiltinScalarFunction::ArrayUnion => Self::ArrayUnion,
+ BuiltinScalarFunction::Range => Self::Range,
BuiltinScalarFunction::Cardinality => Self::Cardinality,
BuiltinScalarFunction::MakeArray => Self::Array,
BuiltinScalarFunction::NullIf => Self::NullIf,
diff --git a/datafusion/sqllogictest/test_files/array.slt
b/datafusion/sqllogictest/test_files/array.slt
index 54741afdf8..92013f37d3 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -240,6 +240,13 @@ AS VALUES
(make_array(31, 32, 33, 34, 35, 26, 37, 38, 39, 40), 34, 4, 'ok', [8,9])
;
+statement ok
+CREATE TABLE arrays_range
+AS VALUES
+ (3, 10, 2),
+ (4, 13, 3)
+;
+
statement ok
CREATE TABLE arrays_with_repeating_elements
AS VALUES
@@ -2662,6 +2669,32 @@ select list_has_all(make_array(1,2,3),
make_array(4,5,6)),
----
false true false true
+query ???
+select range(column2),
+ range(column1, column2),
+ range(column1, column2, column3)
+from arrays_range;
+----
+[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] [3, 4, 5, 6, 7, 8, 9] [3, 5, 7, 9]
+[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] [4, 5, 6, 7, 8, 9, 10, 11, 12] [4,
7, 10]
+
+query ?????
+select range(5),
+ range(2, 5),
+ range(2, 10, 3),
+ range(1, 5, -1),
+ range(1, -5, 1)
+;
+----
+[0, 1, 2, 3, 4] [2, 3, 4] [2, 5, 8] [1] []
+
+query ???
+select generate_series(5),
+ generate_series(2, 5),
+ generate_series(2, 10, 3)
+;
+----
+[0, 1, 2, 3, 4] [2, 3, 4] [2, 5, 8]
### Array operators tests
@@ -2969,6 +3002,9 @@ drop table array_intersect_table_3D;
statement ok
drop table arrays_values_without_nulls;
+statement ok
+drop table arrays_range;
+
statement ok
drop table arrays_with_repeating_elements;
diff --git a/docs/source/user-guide/expressions.md
b/docs/source/user-guide/expressions.md
index bec3ba9bb2..6b2ab46eb3 100644
--- a/docs/source/user-guide/expressions.md
+++ b/docs/source/user-guide/expressions.md
@@ -236,6 +236,7 @@ Unlike to some databases the math functions in Datafusion
works the same way as
| array_union(array1, array2) | Returns an array of the elements in
the union of array1 and array2 without duplicates. `array_union([1, 2, 3, 4],
[5, 6, 3, 4]) -> [1, 2, 3, 4, 5, 6]` |
| cardinality(array) | Returns the total number of elements
in the array. `cardinality([[1, 2, 3], [4, 5, 6]]) -> 6`
|
| make_array(value1, [value2 [, ...]]) | Returns an Arrow array using the
specified input expressions. `make_array(1, 2, 3) -> [1, 2, 3]`
|
+| range(start [, stop, step]) | Returns an Arrow array between start
and stop with step. `SELECT range(2, 10, 3) -> [2, 5, 8]`
|
| trim_array(array, n) | Deprecated
|
## Regular Expressions
diff --git a/docs/source/user-guide/sql/scalar_functions.md
b/docs/source/user-guide/sql/scalar_functions.md
index 099c903122..826782e1a0 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -1560,6 +1560,7 @@ from_unixtime(expression)
- [string_to_array](#string_to_array)
- [string_to_list](#string_to_list)
- [trim_array](#trim_array)
+- [range](#range)
### `array_append`
@@ -2481,6 +2482,20 @@ trim_array(array, n)
Can be a constant, column, or function, and any combination of array
operators.
- **n**: Element to trim the array.
+### `range`
+
+Returns an Arrow array between start and stop with step. `SELECT range(2, 10,
3) -> [2, 5, 8]`
+
+The range start..end contains all values with start <= x < end. It is empty if
start >= end.
+
+Step can not be 0 (then the range will be nonsense.).
+
+#### Arguments
+
+- **start**: start of the range
+- **end**: end of the range (not included)
+- **step**: increase by step (can not be 0)
+
## Struct Functions
- [struct](#struct)