This is an automated email from the ASF dual-hosted git repository.
jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new dd5683745e Minor: return NULL for range and generate_series (#10275)
dd5683745e is described below
commit dd5683745e7d527b01b804c8f4f1a0a53aa225e8
Author: Lordworms <[email protected]>
AuthorDate: Mon Apr 29 18:14:06 2024 -0500
Minor: return NULL for range and generate_series (#10275)
* return NULL for range and generate_series
* Update datafusion/sqllogictest/test_files/array.slt
Co-authored-by: Andrew Lamb <[email protected]>
* Update datafusion/sqllogictest/test_files/array.slt
Co-authored-by: Andrew Lamb <[email protected]>
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/functions-array/src/range.rs | 45 ++++++---
datafusion/functions-array/src/udf.rs | 140 ---------------------------
datafusion/sqllogictest/test_files/array.slt | 37 +++++--
3 files changed, 59 insertions(+), 163 deletions(-)
diff --git a/datafusion/functions-array/src/range.rs b/datafusion/functions-array/src/range.rs
index 1c9e0c878e..150fe59602 100644
--- a/datafusion/functions-array/src/range.rs
+++ b/datafusion/functions-array/src/range.rs
@@ -17,14 +17,12 @@
//! [`ScalarUDFImpl`] definitions for range and gen_series functions.
+use crate::utils::make_scalar_function;
use arrow::array::{Array, ArrayRef, Int64Array, ListArray};
use arrow::datatypes::{DataType, Field};
-use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer};
-use std::any::Any;
-
-use crate::utils::make_scalar_function;
use arrow_array::types::{Date32Type, IntervalMonthDayNanoType};
-use arrow_array::Date32Array;
+use arrow_array::{Date32Array, NullArray};
+use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer};
use arrow_schema::DataType::{Date32, Int64, Interval, List};
use arrow_schema::IntervalUnit::MonthDayNano;
use datafusion_common::cast::{as_date32_array, as_int64_array,
as_interval_mdn_array};
@@ -34,6 +32,7 @@ use datafusion_expr::Expr;
use datafusion_expr::{
ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, Volatility,
};
+use std::any::Any;
use std::sync::Arc;
make_udf_function!(
@@ -57,6 +56,7 @@ impl Range {
TypeSignature::Exact(vec![Int64, Int64]),
TypeSignature::Exact(vec![Int64, Int64, Int64]),
TypeSignature::Exact(vec![Date32, Date32,
Interval(MonthDayNano)]),
+ TypeSignature::Any(3),
],
Volatility::Immutable,
),
@@ -77,14 +77,21 @@ impl ScalarUDFImpl for Range {
}
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
- Ok(List(Arc::new(Field::new(
- "item",
- arg_types[0].clone(),
- true,
- ))))
+ if arg_types.iter().any(|t| t.eq(&DataType::Null)) {
+ Ok(DataType::Null)
+ } else {
+ Ok(List(Arc::new(Field::new(
+ "item",
+ arg_types[0].clone(),
+ true,
+ ))))
+ }
}
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ if args.iter().any(|arg| arg.data_type() == DataType::Null) {
+ return Ok(ColumnarValue::Array(Arc::new(NullArray::new(1))));
+ }
match args[0].data_type() {
Int64 => make_scalar_function(|args| gen_range_inner(args,
false))(args),
Date32 => make_scalar_function(|args| gen_range_date(args,
false))(args),
@@ -120,6 +127,7 @@ impl GenSeries {
TypeSignature::Exact(vec![Int64, Int64]),
TypeSignature::Exact(vec![Int64, Int64, Int64]),
TypeSignature::Exact(vec![Date32, Date32,
Interval(MonthDayNano)]),
+ TypeSignature::Any(3),
],
Volatility::Immutable,
),
@@ -140,14 +148,21 @@ impl ScalarUDFImpl for GenSeries {
}
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
- Ok(List(Arc::new(Field::new(
- "item",
- arg_types[0].clone(),
- true,
- ))))
+ if arg_types.iter().any(|t| t.eq(&DataType::Null)) {
+ Ok(DataType::Null)
+ } else {
+ Ok(List(Arc::new(Field::new(
+ "item",
+ arg_types[0].clone(),
+ true,
+ ))))
+ }
}
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ if args.iter().any(|arg| arg.data_type() == DataType::Null) {
+ return Ok(ColumnarValue::Array(Arc::new(NullArray::new(1))));
+ }
match args[0].data_type() {
Int64 => make_scalar_function(|args| gen_range_inner(args,
true))(args),
Date32 => make_scalar_function(|args| gen_range_date(args,
true))(args),
diff --git a/datafusion/functions-array/src/udf.rs b/datafusion/functions-array/src/udf.rs
index 1462b3efad..c723fbb42c 100644
--- a/datafusion/functions-array/src/udf.rs
+++ b/datafusion/functions-array/src/udf.rs
@@ -166,146 +166,6 @@ impl ScalarUDFImpl for StringToArray {
}
}
-make_udf_function!(
- Range,
- range,
- start stop step,
- "create a list of values in the range between start and stop",
- range_udf
-);
-#[derive(Debug)]
-pub struct Range {
- signature: Signature,
- aliases: Vec<String>,
-}
-impl Range {
- pub fn new() -> Self {
- use DataType::*;
- Self {
- signature: Signature::one_of(
- vec![
- TypeSignature::Exact(vec![Int64]),
- TypeSignature::Exact(vec![Int64, Int64]),
- TypeSignature::Exact(vec![Int64, Int64, Int64]),
- TypeSignature::Exact(vec![Date32, Date32,
Interval(MonthDayNano)]),
- ],
- Volatility::Immutable,
- ),
- aliases: vec![String::from("range")],
- }
- }
-}
-impl ScalarUDFImpl for Range {
- fn as_any(&self) -> &dyn Any {
- self
- }
- fn name(&self) -> &str {
- "range"
- }
-
- fn signature(&self) -> &Signature {
- &self.signature
- }
-
- fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
- use DataType::*;
- Ok(List(Arc::new(Field::new(
- "item",
- arg_types[0].clone(),
- true,
- ))))
- }
-
- fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
- let args = ColumnarValue::values_to_arrays(args)?;
- match args[0].data_type() {
- arrow::datatypes::DataType::Int64 => {
- crate::kernels::gen_range(&args,
false).map(ColumnarValue::Array)
- }
- arrow::datatypes::DataType::Date32 => {
- crate::kernels::gen_range_date(&args,
false).map(ColumnarValue::Array)
- }
- _ => {
- exec_err!("unsupported type for range")
- }
- }
- }
-
- fn aliases(&self) -> &[String] {
- &self.aliases
- }
-}
-
-make_udf_function!(
- GenSeries,
- gen_series,
- start stop step,
- "create a list of values in the range between start and stop, include upper bound",
- gen_series_udf
-);
-#[derive(Debug)]
-pub struct GenSeries {
- signature: Signature,
- aliases: Vec<String>,
-}
-impl GenSeries {
- pub fn new() -> Self {
- use DataType::*;
- Self {
- signature: Signature::one_of(
- vec![
- TypeSignature::Exact(vec![Int64]),
- TypeSignature::Exact(vec![Int64, Int64]),
- TypeSignature::Exact(vec![Int64, Int64, Int64]),
- TypeSignature::Exact(vec![Date32, Date32,
Interval(MonthDayNano)]),
- ],
- Volatility::Immutable,
- ),
- aliases: vec![String::from("generate_series")],
- }
- }
-}
-impl ScalarUDFImpl for GenSeries {
- fn as_any(&self) -> &dyn Any {
- self
- }
- fn name(&self) -> &str {
- "generate_series"
- }
-
- fn signature(&self) -> &Signature {
- &self.signature
- }
-
- fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
- use DataType::*;
- Ok(List(Arc::new(Field::new(
- "item",
- arg_types[0].clone(),
- true,
- ))))
- }
-
- fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
- let args = ColumnarValue::values_to_arrays(args)?;
- match args[0].data_type() {
- arrow::datatypes::DataType::Int64 => {
- crate::kernels::gen_range(&args,
true).map(ColumnarValue::Array)
- }
- arrow::datatypes::DataType::Date32 => {
- crate::kernels::gen_range_date(&args,
true).map(ColumnarValue::Array)
- }
- _ => {
- exec_err!("unsupported type for range")
- }
- }
- }
-
- fn aliases(&self) -> &[String] {
- &self.aliases
- }
-}
-
make_udf_function!(
ArrayDims,
array_dims,
diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt
index b33419ecd4..3b90187f07 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -5634,15 +5634,26 @@ select range(NULL)
----
NULL
-## should throw error
-query error
+## should return NULL
+query ?
select range(DATE '1992-09-01', NULL, INTERVAL '1' YEAR);
+----
+NULL
-query error
+query ?
select range(DATE '1992-09-01', DATE '1993-03-01', NULL);
+----
+NULL
-query error
+query ?
select range(NULL, DATE '1993-03-01', INTERVAL '1' YEAR);
+----
+NULL
+
+query ?
+select range(NULL, NULL, NULL);
+----
+NULL
query ?
select range(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '-1' YEAR)
@@ -5668,16 +5679,26 @@ select generate_series(5),
----
[0, 1, 2, 3, 4, 5] [2, 3, 4, 5] [2, 5, 8] [1, 2, 3, 4, 5] [5, 4, 3, 2, 1] [10,
7, 4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01,
1993-03-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28,
1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22,
1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16,
1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10,
1993-01-09, 1993-01-08, 1993-01-07, 1993-01-0 [...]
-## should throw error
-query error
+## should return NULL
+query ?
select generate_series(DATE '1992-09-01', NULL, INTERVAL '1' YEAR);
+----
+NULL
-query error
+query ?
select generate_series(DATE '1992-09-01', DATE '1993-03-01', NULL);
+----
+NULL
-query error
+query ?
select generate_series(NULL, DATE '1993-03-01', INTERVAL '1' YEAR);
+----
+NULL
+query ?
+select generate_series(NULL, NULL, NULL);
+----
+NULL
query ?
select generate_series(DATE '1989-04-01', DATE '1993-03-01', INTERVAL '-1' YEAR)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]