This is an automated email from the ASF dual-hosted git repository.

jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new bf40113e61 fix: incorrect null handling in `range` and 
`generate_series` (#9574)
bf40113e61 is described below

commit bf40113e61d247e4366382f1b8b2aa442003c89f
Author: Jonah Gao <[email protected]>
AuthorDate: Tue Mar 12 18:01:23 2024 +0800

    fix: incorrect null handling in `range` and `generate_series` (#9574)
    
    * fix: incorrect null handling in `range` and `generate_series`
    
    * add tests
    
    * doc
---
 datafusion/functions-array/src/kernels.rs    | 82 ++++++++++++++++++----------
 datafusion/sqllogictest/test_files/array.slt | 66 ++++++++++++++++++++++
 2 files changed, 120 insertions(+), 28 deletions(-)

diff --git a/datafusion/functions-array/src/kernels.rs 
b/datafusion/functions-array/src/kernels.rs
index e5231aa594..4ac32c02af 100644
--- a/datafusion/functions-array/src/kernels.rs
+++ b/datafusion/functions-array/src/kernels.rs
@@ -23,13 +23,12 @@ use arrow::array::{
     LargeStringArray, ListArray, ListBuilder, OffsetSizeTrait, StringArray,
     StringBuilder, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
 };
-use arrow::buffer::OffsetBuffer;
 use arrow::compute;
-use arrow::datatypes::Field;
-use arrow::datatypes::UInt64Type;
-use arrow::datatypes::{DataType, Date32Type, IntervalMonthDayNanoType};
+use arrow::datatypes::{
+    DataType, Date32Type, Field, IntervalMonthDayNanoType, UInt64Type,
+};
 use arrow::row::{RowConverter, SortField};
-use arrow_buffer::{BooleanBufferBuilder, NullBuffer};
+use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer};
 use arrow_schema::FieldRef;
 use arrow_schema::SortOptions;
 
@@ -394,39 +393,66 @@ pub(super) fn gen_range(args: &[ArrayRef], include_upper: 
bool) -> Result<ArrayR
 
     let mut values = vec![];
     let mut offsets = vec![0];
+    let mut valid = BooleanBufferBuilder::new(stop_array.len());
     for (idx, stop) in stop_array.iter().enumerate() {
-        let stop = stop.unwrap_or(0);
-        let start = start_array.as_ref().map(|arr| 
arr.value(idx)).unwrap_or(0);
-        let step = step_array.as_ref().map(|arr| arr.value(idx)).unwrap_or(1);
-        if step == 0 {
-            return exec_err!(
-                "step can't be 0 for function {}(start [, stop, step])",
-                if include_upper {
-                    "generate_series"
-                } else {
-                    "range"
-                }
-            );
-        }
-        // Below, we utilize `usize` to represent steps.
-        // On 32-bit targets, the absolute value of `i64` may fail to fit into 
`usize`.
-        let step_abs = usize::try_from(step.unsigned_abs()).map_err(|_| {
-            not_impl_datafusion_err!("step {} can't fit into usize", step)
-        })?;
-        values.extend(
-            gen_range_iter(start, stop, step < 0, 
include_upper).step_by(step_abs),
-        );
-        offsets.push(values.len() as i32);
+        match retrieve_range_args(start_array, stop, step_array, idx) {
+            Some((_, _, 0)) => {
+                return exec_err!(
+                    "step can't be 0 for function {}(start [, stop, step])",
+                    if include_upper {
+                        "generate_series"
+                    } else {
+                        "range"
+                    }
+                );
+            }
+            Some((start, stop, step)) => {
+                // Below, we utilize `usize` to represent steps.
+                // On 32-bit targets, the absolute value of `i64` may fail to 
fit into `usize`.
+                let step_abs = 
usize::try_from(step.unsigned_abs()).map_err(|_| {
+                    not_impl_datafusion_err!("step {} can't fit into usize", 
step)
+                })?;
+                values.extend(
+                    gen_range_iter(start, stop, step < 0, include_upper)
+                        .step_by(step_abs),
+                );
+                offsets.push(values.len() as i32);
+                valid.append(true);
+            }
+            // If any of the arguments is NULL, append a NULL value to the 
result.
+            None => {
+                offsets.push(values.len() as i32);
+                valid.append(false);
+            }
+        };
     }
     let arr = Arc::new(ListArray::try_new(
         Arc::new(Field::new("item", DataType::Int64, true)),
         OffsetBuffer::new(offsets.into()),
         Arc::new(Int64Array::from(values)),
-        None,
+        Some(NullBuffer::new(valid.finish())),
     )?);
     Ok(arr)
 }
 
+/// Get the (start, stop, step) args for the range and generate_series 
function.
+/// If any of the arguments is NULL, returns None.
+fn retrieve_range_args(
+    start_array: Option<&Int64Array>,
+    stop: Option<i64>,
+    step_array: Option<&Int64Array>,
+    idx: usize,
+) -> Option<(i64, i64, i64)> {
+    // Default start value is 0 if not provided
+    let start =
+        start_array.map_or(Some(0), |arr| arr.is_valid(idx).then(|| 
arr.value(idx)))?;
+    let stop = stop?;
+    // Default step value is 1 if not provided
+    let step =
+        step_array.map_or(Some(1), |arr| arr.is_valid(idx).then(|| 
arr.value(idx)))?;
+    Some((start, stop, step))
+}
+
 /// Returns an iterator of i64 values from start to stop
 fn gen_range_iter(
     start: i64,
diff --git a/datafusion/sqllogictest/test_files/array.slt 
b/datafusion/sqllogictest/test_files/array.slt
index 434fe8c959..b729e5c10f 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -5598,6 +5598,39 @@ select
 ----
 [] [9223372036854775807] [] [9223372036854775806] [] [-9223372036854775807] [] 
[-9223372036854775808]
 
+# Test range(start, stop, step) with NULL values
+query ?
+select range(start, stop, step) from
+  (values (1), (NULL)) as start_values(start),
+  (values (10), (NULL)) as stop_values(stop),
+  (values (3), (NULL)) as step_values(step)
+where start is null or stop is null or step is null
+----
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+
+# Test range(start, stop) with NULL values
+query ?
+select range(start, stop) from
+  (values (1), (NULL)) as start_values(start),
+  (values (10), (NULL)) as stop_values(stop)
+where start is null or stop is null
+----
+NULL
+NULL
+NULL
+
+# Test range(stop) with NULL value
+query ?
+select range(NULL)
+----
+NULL
+
 ## should throw error
 query error
 select range(DATE '1992-09-01', NULL, INTERVAL '1' YEAR);
@@ -5678,6 +5711,39 @@ select
 ----
 [9223372036854775807] [9223372036854775807] [-9223372036854775808] 
[-9223372036854775808]
 
+# Test generate_series(start, stop, step) with NULL values
+query ?
+select generate_series(start, stop, step) from
+  (values (1), (NULL)) as start_values(start),
+  (values (10), (NULL)) as stop_values(stop),
+  (values (3), (NULL)) as step_values(step)
+where start is null or stop is null or step is null
+----
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+
+# Test generate_series(start, stop) with NULL values
+query ?
+select generate_series(start, stop) from
+  (values (1), (NULL)) as start_values(start),
+  (values (10), (NULL)) as stop_values(stop)
+where start is null or stop is null
+----
+NULL
+NULL
+NULL
+
+# Test generate_series(stop) with NULL value
+query ?
+select generate_series(NULL)
+----
+NULL
+
 
 ## array_except
 

Reply via email to