(datafusion) 01/01: chore: change width_bucket buckets parameter from i32 to i64

comphead Thu, 12 Feb 2026 14:18:23 -0800

This is an automated email from the ASF dual-hosted git repository.

comphead pushed a commit to branch width-bucket-i64
in repository https://gitbox.apache.org/repos/asf/datafusion.git


commit ef9d22ff3c2dd7b45e11b80b756c66662b4fca9b
Author: comphead <[email protected]>
AuthorDate: Thu Feb 12 14:16:58 2026 -0800

    chore: change width_bucket buckets parameter from i32 to i64
    
    Co-Authored-By: Claude Opus 4.5 <[email protected]>
---
 datafusion/spark/src/function/math/width_bucket.rs | 100 +++++++++++----------
 1 file changed, 51 insertions(+), 49 deletions(-)

diff --git a/datafusion/spark/src/function/math/width_bucket.rs b/datafusion/spark/src/function/math/width_bucket.rs
index 8d748439ad..bd68c37edb 100644
--- a/datafusion/spark/src/function/math/width_bucket.rs
+++ b/datafusion/spark/src/function/math/width_bucket.rs
@@ -26,11 +26,11 @@ use arrow::datatypes::DataType;
 use arrow::datatypes::DataType::{Duration, Float64, Int32, Interval};
 use arrow::datatypes::IntervalUnit::{MonthDayNano, YearMonth};
 use datafusion_common::cast::{
-    as_duration_microsecond_array, as_float64_array, as_int32_array,
+    as_duration_microsecond_array, as_float64_array, as_int64_array,
     as_interval_mdn_array, as_interval_ym_array,
 };
 use datafusion_common::types::{
-    NativeType, logical_duration_microsecond, logical_float64, logical_int32,
+    NativeType, logical_duration_microsecond, logical_float64, logical_int64,
     logical_interval_mdn, logical_interval_year_month,
 };
 use datafusion_common::{Result, exec_err, internal_err};
@@ -41,7 +41,7 @@ use datafusion_expr::{
 };
 use datafusion_functions::utils::make_scalar_function;
 
-use arrow::array::{Int32Array, Int32Builder};
+use arrow::array::{Int32Array, Int32Builder, Int64Array};
 use arrow::datatypes::TimeUnit::Microsecond;
 use datafusion_expr::Coercion;
 use datafusion_expr::Volatility::Immutable;
@@ -75,9 +75,9 @@ impl SparkWidthBucket {
         let interval_mdn =
             Coercion::new_exact(TypeSignatureClass::Native(logical_interval_mdn()));
         let bucket = Coercion::new_implicit(
-            TypeSignatureClass::Native(logical_int32()),
+            TypeSignatureClass::Native(logical_int64()),
             vec![TypeSignatureClass::Integer],
-            NativeType::Int32,
+            NativeType::Int64,
         );
         let type_signature = Signature::one_of(
             vec![
@@ -160,28 +160,28 @@ fn width_bucket_kern(args: &[ArrayRef]) -> Result<ArrayRef> {
             let v = as_float64_array(v)?;
             let min = as_float64_array(minv)?;
             let max = as_float64_array(maxv)?;
-            let n_bucket = as_int32_array(nb)?;
+            let n_bucket = as_int64_array(nb)?;
             Ok(Arc::new(width_bucket_float64(v, min, max, n_bucket)))
         }
         Duration(Microsecond) => {
             let v = as_duration_microsecond_array(v)?;
             let min = as_duration_microsecond_array(minv)?;
             let max = as_duration_microsecond_array(maxv)?;
-            let n_bucket = as_int32_array(nb)?;
+            let n_bucket = as_int64_array(nb)?;
             Ok(Arc::new(width_bucket_i64_as_float(v, min, max, n_bucket)))
         }
         Interval(YearMonth) => {
             let v = as_interval_ym_array(v)?;
             let min = as_interval_ym_array(minv)?;
             let max = as_interval_ym_array(maxv)?;
-            let n_bucket = as_int32_array(nb)?;
+            let n_bucket = as_int64_array(nb)?;
             Ok(Arc::new(width_bucket_i32_as_float(v, min, max, n_bucket)))
         }
         Interval(MonthDayNano) => {
             let v = as_interval_mdn_array(v)?;
             let min = as_interval_mdn_array(minv)?;
             let max = as_interval_mdn_array(maxv)?;
-            let n_bucket = as_int32_array(nb)?;
+            let n_bucket = as_int64_array(nb)?;
             Ok(Arc::new(width_bucket_interval_mdn_exact(
                 v, min, max, n_bucket,
             )))
@@ -203,7 +203,7 @@ macro_rules! width_bucket_kernel_impl {
             v: &$arr_ty,
             min: &$arr_ty,
             max: &$arr_ty,
-            n_bucket: &Int32Array,
+            n_bucket: &Int64Array,
         ) -> Int32Array {
             let len = v.len();
             let mut b = Int32Builder::with_capacity(len);
@@ -223,6 +223,7 @@ macro_rules! width_bucket_kernel_impl {
                     b.append_null();
                     continue;
                 }
+                let next_bucket = (buckets + 1) as i32;
                 if $check_nan {
                     if !x.is_finite() || !l.is_finite() || !h.is_finite() {
                         b.append_null();
@@ -249,7 +250,7 @@ macro_rules! width_bucket_kernel_impl {
                         continue;
                     }
                     if x >= h {
-                        b.append_value(buckets + 1);
+                        b.append_value(next_bucket);
                         continue;
                     }
                 } else {
@@ -258,7 +259,7 @@ macro_rules! width_bucket_kernel_impl {
                         continue;
                     }
                     if x <= h {
-                        b.append_value(buckets + 1);
+                        b.append_value(next_bucket);
                         continue;
                     }
                 }
@@ -272,8 +273,8 @@ macro_rules! width_bucket_kernel_impl {
                 if bucket < 1 {
                     bucket = 1;
                 }
-                if bucket > buckets + 1 {
-                    bucket = buckets + 1;
+                if bucket > next_bucket {
+                    bucket = next_bucket;
                 }
 
                 b.append_value(bucket);
@@ -309,7 +310,7 @@ pub(crate) fn width_bucket_interval_mdn_exact(
     v: &IntervalMonthDayNanoArray,
     lo: &IntervalMonthDayNanoArray,
     hi: &IntervalMonthDayNanoArray,
-    n: &Int32Array,
+    n: &Int64Array,
 ) -> Int32Array {
     let len = v.len();
     let mut b = Int32Builder::with_capacity(len);
@@ -324,6 +325,7 @@ pub(crate) fn width_bucket_interval_mdn_exact(
             b.append_null();
             continue;
         }
+        let next_bucket = (buckets + 1) as i32;
 
         let x = v.value(i);
         let l = lo.value(i);
@@ -349,7 +351,7 @@ pub(crate) fn width_bucket_interval_mdn_exact(
                     continue;
                 }
                 if x_m >= h_m {
-                    b.append_value(buckets + 1);
+                    b.append_value(next_bucket);
                     continue;
                 }
             } else {
@@ -358,7 +360,7 @@ pub(crate) fn width_bucket_interval_mdn_exact(
                     continue;
                 }
                 if x_m <= h_m {
-                    b.append_value(buckets + 1);
+                    b.append_value(next_bucket);
                     continue;
                 }
             }
@@ -373,8 +375,8 @@ pub(crate) fn width_bucket_interval_mdn_exact(
             if bucket < 1 {
                 bucket = 1;
             }
-            if bucket > buckets + 1 {
-                bucket = buckets + 1;
+            if bucket > next_bucket {
+                bucket = next_bucket;
             }
             b.append_value(bucket);
             continue;
@@ -400,7 +402,7 @@ pub(crate) fn width_bucket_interval_mdn_exact(
                     continue;
                 }
                 if x_f >= h_f {
-                    b.append_value(buckets + 1);
+                    b.append_value(next_bucket);
                     continue;
                 }
             } else {
@@ -409,7 +411,7 @@ pub(crate) fn width_bucket_interval_mdn_exact(
                     continue;
                 }
                 if x_f <= h_f {
-                    b.append_value(buckets + 1);
+                    b.append_value(next_bucket);
                     continue;
                 }
             }
@@ -424,8 +426,8 @@ pub(crate) fn width_bucket_interval_mdn_exact(
             if bucket < 1 {
                 bucket = 1;
             }
-            if bucket > buckets + 1 {
-                bucket = buckets + 1;
+            if bucket > next_bucket {
+                bucket = next_bucket;
             }
             b.append_value(bucket);
             continue;
@@ -443,15 +445,15 @@ mod tests {
     use std::sync::Arc;
 
     use arrow::array::{
-        ArrayRef, DurationMicrosecondArray, Float64Array, Int32Array,
+        ArrayRef, DurationMicrosecondArray, Float64Array, Int32Array, Int64Array,
         IntervalYearMonthArray,
     };
     use arrow::datatypes::IntervalMonthDayNano;
 
     // --- Helpers -------------------------------------------------------------
 
-    fn i32_array_all(len: usize, val: i32) -> Arc<Int32Array> {
-        Arc::new(Int32Array::from(vec![val; len]))
+    fn i64_array_all(len: usize, val: i64) -> Arc<Int64Array> {
+        Arc::new(Int64Array::from(vec![val; len]))
     }
 
     fn f64_array(vals: &[f64]) -> Arc<Float64Array> {
@@ -489,7 +491,7 @@ mod tests {
         let v = f64_array(&[0.5, 1.0, 9.9, -1.0, 10.0]);
         let lo = f64_array(&[0.0, 0.0, 0.0, 0.0, 0.0]);
         let hi = f64_array(&[10.0, 10.0, 10.0, 10.0, 10.0]);
-        let n = i32_array_all(5, 10);
+        let n = i64_array_all(5, 10);
 
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         let out = downcast_i32(&out);
@@ -501,7 +503,7 @@ mod tests {
         let v = f64_array(&[9.9, 10.0, 0.0, -0.1, 10.1]);
         let lo = f64_array(&[10.0; 5]);
         let hi = f64_array(&[0.0; 5]);
-        let n = i32_array_all(5, 10);
+        let n = i64_array_all(5, 10);
 
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         let out = downcast_i32(&out);
@@ -513,7 +515,7 @@ mod tests {
         let v = f64_array(&[0.0, 9.999999999, 10.0]);
         let lo = f64_array(&[0.0; 3]);
         let hi = f64_array(&[10.0; 3]);
-        let n = i32_array_all(3, 10);
+        let n = i64_array_all(3, 10);
 
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         let out = downcast_i32(&out);
@@ -525,7 +527,7 @@ mod tests {
         let v = f64_array(&[10.0, 0.0, -0.000001]);
         let lo = f64_array(&[10.0; 3]);
         let hi = f64_array(&[0.0; 3]);
-        let n = i32_array_all(3, 10);
+        let n = i64_array_all(3, 10);
 
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         let out = downcast_i32(&out);
@@ -537,7 +539,7 @@ mod tests {
         let v = f64_array(&[1.0, 5.0, 9.0]);
         let lo = f64_array(&[0.0, 0.0, 0.0]);
         let hi = f64_array(&[10.0, 10.0, 10.0]);
-        let n = Arc::new(Int32Array::from(vec![0, -1, 10]));
+        let n = Arc::new(Int64Array::from(vec![0, -1, 10]));
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         let out = downcast_i32(&out);
         assert!(out.is_null(0));
@@ -547,7 +549,7 @@ mod tests {
         let v = f64_array(&[1.0]);
         let lo = f64_array(&[5.0]);
         let hi = f64_array(&[5.0]);
-        let n = i32_array_all(1, 10);
+        let n = i64_array_all(1, 10);
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         let out = downcast_i32(&out);
         assert!(out.is_null(0));
@@ -555,7 +557,7 @@ mod tests {
         let v = f64_array_opt(&[Some(f64::NAN)]);
         let lo = f64_array(&[0.0]);
         let hi = f64_array(&[10.0]);
-        let n = i32_array_all(1, 10);
+        let n = i64_array_all(1, 10);
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         let out = downcast_i32(&out);
         assert!(out.is_null(0));
@@ -566,7 +568,7 @@ mod tests {
         let v = f64_array_opt(&[None, Some(1.0), Some(2.0), Some(3.0)]);
         let lo = f64_array(&[0.0; 4]);
         let hi = f64_array(&[10.0; 4]);
-        let n = i32_array_all(4, 10);
+        let n = i64_array_all(4, 10);
 
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         let out = downcast_i32(&out);
@@ -578,7 +580,7 @@ mod tests {
         let v = f64_array(&[1.0]);
         let lo = f64_array_opt(&[None]);
         let hi = f64_array(&[10.0]);
-        let n = i32_array_all(1, 10);
+        let n = i64_array_all(1, 10);
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         let out = downcast_i32(&out);
         assert!(out.is_null(0));
@@ -591,7 +593,7 @@ mod tests {
         let v = dur_us_array(&[1_000_000, 0, -1]);
         let lo = dur_us_array(&[0, 0, 0]);
         let hi = dur_us_array(&[2_000_000, 2_000_000, 2_000_000]);
-        let n = i32_array_all(3, 2);
+        let n = i64_array_all(3, 2);
 
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         let out = downcast_i32(&out);
@@ -603,7 +605,7 @@ mod tests {
         let v = dur_us_array(&[0]);
         let lo = dur_us_array(&[1]);
         let hi = dur_us_array(&[1]);
-        let n = i32_array_all(1, 10);
+        let n = i64_array_all(1, 10);
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         assert!(downcast_i32(&out).is_null(0));
     }
@@ -615,7 +617,7 @@ mod tests {
         let v = ym_array(&[0, 5, 11, 12, 13]);
         let lo = ym_array(&[0; 5]);
         let hi = ym_array(&[12; 5]);
-        let n = i32_array_all(5, 12);
+        let n = i64_array_all(5, 12);
 
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         let out = downcast_i32(&out);
@@ -627,7 +629,7 @@ mod tests {
         let v = ym_array(&[11, 12, 0, -1, 13]);
         let lo = ym_array(&[12; 5]);
         let hi = ym_array(&[0; 5]);
-        let n = i32_array_all(5, 12);
+        let n = i64_array_all(5, 12);
 
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         let out = downcast_i32(&out);
@@ -641,7 +643,7 @@ mod tests {
         let v = mdn_array(&[(0, 0, 0), (5, 0, 0), (11, 0, 0), (12, 0, 0), (13, 0, 0)]);
         let lo = mdn_array(&[(0, 0, 0); 5]);
         let hi = mdn_array(&[(12, 0, 0); 5]);
-        let n = i32_array_all(5, 12);
+        let n = i64_array_all(5, 12);
 
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         let out = downcast_i32(&out);
@@ -653,7 +655,7 @@ mod tests {
         let v = mdn_array(&[(11, 0, 0), (12, 0, 0), (0, 0, 0), (-1, 0, 0), (13, 0, 0)]);
         let lo = mdn_array(&[(12, 0, 0); 5]);
         let hi = mdn_array(&[(0, 0, 0); 5]);
-        let n = i32_array_all(5, 12);
+        let n = i64_array_all(5, 12);
 
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         let out = downcast_i32(&out);
@@ -673,7 +675,7 @@ mod tests {
         ]);
         let lo = mdn_array(&[(0, 0, 0); 6]);
         let hi = mdn_array(&[(0, 10, 0); 6]);
-        let n = i32_array_all(6, 10);
+        let n = i64_array_all(6, 10);
 
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         let out = downcast_i32(&out);
@@ -686,7 +688,7 @@ mod tests {
         let v = mdn_array(&[(0, 9, 0), (0, 10, 0), (0, 0, 0), (0, -1, 0), (0, 11, 0)]);
         let lo = mdn_array(&[(0, 10, 0); 5]);
         let hi = mdn_array(&[(0, 0, 0); 5]);
-        let n = i32_array_all(5, 10);
+        let n = i64_array_all(5, 10);
 
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         let out = downcast_i32(&out);
@@ -698,7 +700,7 @@ mod tests {
         let v = mdn_array(&[(0, 9, 1), (0, 10, 0), (0, 0, 0), (0, -1, 0), (0, 11, 0)]);
         let lo = mdn_array(&[(0, 10, 0); 5]);
         let hi = mdn_array(&[(0, 0, 0); 5]);
-        let n = i32_array_all(5, 10);
+        let n = i64_array_all(5, 10);
 
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         let out = downcast_i32(&out);
@@ -711,7 +713,7 @@ mod tests {
         let v = mdn_array(&[(0, 1, 0)]);
         let lo = mdn_array(&[(0, 0, 0)]);
         let hi = mdn_array(&[(1, 1, 0)]);
-        let n = i32_array_all(1, 4);
+        let n = i64_array_all(1, 4);
 
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         let out = downcast_i32(&out);
@@ -723,7 +725,7 @@ mod tests {
         let v = mdn_array(&[(0, 0, 0)]);
         let lo = mdn_array(&[(1, 2, 3)]);
         let hi = mdn_array(&[(1, 2, 3)]); // lo == hi
-        let n = i32_array_all(1, 10);
+        let n = i64_array_all(1, 10);
 
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         assert!(downcast_i32(&out).is_null(0));
@@ -734,7 +736,7 @@ mod tests {
         let v = mdn_array(&[(0, 0, 0)]);
         let lo = mdn_array(&[(0, 0, 0)]);
         let hi = mdn_array(&[(0, 10, 0)]);
-        let n = Arc::new(Int32Array::from(vec![0])); // n <= 0
+        let n = Arc::new(Int64Array::from(vec![0])); // n <= 0
 
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         assert!(downcast_i32(&out).is_null(0));
@@ -748,7 +750,7 @@ mod tests {
         ]));
         let lo = mdn_array(&[(0, 0, 0), (0, 0, 0)]);
         let hi = mdn_array(&[(0, 10, 0), (0, 10, 0)]);
-        let n = i32_array_all(2, 10);
+        let n = i64_array_all(2, 10);
 
         let out = width_bucket_kern(&[v, lo, hi, n]).unwrap();
         let out = downcast_i32(&out);
@@ -773,7 +775,7 @@ mod tests {
         let v: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
         let lo = f64_array(&[0.0, 0.0, 0.0]);
         let hi = f64_array(&[10.0, 10.0, 10.0]);
-        let n = i32_array_all(3, 10);
+        let n = i64_array_all(3, 10);
 
         let err = width_bucket_kern(&[v, lo, hi, n]).unwrap_err();
         let msg = format!("{err}");


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datafusion) 01/01: chore: change width_bucket buckets parameter from i32 to i64

Reply via email to