This is an automated email from the ASF dual-hosted git repository. comphead pushed a commit to branch width-bucket-i64 in repository https://gitbox.apache.org/repos/asf/datafusion.git
commit ef9d22ff3c2dd7b45e11b80b756c66662b4fca9b Author: comphead <[email protected]> AuthorDate: Thu Feb 12 14:16:58 2026 -0800 chore: change width_bucket buckets parameter from i32 to i64 Co-Authored-By: Claude Opus 4.5 <[email protected]> --- datafusion/spark/src/function/math/width_bucket.rs | 100 +++++++++++---------- 1 file changed, 51 insertions(+), 49 deletions(-) diff --git a/datafusion/spark/src/function/math/width_bucket.rs b/datafusion/spark/src/function/math/width_bucket.rs index 8d748439ad..bd68c37edb 100644 --- a/datafusion/spark/src/function/math/width_bucket.rs +++ b/datafusion/spark/src/function/math/width_bucket.rs @@ -26,11 +26,11 @@ use arrow::datatypes::DataType; use arrow::datatypes::DataType::{Duration, Float64, Int32, Interval}; use arrow::datatypes::IntervalUnit::{MonthDayNano, YearMonth}; use datafusion_common::cast::{ - as_duration_microsecond_array, as_float64_array, as_int32_array, + as_duration_microsecond_array, as_float64_array, as_int64_array, as_interval_mdn_array, as_interval_ym_array, }; use datafusion_common::types::{ - NativeType, logical_duration_microsecond, logical_float64, logical_int32, + NativeType, logical_duration_microsecond, logical_float64, logical_int64, logical_interval_mdn, logical_interval_year_month, }; use datafusion_common::{Result, exec_err, internal_err}; @@ -41,7 +41,7 @@ use datafusion_expr::{ }; use datafusion_functions::utils::make_scalar_function; -use arrow::array::{Int32Array, Int32Builder}; +use arrow::array::{Int32Array, Int32Builder, Int64Array}; use arrow::datatypes::TimeUnit::Microsecond; use datafusion_expr::Coercion; use datafusion_expr::Volatility::Immutable; @@ -75,9 +75,9 @@ impl SparkWidthBucket { let interval_mdn = Coercion::new_exact(TypeSignatureClass::Native(logical_interval_mdn())); let bucket = Coercion::new_implicit( - TypeSignatureClass::Native(logical_int32()), + TypeSignatureClass::Native(logical_int64()), vec![TypeSignatureClass::Integer], - NativeType::Int32, + NativeType::Int64, ); let type_signature = Signature::one_of( vec![ @@ -160,28 +160,28 @@ fn width_bucket_kern(args: &[ArrayRef]) -> Result<ArrayRef> { let v = as_float64_array(v)?; let min = as_float64_array(minv)?; let max = as_float64_array(maxv)?; - let n_bucket = as_int32_array(nb)?; + let n_bucket = as_int64_array(nb)?; Ok(Arc::new(width_bucket_float64(v, min, max, n_bucket))) } Duration(Microsecond) => { let v = as_duration_microsecond_array(v)?; let min = as_duration_microsecond_array(minv)?; let max = as_duration_microsecond_array(maxv)?; - let n_bucket = as_int32_array(nb)?; + let n_bucket = as_int64_array(nb)?; Ok(Arc::new(width_bucket_i64_as_float(v, min, max, n_bucket))) } Interval(YearMonth) => { let v = as_interval_ym_array(v)?; let min = as_interval_ym_array(minv)?; let max = as_interval_ym_array(maxv)?; - let n_bucket = as_int32_array(nb)?; + let n_bucket = as_int64_array(nb)?; Ok(Arc::new(width_bucket_i32_as_float(v, min, max, n_bucket))) } Interval(MonthDayNano) => { let v = as_interval_mdn_array(v)?; let min = as_interval_mdn_array(minv)?; let max = as_interval_mdn_array(maxv)?; - let n_bucket = as_int32_array(nb)?; + let n_bucket = as_int64_array(nb)?; Ok(Arc::new(width_bucket_interval_mdn_exact( v, min, max, n_bucket, ))) @@ -203,7 +203,7 @@ macro_rules! width_bucket_kernel_impl { v: &$arr_ty, min: &$arr_ty, max: &$arr_ty, - n_bucket: &Int32Array, + n_bucket: &Int64Array, ) -> Int32Array { let len = v.len(); let mut b = Int32Builder::with_capacity(len); @@ -223,6 +223,7 @@ macro_rules! width_bucket_kernel_impl { b.append_null(); continue; } + let next_bucket = (buckets + 1) as i32; if $check_nan { if !x.is_finite() || !l.is_finite() || !h.is_finite() { b.append_null(); @@ -249,7 +250,7 @@ macro_rules! width_bucket_kernel_impl { continue; } if x >= h { - b.append_value(buckets + 1); + b.append_value(next_bucket); continue; } } else { @@ -258,7 +259,7 @@ macro_rules! width_bucket_kernel_impl { continue; } if x <= h { - b.append_value(buckets + 1); + b.append_value(next_bucket); continue; } } @@ -272,8 +273,8 @@ macro_rules! width_bucket_kernel_impl { if bucket < 1 { bucket = 1; } - if bucket > buckets + 1 { - bucket = buckets + 1; + if bucket > next_bucket { + bucket = next_bucket; } b.append_value(bucket); @@ -309,7 +310,7 @@ pub(crate) fn width_bucket_interval_mdn_exact( v: &IntervalMonthDayNanoArray, lo: &IntervalMonthDayNanoArray, hi: &IntervalMonthDayNanoArray, - n: &Int32Array, + n: &Int64Array, ) -> Int32Array { let len = v.len(); let mut b = Int32Builder::with_capacity(len); @@ -324,6 +325,7 @@ pub(crate) fn width_bucket_interval_mdn_exact( b.append_null(); continue; } + let next_bucket = (buckets + 1) as i32; let x = v.value(i); let l = lo.value(i); @@ -349,7 +351,7 @@ pub(crate) fn width_bucket_interval_mdn_exact( continue; } if x_m >= h_m { - b.append_value(buckets + 1); + b.append_value(next_bucket); continue; } } else { @@ -358,7 +360,7 @@ pub(crate) fn width_bucket_interval_mdn_exact( continue; } if x_m <= h_m { - b.append_value(buckets + 1); + b.append_value(next_bucket); continue; } } @@ -373,8 +375,8 @@ pub(crate) fn width_bucket_interval_mdn_exact( if bucket < 1 { bucket = 1; } - if bucket > buckets + 1 { - bucket = buckets + 1; + if bucket > next_bucket { + bucket = next_bucket; } b.append_value(bucket); continue; @@ -400,7 +402,7 @@ pub(crate) fn width_bucket_interval_mdn_exact( continue; } if x_f >= h_f { - b.append_value(buckets + 1); + b.append_value(next_bucket); continue; } } else { @@ -409,7 +411,7 @@ pub(crate) fn width_bucket_interval_mdn_exact( continue; } if x_f <= h_f { - b.append_value(buckets + 1); + b.append_value(next_bucket); continue; } } @@ -424,8 +426,8 @@ pub(crate) fn width_bucket_interval_mdn_exact( if bucket < 1 { bucket = 1; } - if bucket > buckets + 1 { - bucket = buckets + 1; + if bucket > next_bucket { + bucket = next_bucket; } b.append_value(bucket); continue; @@ -443,15 +445,15 @@ mod tests { use std::sync::Arc; use arrow::array::{ - ArrayRef, DurationMicrosecondArray, Float64Array, Int32Array, + ArrayRef, DurationMicrosecondArray, Float64Array, Int32Array, Int64Array, IntervalYearMonthArray, }; use arrow::datatypes::IntervalMonthDayNano; // --- Helpers ------------------------------------------------------------- - fn i32_array_all(len: usize, val: i32) -> Arc<Int32Array> { - Arc::new(Int32Array::from(vec![val; len])) + fn i64_array_all(len: usize, val: i64) -> Arc<Int64Array> { + Arc::new(Int64Array::from(vec![val; len])) } fn f64_array(vals: &[f64]) -> Arc<Float64Array> { @@ -489,7 +491,7 @@ mod tests { let v = f64_array(&[0.5, 1.0, 9.9, -1.0, 10.0]); let lo = f64_array(&[0.0, 0.0, 0.0, 0.0, 0.0]); let hi = f64_array(&[10.0, 10.0, 10.0, 10.0, 10.0]); - let n = i32_array_all(5, 10); + let n = i64_array_all(5, 10); let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); let out = downcast_i32(&out); @@ -501,7 +503,7 @@ mod tests { let v = f64_array(&[9.9, 10.0, 0.0, -0.1, 10.1]); let lo = f64_array(&[10.0; 5]); let hi = f64_array(&[0.0; 5]); - let n = i32_array_all(5, 10); + let n = i64_array_all(5, 10); let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); let out = downcast_i32(&out); @@ -513,7 +515,7 @@ mod tests { let v = f64_array(&[0.0, 9.999999999, 10.0]); let lo = f64_array(&[0.0; 3]); let hi = f64_array(&[10.0; 3]); - let n = i32_array_all(3, 10); + let n = i64_array_all(3, 10); let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); let out = downcast_i32(&out); @@ -525,7 +527,7 @@ mod tests { let v = f64_array(&[10.0, 0.0, -0.000001]); let lo = f64_array(&[10.0; 3]); let hi = f64_array(&[0.0; 3]); - let n = i32_array_all(3, 10); + let n = i64_array_all(3, 10); let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); let out = downcast_i32(&out); @@ -537,7 +539,7 @@ mod tests { let v = f64_array(&[1.0, 5.0, 9.0]); let lo = f64_array(&[0.0, 0.0, 0.0]); let hi = f64_array(&[10.0, 10.0, 10.0]); - let n = Arc::new(Int32Array::from(vec![0, -1, 10])); + let n = Arc::new(Int64Array::from(vec![0, -1, 10])); let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); let out = downcast_i32(&out); assert!(out.is_null(0)); @@ -547,7 +549,7 @@ mod tests { let v = f64_array(&[1.0]); let lo = f64_array(&[5.0]); let hi = f64_array(&[5.0]); - let n = i32_array_all(1, 10); + let n = i64_array_all(1, 10); let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); let out = downcast_i32(&out); assert!(out.is_null(0)); @@ -555,7 +557,7 @@ mod tests { let v = f64_array_opt(&[Some(f64::NAN)]); let lo = f64_array(&[0.0]); let hi = f64_array(&[10.0]); - let n = i32_array_all(1, 10); + let n = i64_array_all(1, 10); let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); let out = downcast_i32(&out); assert!(out.is_null(0)); @@ -566,7 +568,7 @@ mod tests { let v = f64_array_opt(&[None, Some(1.0), Some(2.0), Some(3.0)]); let lo = f64_array(&[0.0; 4]); let hi = f64_array(&[10.0; 4]); - let n = i32_array_all(4, 10); + let n = i64_array_all(4, 10); let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); let out = downcast_i32(&out); @@ -578,7 +580,7 @@ mod tests { let v = f64_array(&[1.0]); let lo = f64_array_opt(&[None]); let hi = f64_array(&[10.0]); - let n = i32_array_all(1, 10); + let n = i64_array_all(1, 10); let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); let out = downcast_i32(&out); assert!(out.is_null(0)); @@ -591,7 +593,7 @@ mod tests { let v = dur_us_array(&[1_000_000, 0, -1]); let lo = dur_us_array(&[0, 0, 0]); let hi = dur_us_array(&[2_000_000, 2_000_000, 2_000_000]); - let n = i32_array_all(3, 2); + let n = i64_array_all(3, 2); let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); let out = downcast_i32(&out); @@ -603,7 +605,7 @@ mod tests { let v = dur_us_array(&[0]); let lo = dur_us_array(&[1]); let hi = dur_us_array(&[1]); - let n = i32_array_all(1, 10); + let n = i64_array_all(1, 10); let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); assert!(downcast_i32(&out).is_null(0)); } @@ -615,7 +617,7 @@ mod tests { let v = ym_array(&[0, 5, 11, 12, 13]); let lo = ym_array(&[0; 5]); let hi = ym_array(&[12; 5]); - let n = i32_array_all(5, 12); + let n = i64_array_all(5, 12); let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); let out = downcast_i32(&out); @@ -627,7 +629,7 @@ mod tests { let v = ym_array(&[11, 12, 0, -1, 13]); let lo = ym_array(&[12; 5]); let hi = ym_array(&[0; 5]); - let n = i32_array_all(5, 12); + let n = i64_array_all(5, 12); let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); let out = downcast_i32(&out); @@ -641,7 +643,7 @@ mod tests { let v = mdn_array(&[(0, 0, 0), (5, 0, 0), (11, 0, 0), (12, 0, 0), (13, 0, 0)]); let lo = mdn_array(&[(0, 0, 0); 5]); let hi = mdn_array(&[(12, 0, 0); 5]); - let n = i32_array_all(5, 12); + let n = i64_array_all(5, 12); let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); let out = downcast_i32(&out); @@ -653,7 +655,7 @@ mod tests { let v = mdn_array(&[(11, 0, 0), (12, 0, 0), (0, 0, 0), (-1, 0, 0), (13, 0, 0)]); let lo = mdn_array(&[(12, 0, 0); 5]); let hi = mdn_array(&[(0, 0, 0); 5]); - let n = i32_array_all(5, 12); + let n = i64_array_all(5, 12); let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); let out = downcast_i32(&out); @@ -673,7 +675,7 @@ mod tests { ]); let lo = mdn_array(&[(0, 0, 0); 6]); let hi = mdn_array(&[(0, 10, 0); 6]); - let n = i32_array_all(6, 10); + let n = i64_array_all(6, 10); let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); let out = downcast_i32(&out); @@ -686,7 +688,7 @@ mod tests { let v = mdn_array(&[(0, 9, 0), (0, 10, 0), (0, 0, 0), (0, -1, 0), (0, 11, 0)]); let lo = mdn_array(&[(0, 10, 0); 5]); let hi = mdn_array(&[(0, 0, 0); 5]); - let n = i32_array_all(5, 10); + let n = i64_array_all(5, 10); let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); let out = downcast_i32(&out); @@ -698,7 +700,7 @@ mod tests { let v = mdn_array(&[(0, 9, 1), (0, 10, 0), (0, 0, 0), (0, -1, 0), (0, 11, 0)]); let lo = mdn_array(&[(0, 10, 0); 5]); let hi = mdn_array(&[(0, 0, 0); 5]); - let n = i32_array_all(5, 10); + let n = i64_array_all(5, 10); let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); let out = downcast_i32(&out); @@ -711,7 +713,7 @@ mod tests { let v = mdn_array(&[(0, 1, 0)]); let lo = mdn_array(&[(0, 0, 0)]); let hi = mdn_array(&[(1, 1, 0)]); - let n = i32_array_all(1, 4); + let n = i64_array_all(1, 4); let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); let out = downcast_i32(&out); @@ -723,7 +725,7 @@ mod tests { let v = mdn_array(&[(0, 0, 0)]); let lo = mdn_array(&[(1, 2, 3)]); let hi = mdn_array(&[(1, 2, 3)]); // lo == hi - let n = i32_array_all(1, 10); + let n = i64_array_all(1, 10); let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); assert!(downcast_i32(&out).is_null(0)); @@ -734,7 +736,7 @@ mod tests { let v = mdn_array(&[(0, 0, 0)]); let lo = mdn_array(&[(0, 0, 0)]); let hi = mdn_array(&[(0, 10, 0)]); - let n = Arc::new(Int32Array::from(vec![0])); // n <= 0 + let n = Arc::new(Int64Array::from(vec![0])); // n <= 0 let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); assert!(downcast_i32(&out).is_null(0)); @@ -748,7 +750,7 @@ mod tests { ])); let lo = mdn_array(&[(0, 0, 0), (0, 0, 0)]); let hi = mdn_array(&[(0, 10, 0), (0, 10, 0)]); - let n = i32_array_all(2, 10); + let n = i64_array_all(2, 10); let out = width_bucket_kern(&[v, lo, hi, n]).unwrap(); let out = downcast_i32(&out); @@ -773,7 +775,7 @@ mod tests { let v: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3])); let lo = f64_array(&[0.0, 0.0, 0.0]); let hi = f64_array(&[10.0, 10.0, 10.0]); - let n = i32_array_all(3, 10); + let n = i64_array_all(3, 10); let err = width_bucket_kern(&[v, lo, hi, n]).unwrap_err(); let msg = format!("{err}"); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
