This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 76c381ff0a Remove redundant benchmarks in `cast_kernels` (#9789)
76c381ff0a is described below
commit 76c381ff0a2cd3f0b5f6690b3491428ffa0806a0
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu May 7 13:56:33 2026 -0400
Remove redundant benchmarks in `cast_kernels` (#9789)
# Which issue does this PR close?
- Follow on to https://github.com/apache/arrow-rs/pull/9729
# Rationale for this change
#9729 added many new cases to `cast_kernels`, but many of these are
redundant and increase the benchmark runtime without providing
proportional value in coverage.
This PR reduces the redundancy by:
1. Keeping one representative benchmark for each major physical code path
(e.g., `i128` vs `i256` storage).
2. Removing redundant combinations of target types (e.g., casting
`decimal128` to every integer width when `int64` is sufficient).
3. Consolidating invalid/error path testing into a single representative
case.
4. Reducing the total number of benchmark cases from over 60 new
additions to 10 high-value cases.
# What changes are included in this PR?
- Pruned redundant decimal-to-integer/float and string/float-to-decimal
benchmarks in `arrow/benches/cast_kernels.rs`.
- Added `create_primitive_array_range` helper to
`arrow/src/util/bench_util.rs`
Compared to main before PR #9729, the following benchmarks will be new
after my PR #9789 is merged:
1. New Decimal Casting Benchmarks
These cases cover the core performance paths for casting to and from
decimals using representative physical storage types (i128 and i256):
* cast string to decimal128(38, 3)
* cast float64 to decimal128(32, 3)
* cast invalid float64 to to decimal128(32, 3) (Error path testing)
* cast decimal128 to float64
* cast decimal128 to int64
* cast decimal256 to float64
* cast decimal256 to int64
* cast decimal128 to decimal128 512 with lower scale (infallible)
(specifically testing the fast path for infallible casts)
# Are these changes tested?
CI covers verification.
# Are there any user-facing changes?
No.
---
arrow/benches/cast_kernels.rs | 343 +-----------------------------------------
1 file changed, 8 insertions(+), 335 deletions(-)
diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs
index 2af3145a84..82e9696f04 100644
--- a/arrow/benches/cast_kernels.rs
+++ b/arrow/benches/cast_kernels.rs
@@ -83,36 +83,6 @@ fn build_utf8_date_time_array(size: usize, with_nulls: bool)
-> ArrayRef {
Arc::new(builder.finish())
}
-fn build_decimal32_array(size: usize, precision: u8, scale: i8) -> ArrayRef {
- let mut rng = seedable_rng();
- let mut builder = Decimal32Builder::with_capacity(size);
-
- for _ in 0..size {
- builder.append_value(rng.random_range::<i32, _>(0..1000000));
- }
- Arc::new(
- builder
- .finish()
- .with_precision_and_scale(precision, scale)
- .unwrap(),
- )
-}
-
-fn build_decimal64_array(size: usize, precision: u8, scale: i8) -> ArrayRef {
- let mut rng = seedable_rng();
- let mut builder = Decimal64Builder::with_capacity(size);
-
- for _ in 0..size {
- builder.append_value(rng.random_range::<i64, _>(0..1000000000));
- }
- Arc::new(
- builder
- .finish()
- .with_precision_and_scale(precision, scale)
- .unwrap(),
- )
-}
-
fn build_decimal128_array(size: usize, precision: u8, scale: i8) -> ArrayRef {
let mut rng = seedable_rng();
let mut builder = Decimal128Builder::with_capacity(size);
@@ -175,6 +145,14 @@ fn build_string_float_array(size: usize, null_density:
f32) -> ArrayRef {
Arc::new(builder.finish())
}
+fn build_float64_array_for_cast_to_decimal(size: usize, null_density: f32) ->
ArrayRef {
+ Arc::new(create_primitive_array_range::<Float64Type>(
+ size,
+ null_density,
+ -999_999_999f64..999_999_999f64,
+ ))
+}
+
macro_rules! build_array_with_samples {
($builder: ident, $size: ident, $null_density: expr, $samples: ident) => {{
let mut rng = seedable_rng();
@@ -189,46 +167,6 @@ macro_rules! build_array_with_samples {
}};
}
-fn build_string_float_array_invalid_item(size: usize, null_density: f32) ->
ArrayRef {
- let invalid_items = [
- "--1.23",
- "1.2.3",
- "-1.-23499",
- "--1.23456789",
- "1-.234",
- "e10",
- "1e",
- "1e++10",
- "NaN",
- "Infinity",
- ];
- let mut builder = StringBuilder::new();
- build_array_with_samples!(builder, size, null_density, invalid_items)
-}
-
-fn build_float32_array_for_cast_to_decimal(size: usize, null_density: f32) ->
ArrayRef {
- Arc::new(create_primitive_array_range::<Float32Type>(
- size,
- null_density,
- -999_999_999f32..999_999_999f32,
- ))
-}
-
-fn build_float64_array_for_cast_to_decimal(size: usize, null_density: f32) ->
ArrayRef {
- Arc::new(create_primitive_array_range::<Float64Type>(
- size,
- null_density,
- -999_999_999f64..999_999_999f64,
- ))
-}
-
-fn build_float32_array_invalid_item(size: usize, null_density: f32) ->
ArrayRef {
- let mut builder = Float32Builder::with_capacity(size);
- let invalid_values = [f32::NAN, f32::INFINITY, f32::NEG_INFINITY];
-
- build_array_with_samples!(builder, size, null_density, invalid_values)
-}
-
fn build_float64_array_invalid_items(size: usize, null_density: f32) ->
ArrayRef {
let mut builder = Float64Builder::with_capacity(size);
let invalid_values = [f64::NAN, f64::INFINITY, f64::NEG_INFINITY];
@@ -268,8 +206,6 @@ fn add_benchmark(c: &mut Criterion) {
let utf8_date_array = build_utf8_date_array(512, true);
let utf8_date_time_array = build_utf8_date_time_array(512, true);
- let decimal32_array = build_decimal32_array(8_000, 9, 3);
- let decimal64_array = build_decimal64_array(8_000, 10, 3);
let decimal128_array = build_decimal128_array(8_000, 10, 3);
let decimal256_array = build_decimal256_array(8_000, 50, 3);
let string_array = build_string_array(512);
@@ -280,10 +216,7 @@ fn add_benchmark(c: &mut Criterion) {
let binary_view_array = cast(&string_view_array,
&DataType::BinaryView).unwrap();
let string_float_array_normal = build_string_float_array(5_000, 0.1);
- let invalid_string_float_array =
build_string_float_array_invalid_item(8_000, 0.1);
- let float32_array_cast_to_decimal =
build_float32_array_for_cast_to_decimal(8_000, 0.1);
let float64_array_cast_to_decimal =
build_float64_array_for_cast_to_decimal(8_000, 0.1);
- let invalid_float32_array_to_decimal =
build_float32_array_invalid_item(8_000, 0.1);
let invalid_float64_array_to_decimal =
build_float64_array_invalid_items(8_000, 0.1);
c.bench_function("cast int32 to int32 512", |b| {
@@ -366,22 +299,6 @@ fn add_benchmark(c: &mut Criterion) {
b.iter(|| cast_array(&utf8_date_time_array, DataType::Date64))
});
- c.bench_function("cast decimal32 to decimal32 512", |b| {
- b.iter(|| cast_array(&decimal32_array, DataType::Decimal32(9, 4)))
- });
- c.bench_function("cast decimal32 to decimal32 512 lower precision", |b| {
- b.iter(|| cast_array(&decimal32_array, DataType::Decimal32(6, 5)))
- });
- c.bench_function("cast decimal32 to decimal64 512", |b| {
- b.iter(|| cast_array(&decimal32_array, DataType::Decimal64(11, 5)))
- });
- c.bench_function("cast decimal64 to decimal32 512", |b| {
- b.iter(|| cast_array(&decimal64_array, DataType::Decimal32(9, 2)))
- });
- c.bench_function("cast decimal64 to decimal64 512", |b| {
- b.iter(|| cast_array(&decimal64_array, DataType::Decimal64(12, 4)))
- });
-
c.bench_function("cast decimal128 to decimal128 512", |b| {
b.iter(|| cast_array(&decimal128_array, DataType::Decimal128(30, 5)))
});
@@ -455,288 +372,44 @@ fn add_benchmark(c: &mut Criterion) {
}
// cast string with normal items to decimals
- benchmark_cast!(
- "cast string to decimal32(9, 2)",
- string_float_array_normal,
- DataType::Decimal32(9, 2)
- );
- benchmark_cast!(
- "cast string to decimal64(18, 2)",
- string_float_array_normal,
- DataType::Decimal64(18, 2)
- );
benchmark_cast!(
"cast string to decimal128(38, 3)",
string_float_array_normal,
DataType::Decimal128(38, 3)
);
- benchmark_cast!(
- "cast string to decimal256(76, 4)",
- string_float_array_normal,
- DataType::Decimal256(76, 4)
- );
-
- // cast invalid string to decimals
- benchmark_cast!(
- "cast invalid string to decimal32(9, 2)",
- invalid_string_float_array,
- DataType::Decimal32(9, 2)
- );
- benchmark_cast!(
- "cast invalid string to decimal64(18, 2)",
- invalid_string_float_array,
- DataType::Decimal64(18, 2)
- );
- benchmark_cast!(
- "cast invalid string to decimal128(38, 3)",
- invalid_string_float_array,
- DataType::Decimal128(38, 3)
- );
- benchmark_cast!(
- "cast invalid string to decimal256(76, 4)",
- invalid_string_float_array,
- DataType::Decimal256(76, 4)
- );
-
- // cast float32 to decimals
- benchmark_cast!(
- "cast float32 to decimal32(9, 2)",
- float32_array_cast_to_decimal,
- DataType::Decimal32(9, 2)
- );
- benchmark_cast!(
- "cast float32 to decimal64(18, 2",
- float32_array_cast_to_decimal,
- DataType::Decimal64(18, 2)
- );
- benchmark_cast!(
- "cast float32 to decimal128(32, 3)",
- float32_array_cast_to_decimal,
- DataType::Decimal128(38, 3)
- );
- benchmark_cast!(
- "cast float32 to decimal256(76, 4)",
- float32_array_cast_to_decimal,
- DataType::Decimal256(76, 4)
- );
-
- // cast invalid float32 to decimals
- benchmark_cast!(
- "cast invalid float32 to decimal32(9, 2)",
- invalid_float32_array_to_decimal,
- DataType::Decimal32(9, 2)
- );
- benchmark_cast!(
- "cast invalid float32 to decimal64(18, 2",
- invalid_float32_array_to_decimal,
- DataType::Decimal64(18, 2)
- );
- benchmark_cast!(
- "cast invalid float32 to decimal128(32, 3)",
- invalid_float32_array_to_decimal,
- DataType::Decimal128(32, 3)
- );
- benchmark_cast!(
- "cast invalid float32 to decimal256(76, 4)",
- invalid_float32_array_to_decimal,
- DataType::Decimal256(76, 4)
- );
// cast float64 to decimals
- benchmark_cast!(
- "cast float64 to decimal32(9, 2)",
- float64_array_cast_to_decimal,
- DataType::Decimal32(9, 2)
- );
- benchmark_cast!(
- "cast float64 to decimal64(18, 2",
- float64_array_cast_to_decimal,
- DataType::Decimal64(18, 2)
- );
benchmark_cast!(
"cast float64 to decimal128(32, 3)",
float64_array_cast_to_decimal,
DataType::Decimal128(32, 3)
);
- benchmark_cast!(
- "cast float64 to decimal256(76, 4)",
- float64_array_cast_to_decimal,
- DataType::Decimal256(76, 4)
- );
// cast invalid float64 to decimals
- benchmark_cast!(
- "cast invalid float64 to decimal32(9, 2)",
- invalid_float64_array_to_decimal,
- DataType::Decimal32(9, 2)
- );
- benchmark_cast!(
- "cast invalid float64 to to decimal64(18, 2)",
- invalid_float64_array_to_decimal,
- DataType::Decimal64(18, 2)
- );
benchmark_cast!(
"cast invalid float64 to to decimal128(32, 3)",
invalid_float64_array_to_decimal,
DataType::Decimal128(32, 3)
);
- benchmark_cast!(
- "cast invalid float64 to to decimal256(76, 4)",
- invalid_float64_array_to_decimal,
- DataType::Decimal256(76, 4)
- );
// cast decimals to float/integers
- benchmark_cast!(
- "cast decimal32 to float32",
- decimal32_array,
- DataType::Float32
- );
- benchmark_cast!(
- "cast decimal32 to float64",
- decimal32_array,
- DataType::Float64
- );
- benchmark_cast!("cast decimal32 to uint8", decimal32_array,
DataType::UInt8);
- benchmark_cast!(
- "cast decimal32 to uint16",
- decimal32_array,
- DataType::UInt16
- );
- benchmark_cast!(
- "cast decimal32 to uint32",
- decimal32_array,
- DataType::UInt32
- );
- benchmark_cast!(
- "cast decimal32 to uint64",
- decimal32_array,
- DataType::UInt64
- );
- benchmark_cast!("cast decimal32 to int8", decimal32_array, DataType::Int8);
- benchmark_cast!("cast decimal32 to int16", decimal32_array,
DataType::Int16);
- benchmark_cast!("cast decimal32 to int32", decimal32_array,
DataType::Int32);
- benchmark_cast!("cast decimal32 to int64", decimal32_array,
DataType::Int64);
-
- benchmark_cast!(
- "cast decimal64 to float32",
- decimal64_array,
- DataType::Float32
- );
- benchmark_cast!(
- "cast decimal64 to float64",
- decimal64_array,
- DataType::Float64
- );
- benchmark_cast!("cast decimal64 to uint8", decimal64_array,
DataType::UInt8);
- benchmark_cast!(
- "cast decimal64 to uint16",
- decimal64_array,
- DataType::UInt16
- );
- benchmark_cast!(
- "cast decimal64 to uint32",
- decimal64_array,
- DataType::UInt32
- );
- benchmark_cast!(
- "cast decimal64 to uint64",
- decimal64_array,
- DataType::UInt64
- );
- benchmark_cast!("cast decimal64 to int8", decimal64_array, DataType::Int8);
- benchmark_cast!("cast decimal64 to int16", decimal64_array,
DataType::Int16);
- benchmark_cast!("cast decimal64 to int32", decimal64_array,
DataType::Int32);
- benchmark_cast!("cast decimal64 to int64", decimal64_array,
DataType::Int64);
-
- benchmark_cast!(
- "cast decimal128 to float32",
- decimal128_array,
- DataType::Float32
- );
benchmark_cast!(
"cast decimal128 to float64",
decimal128_array,
DataType::Float64
);
- benchmark_cast!(
- "cast decimal128 to uint8",
- decimal128_array,
- DataType::UInt8
- );
- benchmark_cast!(
- "cast decimal128 to uint16",
- decimal128_array,
- DataType::UInt16
- );
- benchmark_cast!(
- "cast decimal128 to uint32",
- decimal128_array,
- DataType::UInt32
- );
- benchmark_cast!(
- "cast decimal128 to uint64",
- decimal128_array,
- DataType::UInt64
- );
benchmark_cast!("cast decimal128 to int8", decimal128_array,
DataType::Int8);
- benchmark_cast!(
- "cast decimal128 to int16",
- decimal128_array,
- DataType::Int16
- );
- benchmark_cast!(
- "cast decimal128 to int32",
- decimal128_array,
- DataType::Int32
- );
benchmark_cast!(
"cast decimal128 to int64",
decimal128_array,
DataType::Int64
);
- benchmark_cast!(
- "cast decimal256 to float32",
- decimal256_array,
- DataType::Float32
- );
benchmark_cast!(
"cast decimal256 to float64",
decimal256_array,
DataType::Float64
);
- benchmark_cast!(
- "cast decimal256 to uint8",
- decimal256_array,
- DataType::UInt8
- );
- benchmark_cast!(
- "cast decimal256 to uint16",
- decimal256_array,
- DataType::UInt16
- );
- benchmark_cast!(
- "cast decimal256 to uint32",
- decimal256_array,
- DataType::UInt32
- );
- benchmark_cast!(
- "cast decimal256 to uint64",
- decimal256_array,
- DataType::UInt64
- );
- benchmark_cast!("cast decimal256 to int8", decimal256_array,
DataType::Int8);
- benchmark_cast!(
- "cast decimal256 to int16",
- decimal256_array,
- DataType::Int16
- );
- benchmark_cast!(
- "cast decimal256 to int32",
- decimal256_array,
- DataType::Int32
- );
benchmark_cast!(
"cast decimal256 to int64",
decimal256_array,