scovich commented on code in PR #9729:
URL: https://github.com/apache/arrow-rs/pull/9729#discussion_r3094295168


##########
arrow-cast/benches/cast_decimals.rs:
##########
@@ -0,0 +1,346 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_array::{
+    ArrayRef, Decimal32Array, Decimal64Array, Decimal128Array, 
Decimal256Array, Float32Array,
+    Float64Array, StringArray,
+};
+use arrow_buffer::i256;
+use arrow_schema::DataType::{
+    Decimal32, Decimal64, Decimal128, Decimal256, Float32, Float64, Int8, 
Int16, Int32, Int64,
+    UInt8, UInt16, UInt32, UInt64,
+};
+use criterion::*;
+use rand::prelude::StdRng;
+use rand::{Rng, SeedableRng};
+use std::sync::Arc;
+
+fn cast_string_from_decimals(c: &mut Criterion) {
+    let total_records = 10_000;
+    let mut rng = StdRng::seed_from_u64(42);
+    let str_array = StringArray::from_iter((0..total_records).map(|x| match x 
% 20 {
+        0 => None,
+        1 => Some("".to_string()),
+        2 => Some(" ".to_string()),
+        3 => Some("-1.-23499999".to_string()),
+        4 => Some("--1.23456789".to_string()),
+        5 => Some("1.-23456789".to_string()),
+        6 => Some("000.123".to_string()),
+        7 => Some("+123".to_string()),
+        8 => Some("+123.12345000".to_string()),
+        9 => Some("0".to_string()),
+        10 => Some("000.000".to_string()),
+        11 => Some("0000000000000000012345.000".to_string()),

Review Comment:
   Fully half the values in the array are invalid, so this bench will mostly 
measure the cost of the error paths. Is that intentional? Do we have a happy 
path bench to complement it?



##########
arrow-cast/benches/cast_decimals.rs:
##########
@@ -0,0 +1,346 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_array::{
+    ArrayRef, Decimal32Array, Decimal64Array, Decimal128Array, 
Decimal256Array, Float32Array,
+    Float64Array, StringArray,
+};
+use arrow_buffer::i256;
+use arrow_schema::DataType::{
+    Decimal32, Decimal64, Decimal128, Decimal256, Float32, Float64, Int8, 
Int16, Int32, Int64,
+    UInt8, UInt16, UInt32, UInt64,
+};
+use criterion::*;
+use rand::prelude::StdRng;
+use rand::{Rng, SeedableRng};
+use std::sync::Arc;
+
+fn cast_string_from_decimals(c: &mut Criterion) {
+    let total_records = 10_000;
+    let mut rng = StdRng::seed_from_u64(42);
+    let str_array = StringArray::from_iter((0..total_records).map(|x| match x 
% 20 {
+        0 => None,
+        1 => Some("".to_string()),
+        2 => Some(" ".to_string()),
+        3 => Some("-1.-23499999".to_string()),
+        4 => Some("--1.23456789".to_string()),
+        5 => Some("1.-23456789".to_string()),
+        6 => Some("000.123".to_string()),
+        7 => Some("+123".to_string()),
+        8 => Some("+123.12345000".to_string()),
+        9 => Some("0".to_string()),
+        10 => Some("000.000".to_string()),
+        11 => Some("0000000000000000012345.000".to_string()),
+        _ => Some(format!("{:.6}", f64::from(x) * rng.random::<f64>())),
+    }));
+    let array = Arc::new(str_array) as ArrayRef;
+
+    let bench_suite = [
+        ("string2decimal32(9, 2)", Decimal32(9, 2)),
+        ("string2decimal64(18, 2)", Decimal64(18, 2)),
+        ("string2decimal128(38, 3)", Decimal128(38, 3)),
+        ("string2decimal256(76, 4)", Decimal256(76, 4)),
+    ];
+    for bench in bench_suite {
+        c.bench_function(bench.0, |b| {
+            b.iter(|| {
+                let r = arrow_cast::cast(&array, &bench.1);
+                std::hint::black_box(r)
+            })
+        });
+    }
+}
+
+fn cast_float_to_decimals(c: &mut Criterion) {
+    let total_records = 50_000;
+    let mut rng = StdRng::seed_from_u64(42);
+    let bench_suite_float32 = [
+        ("float32_to_decimal32(9, 2)", Decimal32(9, 2)),
+        ("float32_to_decimal64(18, 2)", Decimal64(18, 2)),
+        ("float32_to_decimal128(38, 3)", Decimal128(38, 3)),
+        ("float32_to_decimal256(76, 4)", Decimal256(76, 4)),
+    ];
+
+    let float32_array = Float32Array::from_iter((0..total_records).map(|x| 
match x % 10 {
+        0 => None,
+        1 => Some(f32::MIN),
+        2 => Some(f32::MAX),
+        _ => match x % 2 {
+            0 => Some((x as f32) * rng.random::<f32>()),
+            _ => Some(-(x as f32) * rng.random::<f32>()),
+        },

Review Comment:
   The random values are uniformly distributed over 0..1 (floating point) so 
this exercises mostly values in -5..5 with exponentially decreasing probability 
of large negative exponents (e.g. 1e-6). Do we want any coverage of large 
positive exponents (e.g. 1e6) as well?



##########
arrow-cast/benches/cast_decimals.rs:
##########
@@ -0,0 +1,346 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_array::{
+    ArrayRef, Decimal32Array, Decimal64Array, Decimal128Array, 
Decimal256Array, Float32Array,
+    Float64Array, StringArray,
+};
+use arrow_buffer::i256;
+use arrow_schema::DataType::{
+    Decimal32, Decimal64, Decimal128, Decimal256, Float32, Float64, Int8, 
Int16, Int32, Int64,
+    UInt8, UInt16, UInt32, UInt64,
+};
+use criterion::*;
+use rand::prelude::StdRng;
+use rand::{Rng, SeedableRng};
+use std::sync::Arc;
+
+fn cast_string_from_decimals(c: &mut Criterion) {
+    let total_records = 10_000;
+    let mut rng = StdRng::seed_from_u64(42);
+    let str_array = StringArray::from_iter((0..total_records).map(|x| match x 
% 20 {
+        0 => None,
+        1 => Some("".to_string()),
+        2 => Some(" ".to_string()),
+        3 => Some("-1.-23499999".to_string()),
+        4 => Some("--1.23456789".to_string()),
+        5 => Some("1.-23456789".to_string()),
+        6 => Some("000.123".to_string()),
+        7 => Some("+123".to_string()),
+        8 => Some("+123.12345000".to_string()),
+        9 => Some("0".to_string()),
+        10 => Some("000.000".to_string()),
+        11 => Some("0000000000000000012345.000".to_string()),
+        _ => Some(format!("{:.6}", f64::from(x) * rng.random::<f64>())),
+    }));
+    let array = Arc::new(str_array) as ArrayRef;
+
+    let bench_suite = [
+        ("string2decimal32(9, 2)", Decimal32(9, 2)),
+        ("string2decimal64(18, 2)", Decimal64(18, 2)),
+        ("string2decimal128(38, 3)", Decimal128(38, 3)),
+        ("string2decimal256(76, 4)", Decimal256(76, 4)),
+    ];
+    for bench in bench_suite {
+        c.bench_function(bench.0, |b| {
+            b.iter(|| {
+                let r = arrow_cast::cast(&array, &bench.1);
+                std::hint::black_box(r)

Review Comment:
   Looking at https://doc.rust-lang.org/beta/std/hint/fn.black_box.html, I 
think this should be
   ```suggestion
                   black_box(arrow_cast::cast(black_box(&array), black_box(&bench.1)));
   ```
   (This prevents the compiler from making any assumptions about the inputs
   or the output of `cast`.)



##########
arrow-cast/benches/cast_decimals.rs:
##########
@@ -0,0 +1,346 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_array::{
+    ArrayRef, Decimal32Array, Decimal64Array, Decimal128Array, 
Decimal256Array, Float32Array,
+    Float64Array, StringArray,
+};
+use arrow_buffer::i256;
+use arrow_schema::DataType::{
+    Decimal32, Decimal64, Decimal128, Decimal256, Float32, Float64, Int8, 
Int16, Int32, Int64,
+    UInt8, UInt16, UInt32, UInt64,
+};
+use criterion::*;
+use rand::prelude::StdRng;
+use rand::{Rng, SeedableRng};
+use std::sync::Arc;
+
+fn cast_string_from_decimals(c: &mut Criterion) {
+    let total_records = 10_000;
+    let mut rng = StdRng::seed_from_u64(42);
+    let str_array = StringArray::from_iter((0..total_records).map(|x| match x 
% 20 {
+        0 => None,
+        1 => Some("".to_string()),
+        2 => Some(" ".to_string()),
+        3 => Some("-1.-23499999".to_string()),
+        4 => Some("--1.23456789".to_string()),
+        5 => Some("1.-23456789".to_string()),
+        6 => Some("000.123".to_string()),
+        7 => Some("+123".to_string()),
+        8 => Some("+123.12345000".to_string()),
+        9 => Some("0".to_string()),
+        10 => Some("000.000".to_string()),
+        11 => Some("0000000000000000012345.000".to_string()),
+        _ => Some(format!("{:.6}", f64::from(x) * rng.random::<f64>())),
+    }));
+    let array = Arc::new(str_array) as ArrayRef;
+
+    let bench_suite = [
+        ("string2decimal32(9, 2)", Decimal32(9, 2)),
+        ("string2decimal64(18, 2)", Decimal64(18, 2)),
+        ("string2decimal128(38, 3)", Decimal128(38, 3)),
+        ("string2decimal256(76, 4)", Decimal256(76, 4)),
+    ];
+    for bench in bench_suite {
+        c.bench_function(bench.0, |b| {
+            b.iter(|| {
+                let r = arrow_cast::cast(&array, &bench.1);
+                std::hint::black_box(r)
+            })
+        });
+    }
+}
+
+fn cast_float_to_decimals(c: &mut Criterion) {
+    let total_records = 50_000;
+    let mut rng = StdRng::seed_from_u64(42);
+    let bench_suite_float32 = [
+        ("float32_to_decimal32(9, 2)", Decimal32(9, 2)),
+        ("float32_to_decimal64(18, 2)", Decimal64(18, 2)),
+        ("float32_to_decimal128(38, 3)", Decimal128(38, 3)),
+        ("float32_to_decimal256(76, 4)", Decimal256(76, 4)),
+    ];
+
+    let float32_array = Float32Array::from_iter((0..total_records).map(|x| 
match x % 10 {
+        0 => None,
+        1 => Some(f32::MIN),
+        2 => Some(f32::MAX),
+        _ => match x % 2 {
+            0 => Some((x as f32) * rng.random::<f32>()),
+            _ => Some(-(x as f32) * rng.random::<f32>()),
+        },
+    }));
+    for bench in &bench_suite_float32 {
+        c.bench_function(bench.0, |b| {
+            b.iter(|| {
+                let r = arrow_cast::cast(&float32_array, &bench.1);
+                std::hint::black_box(r)
+            })
+        });
+    }
+
+    let bench_suite_float64 = [
+        ("float64_to_decimal32(9, 2)", Decimal32(9, 2)),
+        ("float64_to_decimal64(18, 4)", Decimal64(18, 2)),
+        ("float64_to_decimal128(38, 3)", Decimal128(38, 3)),
+        ("float64_to_decimal256(76, 4)", Decimal256(76, 4)),
+    ];
+
+    rng = StdRng::seed_from_u64(42);
+    let float64_array = Float64Array::from_iter((0..total_records).map(|x| 
match x % 10 {
+        0 => None,
+        1 => Some(f64::MIN),
+        2 => Some(f64::MAX),
+        _ => match x % 2 {
+            0 => Some(f64::from(x) * rng.random::<f64>()),
+            _ => Some(-f64::from(x) * rng.random::<f64>()),
+        },
+    }));
+    for bench in &bench_suite_float64 {
+        c.bench_function(bench.0, |b| {
+            b.iter(|| {
+                let r = arrow_cast::cast(&float64_array, &bench.1);
+                std::hint::black_box(r)
+            })
+        });
+    }
+}
+
+fn cast_decimal_to_float(c: &mut Criterion) {
+    let total_records = 100_000;
+    let mut rng = StdRng::seed_from_u64(42);
+    let decimal32_array = Decimal32Array::from_iter((0..total_records).map(|x| 
match x % 10 {
+        0 => None,
+        1 => Some(i32::MIN),
+        2 => Some(i32::MAX),

Review Comment:
   These are out-of-range values, so 20% of the values will exercise the error path.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to