vertexclique commented on a change in pull request #8630:
URL: https://github.com/apache/arrow/pull/8630#discussion_r521101147
##########
File path: rust/arrow/benches/filter_kernels.rs
##########
@@ -14,137 +14,136 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
+extern crate arrow;
+
+use rand::{
+ distributions::{Alphanumeric, Standard},
+ prelude::Distribution,
+ Rng,
+};
use arrow::array::*;
-use arrow::compute::{filter, FilterContext};
+use arrow::compute::{build_filter, filter};
use arrow::datatypes::ArrowNumericType;
+use arrow::datatypes::{Float32Type, UInt8Type};
+
use criterion::{criterion_group, criterion_main, Criterion};
-fn create_primitive_array<T, F>(size: usize, value_fn: F) -> PrimitiveArray<T>
+fn create_primitive_array<T>(size: usize, null_density: f32) ->
PrimitiveArray<T>
where
T: ArrowNumericType,
- F: Fn(usize) -> T::Native,
+ Standard: Distribution<T::Native>,
{
+ // use random numbers to avoid spurious compiler optimizations wrt to
branching
+ let mut rng = rand::thread_rng();
let mut builder = PrimitiveArray::<T>::builder(size);
- for i in 0..size {
- builder.append_value(value_fn(i)).unwrap();
+
+ for _ in 0..size {
+ if rng.gen::<f32>() < null_density {
+ builder.append_null().unwrap();
+ } else {
+ builder.append_value(rng.gen()).unwrap();
+ }
}
builder.finish()
}
-fn create_u8_array_with_nulls(size: usize) -> UInt8Array {
- let mut builder = UInt8Builder::new(size);
- for i in 0..size {
- if i % 2 == 0 {
- builder.append_value(1).unwrap();
- } else {
+fn create_string_array(size: usize, null_density: f32) -> StringArray {
+ // use random numbers to avoid spurious compiler optimizations wrt to
branching
+ let mut rng = rand::thread_rng();
+ let mut builder = StringBuilder::new(size);
+
+ for _ in 0..size {
+ if rng.gen::<f32>() < null_density {
builder.append_null().unwrap();
+ } else {
+ let value =
rng.sample_iter(&Alphanumeric).take(10).collect::<String>();
+ builder.append_value(&value).unwrap();
}
}
builder.finish()
}
-fn create_bool_array<F>(size: usize, value_fn: F) -> BooleanArray
-where
- F: Fn(usize) -> bool,
-{
+fn create_bool_array(size: usize, trues_density: f32) -> BooleanArray {
+ let mut rng = rand::thread_rng();
let mut builder = BooleanBuilder::new(size);
- for i in 0..size {
- builder.append_value(value_fn(i)).unwrap();
+ for _ in 0..size {
+ let value = rng.gen::<f32>() < trues_density;
Review comment:
> I agree. Would freezing it with a seed address the concern? My main
concern with if i % 2 == 0 and the like is that these are highly predictable
patterns and unlikely in real world situations. This predictability can make
our benchmarks not very informative as they are benchmarking speculative
execution and other optimizations, not the code (and again, these patterns are
unlikely in real-world).
The thread ID is XORed with the seed, so `thread_rng` does not fit the goal of
reproducible benchmarks. Please check out the PR I've opened, @jorgecarleitao,
and tell me what you think.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]