This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new ecb5a91448 vectorize random() scalar function (#12078)
ecb5a91448 is described below
commit ecb5a9144819e3954c073eebcb4bc2bb5a173cf1
Author: Yongting You <[email protected]>
AuthorDate: Wed Aug 21 02:33:58 2024 +0800
vectorize random() scalar function (#12078)
---
datafusion/functions/Cargo.toml | 5 ++++
datafusion/functions/benches/random.rs | 49 +++++++++++++++++++++++++++++++++
datafusion/functions/src/math/random.rs | 7 +++--
3 files changed, 59 insertions(+), 2 deletions(-)
diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml
index 2b3f80fc93..9ef020b772 100644
--- a/datafusion/functions/Cargo.toml
+++ b/datafusion/functions/Cargo.toml
@@ -156,3 +156,8 @@ required-features = ["unicode_expressions"]
harness = false
name = "repeat"
required-features = ["string_expressions"]
+
+[[bench]]
+harness = false
+name = "random"
+required-features = ["math_expressions"]
diff --git a/datafusion/functions/benches/random.rs b/datafusion/functions/benches/random.rs
new file mode 100644
index 0000000000..a721836bb6
--- /dev/null
+++ b/datafusion/functions/benches/random.rs
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate criterion;
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_expr::ScalarUDFImpl;
+use datafusion_functions::math::random::RandomFunc;
+
+fn criterion_benchmark(c: &mut Criterion) {
+ let random_func = RandomFunc::new();
+
+ // Benchmark to evaluate 1M rows in batch size 8192
+ let iterations = 1_000_000 / 8192; // Calculate how many iterations are needed to reach approximately 1M rows
+ c.bench_function("random_1M_rows_batch_8192", |b| {
+ b.iter(|| {
+ for _ in 0..iterations {
+ black_box(random_func.invoke_no_args(8192).unwrap());
+ }
+ })
+ });
+
+ // Benchmark to evaluate 1M rows in batch size 128
+ let iterations_128 = 1_000_000 / 128; // Calculate how many iterations are needed to reach approximately 1M rows with batch size 128
+ c.bench_function("random_1M_rows_batch_128", |b| {
+ b.iter(|| {
+ for _ in 0..iterations_128 {
+ black_box(random_func.invoke_no_args(128).unwrap());
+ }
+ })
+ });
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/datafusion/functions/src/math/random.rs b/datafusion/functions/src/math/random.rs
index b5eece212a..20591a02a9 100644
--- a/datafusion/functions/src/math/random.rs
+++ b/datafusion/functions/src/math/random.rs
@@ -69,8 +69,11 @@ impl ScalarUDFImpl for RandomFunc {
fn invoke_no_args(&self, num_rows: usize) -> Result<ColumnarValue> {
let mut rng = thread_rng();
- let values = std::iter::repeat_with(|| rng.gen_range(0.0..1.0)).take(num_rows);
- let array = Float64Array::from_iter_values(values);
+ let mut values = vec![0.0; num_rows];
+ // Equivalent to set each element with rng.gen_range(0.0..1.0), but more efficient
+ rng.fill(&mut values[..]);
+ let array = Float64Array::from(values);
+
Ok(ColumnarValue::Array(Arc::new(array)))
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]