This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new ecb5a91448 vectorize random() scalar function (#12078)
ecb5a91448 is described below

commit ecb5a9144819e3954c073eebcb4bc2bb5a173cf1
Author: Yongting You <[email protected]>
AuthorDate: Wed Aug 21 02:33:58 2024 +0800

    vectorize random() scalar function (#12078)
---
 datafusion/functions/Cargo.toml         |  5 ++++
 datafusion/functions/benches/random.rs  | 49 +++++++++++++++++++++++++++++++++
 datafusion/functions/src/math/random.rs |  7 +++--
 3 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml
index 2b3f80fc93..9ef020b772 100644
--- a/datafusion/functions/Cargo.toml
+++ b/datafusion/functions/Cargo.toml
@@ -156,3 +156,8 @@ required-features = ["unicode_expressions"]
 harness = false
 name = "repeat"
 required-features = ["string_expressions"]
+
+[[bench]]
+harness = false
+name = "random"
+required-features = ["math_expressions"]
diff --git a/datafusion/functions/benches/random.rs b/datafusion/functions/benches/random.rs
new file mode 100644
index 0000000000..a721836bb6
--- /dev/null
+++ b/datafusion/functions/benches/random.rs
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate criterion;
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_expr::ScalarUDFImpl;
+use datafusion_functions::math::random::RandomFunc;
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let random_func = RandomFunc::new();
+
+    // Benchmark to evaluate 1M rows in batch size 8192
+    let iterations = 1_000_000 / 8192; // Calculate how many iterations are needed to reach approximately 1M rows
+    c.bench_function("random_1M_rows_batch_8192", |b| {
+        b.iter(|| {
+            for _ in 0..iterations {
+                black_box(random_func.invoke_no_args(8192).unwrap());
+            }
+        })
+    });
+
+    // Benchmark to evaluate 1M rows in batch size 128
+    let iterations_128 = 1_000_000 / 128; // Calculate how many iterations are needed to reach approximately 1M rows with batch size 128
+    c.bench_function("random_1M_rows_batch_128", |b| {
+        b.iter(|| {
+            for _ in 0..iterations_128 {
+                black_box(random_func.invoke_no_args(128).unwrap());
+            }
+        })
+    });
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/datafusion/functions/src/math/random.rs b/datafusion/functions/src/math/random.rs
index b5eece212a..20591a02a9 100644
--- a/datafusion/functions/src/math/random.rs
+++ b/datafusion/functions/src/math/random.rs
@@ -69,8 +69,11 @@ impl ScalarUDFImpl for RandomFunc {
 
     fn invoke_no_args(&self, num_rows: usize) -> Result<ColumnarValue> {
         let mut rng = thread_rng();
-        let values = std::iter::repeat_with(|| rng.gen_range(0.0..1.0)).take(num_rows);
-        let array = Float64Array::from_iter_values(values);
+        let mut values = vec![0.0; num_rows];
+        // Equivalent to set each element with rng.gen_range(0.0..1.0), but more efficient
+        rng.fill(&mut values[..]);
+        let array = Float64Array::from(values);
+
         Ok(ColumnarValue::Array(Arc::new(array)))
     }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to