This is an automated email from the ASF dual-hosted git repository.

jonah pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 1582e8d9cc Optimize `iszero` function (3-5x faster) (#12881)
1582e8d9cc is described below

commit 1582e8d9cc0d307bdc3311cd22566c66fb6b840f
Author: Simon Vandel Sillesen <[email protected]>
AuthorDate: Sun Oct 13 06:02:36 2024 +0200

    Optimize `iszero` function (3-5x faster) (#12881)
    
    * add bench
    
    * Optimize iszero function (3-5x) faster
---
 datafusion/functions/Cargo.toml         |  5 ++++
 datafusion/functions/benches/iszero.rs  | 46 +++++++++++++++++++++++++++++++++
 datafusion/functions/src/math/iszero.rs | 24 +++++++----------
 3 files changed, 60 insertions(+), 15 deletions(-)

diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml
index a3d114221d..2ffe93a0e5 100644
--- a/datafusion/functions/Cargo.toml
+++ b/datafusion/functions/Cargo.toml
@@ -117,6 +117,11 @@ harness = false
 name = "make_date"
 required-features = ["datetime_expressions"]
 
+[[bench]]
+harness = false
+name = "iszero"
+required-features = ["math_expressions"]
+
 [[bench]]
 harness = false
 name = "nullif"
diff --git a/datafusion/functions/benches/iszero.rs b/datafusion/functions/benches/iszero.rs
new file mode 100644
index 0000000000..3348d172e1
--- /dev/null
+++ b/datafusion/functions/benches/iszero.rs
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate criterion;
+
+use arrow::{
+    datatypes::{Float32Type, Float64Type},
+    util::bench_util::create_primitive_array,
+};
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_expr::ColumnarValue;
+use datafusion_functions::math::iszero;
+use std::sync::Arc;
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let iszero = iszero();
+    for size in [1024, 4096, 8192] {
+        let f32_array = Arc::new(create_primitive_array::<Float32Type>(size, 0.2));
+        let f32_args = vec![ColumnarValue::Array(f32_array)];
+        c.bench_function(&format!("iszero f32 array: {}", size), |b| {
+            b.iter(|| black_box(iszero.invoke(&f32_args).unwrap()))
+        });
+        let f64_array = Arc::new(create_primitive_array::<Float64Type>(size, 0.2));
+        let f64_args = vec![ColumnarValue::Array(f64_array)];
+        c.bench_function(&format!("iszero f64 array: {}", size), |b| {
+            b.iter(|| black_box(iszero.invoke(&f64_args).unwrap()))
+        });
+    }
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/datafusion/functions/src/math/iszero.rs b/datafusion/functions/src/math/iszero.rs
index e6a7280533..74611b65aa 100644
--- a/datafusion/functions/src/math/iszero.rs
+++ b/datafusion/functions/src/math/iszero.rs
@@ -18,11 +18,11 @@
 use std::any::Any;
 use std::sync::Arc;
 
-use arrow::array::{ArrayRef, BooleanArray, Float32Array, Float64Array};
-use arrow::datatypes::DataType;
+use arrow::array::{ArrayRef, AsArray, BooleanArray};
 use arrow::datatypes::DataType::{Boolean, Float32, Float64};
+use arrow::datatypes::{DataType, Float32Type, Float64Type};
 
-use datafusion_common::{exec_err, DataFusionError, Result};
+use datafusion_common::{exec_err, Result};
 use datafusion_expr::ColumnarValue;
 use datafusion_expr::TypeSignature::Exact;
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
@@ -77,20 +77,14 @@ impl ScalarUDFImpl for IsZeroFunc {
 /// Iszero SQL function
 pub fn iszero(args: &[ArrayRef]) -> Result<ArrayRef> {
     match args[0].data_type() {
-        Float64 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
-            &args[0],
-            "x",
-            Float64Array,
-            BooleanArray,
-            { |x: f64| { x == 0_f64 } }
+        Float64 => Ok(Arc::new(BooleanArray::from_unary(
+            args[0].as_primitive::<Float64Type>(),
+            |x| x == 0.0,
         )) as ArrayRef),
 
-        Float32 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
-            &args[0],
-            "x",
-            Float32Array,
-            BooleanArray,
-            { |x: f32| { x == 0_f32 } }
+        Float32 => Ok(Arc::new(BooleanArray::from_unary(
+            args[0].as_primitive::<Float32Type>(),
+            |x| x == 0.0,
         )) as ArrayRef),
 
         other => exec_err!("Unsupported data type {other:?} for function iszero"),


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to