This is an automated email from the ASF dual-hosted git repository.
jonah pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 1582e8d9cc Optimize `iszero` function (3-5x faster) (#12881)
1582e8d9cc is described below
commit 1582e8d9cc0d307bdc3311cd22566c66fb6b840f
Author: Simon Vandel Sillesen <[email protected]>
AuthorDate: Sun Oct 13 06:02:36 2024 +0200
Optimize `iszero` function (3-5x faster) (#12881)
* add bench
* Optimize iszero function (3-5x faster)
---
datafusion/functions/Cargo.toml | 5 ++++
datafusion/functions/benches/iszero.rs | 46 +++++++++++++++++++++++++++++++++
datafusion/functions/src/math/iszero.rs | 24 +++++++----------
3 files changed, 60 insertions(+), 15 deletions(-)
diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml
index a3d114221d..2ffe93a0e5 100644
--- a/datafusion/functions/Cargo.toml
+++ b/datafusion/functions/Cargo.toml
@@ -117,6 +117,11 @@ harness = false
name = "make_date"
required-features = ["datetime_expressions"]
+[[bench]]
+harness = false
+name = "iszero"
+required-features = ["math_expressions"]
+
[[bench]]
harness = false
name = "nullif"
diff --git a/datafusion/functions/benches/iszero.rs b/datafusion/functions/benches/iszero.rs
new file mode 100644
index 0000000000..3348d172e1
--- /dev/null
+++ b/datafusion/functions/benches/iszero.rs
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate criterion;
+
+use arrow::{
+ datatypes::{Float32Type, Float64Type},
+ util::bench_util::create_primitive_array,
+};
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_expr::ColumnarValue;
+use datafusion_functions::math::iszero;
+use std::sync::Arc;
+
+fn criterion_benchmark(c: &mut Criterion) {
+ let iszero = iszero();
+ for size in [1024, 4096, 8192] {
+ let f32_array = Arc::new(create_primitive_array::<Float32Type>(size, 0.2));
+ let f32_args = vec![ColumnarValue::Array(f32_array)];
+ c.bench_function(&format!("iszero f32 array: {}", size), |b| {
+ b.iter(|| black_box(iszero.invoke(&f32_args).unwrap()))
+ });
+ let f64_array = Arc::new(create_primitive_array::<Float64Type>(size, 0.2));
+ let f64_args = vec![ColumnarValue::Array(f64_array)];
+ c.bench_function(&format!("iszero f64 array: {}", size), |b| {
+ b.iter(|| black_box(iszero.invoke(&f64_args).unwrap()))
+ });
+ }
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/datafusion/functions/src/math/iszero.rs b/datafusion/functions/src/math/iszero.rs
index e6a7280533..74611b65aa 100644
--- a/datafusion/functions/src/math/iszero.rs
+++ b/datafusion/functions/src/math/iszero.rs
@@ -18,11 +18,11 @@
use std::any::Any;
use std::sync::Arc;
-use arrow::array::{ArrayRef, BooleanArray, Float32Array, Float64Array};
-use arrow::datatypes::DataType;
+use arrow::array::{ArrayRef, AsArray, BooleanArray};
use arrow::datatypes::DataType::{Boolean, Float32, Float64};
+use arrow::datatypes::{DataType, Float32Type, Float64Type};
-use datafusion_common::{exec_err, DataFusionError, Result};
+use datafusion_common::{exec_err, Result};
use datafusion_expr::ColumnarValue;
use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
@@ -77,20 +77,14 @@ impl ScalarUDFImpl for IsZeroFunc {
/// Iszero SQL function
pub fn iszero(args: &[ArrayRef]) -> Result<ArrayRef> {
match args[0].data_type() {
- Float64 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
- &args[0],
- "x",
- Float64Array,
- BooleanArray,
- { |x: f64| { x == 0_f64 } }
+ Float64 => Ok(Arc::new(BooleanArray::from_unary(
+ args[0].as_primitive::<Float64Type>(),
+ |x| x == 0.0,
)) as ArrayRef),
- Float32 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
- &args[0],
- "x",
- Float32Array,
- BooleanArray,
- { |x: f32| { x == 0_f32 } }
+ Float32 => Ok(Arc::new(BooleanArray::from_unary(
+ args[0].as_primitive::<Float32Type>(),
+ |x| x == 0.0,
)) as ArrayRef),
other => exec_err!("Unsupported data type {other:?} for function iszero"),
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]