This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 1936774ed1 Optimize `isnan` (2-5x faster) (#12889)
1936774ed1 is described below
commit 1936774ed160b7062783510020ca75ac90d37762
Author: Simon Vandel Sillesen <[email protected]>
AuthorDate: Tue Oct 15 12:44:09 2024 +0200
Optimize `isnan` (2-5x faster) (#12889)
* add bench
* optimize isnan
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/functions/Cargo.toml | 5 ++++
datafusion/functions/benches/isnan.rs | 46 +++++++++++++++++++++++++++++++++++
datafusion/functions/src/math/nans.rs | 29 +++++++++-------------
3 files changed, 63 insertions(+), 17 deletions(-)
diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml
index e08dfb2de0..c1852329e6 100644
--- a/datafusion/functions/Cargo.toml
+++ b/datafusion/functions/Cargo.toml
@@ -137,6 +137,11 @@ harness = false
name = "to_char"
required-features = ["datetime_expressions"]
+[[bench]]
+harness = false
+name = "isnan"
+required-features = ["math_expressions"]
+
[[bench]]
harness = false
name = "signum"
diff --git a/datafusion/functions/benches/isnan.rs b/datafusion/functions/benches/isnan.rs
new file mode 100644
index 0000000000..16bbe073da
--- /dev/null
+++ b/datafusion/functions/benches/isnan.rs
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate criterion;
+
+use arrow::{
+ datatypes::{Float32Type, Float64Type},
+ util::bench_util::create_primitive_array,
+};
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_expr::ColumnarValue;
+use datafusion_functions::math::isnan;
+use std::sync::Arc;
+
+fn criterion_benchmark(c: &mut Criterion) {
+ let isnan = isnan();
+ for size in [1024, 4096, 8192] {
+ let f32_array = Arc::new(create_primitive_array::<Float32Type>(size, 0.2));
+ let f32_args = vec![ColumnarValue::Array(f32_array)];
+ c.bench_function(&format!("isnan f32 array: {}", size), |b| {
+ b.iter(|| black_box(isnan.invoke(&f32_args).unwrap()))
+ });
+ let f64_array = Arc::new(create_primitive_array::<Float64Type>(size, 0.2));
+ let f64_args = vec![ColumnarValue::Array(f64_array)];
+ c.bench_function(&format!("isnan f64 array: {}", size), |b| {
+ b.iter(|| black_box(isnan.invoke(&f64_args).unwrap()))
+ });
+ }
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/datafusion/functions/src/math/nans.rs b/datafusion/functions/src/math/nans.rs
index b02839b40b..07747418ea 100644
--- a/datafusion/functions/src/math/nans.rs
+++ b/datafusion/functions/src/math/nans.rs
@@ -17,11 +17,11 @@
//! Math function: `isnan()`.
-use arrow::datatypes::DataType;
-use datafusion_common::{exec_err, DataFusionError, Result};
+use arrow::datatypes::{DataType, Float32Type, Float64Type};
+use datafusion_common::{exec_err, Result};
use datafusion_expr::{ColumnarValue, TypeSignature};
-use arrow::array::{ArrayRef, BooleanArray, Float32Array, Float64Array};
+use arrow::array::{ArrayRef, AsArray, BooleanArray};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use std::any::Any;
use std::sync::Arc;
@@ -72,20 +72,15 @@ impl ScalarUDFImpl for IsNanFunc {
let args = ColumnarValue::values_to_arrays(args)?;
let arr: ArrayRef = match args[0].data_type() {
- DataType::Float64 => Arc::new(make_function_scalar_inputs_return_type!(
- &args[0],
- self.name(),
- Float64Array,
- BooleanArray,
- { f64::is_nan }
- )),
- DataType::Float32 => Arc::new(make_function_scalar_inputs_return_type!(
- &args[0],
- self.name(),
- Float32Array,
- BooleanArray,
- { f32::is_nan }
- )),
+ DataType::Float64 => Arc::new(BooleanArray::from_unary(
+ args[0].as_primitive::<Float64Type>(),
+ f64::is_nan,
+ )) as ArrayRef,
+
+ DataType::Float32 => Arc::new(BooleanArray::from_unary(
+ args[0].as_primitive::<Float32Type>(),
+ f32::is_nan,
+ )) as ArrayRef,
other => {
return exec_err!(
"Unsupported data type {other:?} for function {}",
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]