This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 1936774ed1 Optimize `isnan` (2-5x faster) (#12889)
1936774ed1 is described below

commit 1936774ed160b7062783510020ca75ac90d37762
Author: Simon Vandel Sillesen <[email protected]>
AuthorDate: Tue Oct 15 12:44:09 2024 +0200

    Optimize `isnan` (2-5x faster) (#12889)
    
    * add bench
    
    * optimize isnan
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/functions/Cargo.toml       |  5 ++++
 datafusion/functions/benches/isnan.rs | 46 +++++++++++++++++++++++++++++++++++
 datafusion/functions/src/math/nans.rs | 29 +++++++++-------------
 3 files changed, 63 insertions(+), 17 deletions(-)

diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml
index e08dfb2de0..c1852329e6 100644
--- a/datafusion/functions/Cargo.toml
+++ b/datafusion/functions/Cargo.toml
@@ -137,6 +137,11 @@ harness = false
 name = "to_char"
 required-features = ["datetime_expressions"]
 
+[[bench]]
+harness = false
+name = "isnan"
+required-features = ["math_expressions"]
+
 [[bench]]
 harness = false
 name = "signum"
diff --git a/datafusion/functions/benches/isnan.rs b/datafusion/functions/benches/isnan.rs
new file mode 100644
index 0000000000..16bbe073da
--- /dev/null
+++ b/datafusion/functions/benches/isnan.rs
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate criterion;
+
+use arrow::{
+    datatypes::{Float32Type, Float64Type},
+    util::bench_util::create_primitive_array,
+};
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_expr::ColumnarValue;
+use datafusion_functions::math::isnan;
+use std::sync::Arc;
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let isnan = isnan();
+    for size in [1024, 4096, 8192] {
+        let f32_array = Arc::new(create_primitive_array::<Float32Type>(size, 0.2));
+        let f32_args = vec![ColumnarValue::Array(f32_array)];
+        c.bench_function(&format!("isnan f32 array: {}", size), |b| {
+            b.iter(|| black_box(isnan.invoke(&f32_args).unwrap()))
+        });
+        let f64_array = Arc::new(create_primitive_array::<Float64Type>(size, 0.2));
+        let f64_args = vec![ColumnarValue::Array(f64_array)];
+        c.bench_function(&format!("isnan f64 array: {}", size), |b| {
+            b.iter(|| black_box(isnan.invoke(&f64_args).unwrap()))
+        });
+    }
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/datafusion/functions/src/math/nans.rs b/datafusion/functions/src/math/nans.rs
index b02839b40b..07747418ea 100644
--- a/datafusion/functions/src/math/nans.rs
+++ b/datafusion/functions/src/math/nans.rs
@@ -17,11 +17,11 @@
 
 //! Math function: `isnan()`.
 
-use arrow::datatypes::DataType;
-use datafusion_common::{exec_err, DataFusionError, Result};
+use arrow::datatypes::{DataType, Float32Type, Float64Type};
+use datafusion_common::{exec_err, Result};
 use datafusion_expr::{ColumnarValue, TypeSignature};
 
-use arrow::array::{ArrayRef, BooleanArray, Float32Array, Float64Array};
+use arrow::array::{ArrayRef, AsArray, BooleanArray};
 use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
 use std::any::Any;
 use std::sync::Arc;
@@ -72,20 +72,15 @@ impl ScalarUDFImpl for IsNanFunc {
         let args = ColumnarValue::values_to_arrays(args)?;
 
         let arr: ArrayRef = match args[0].data_type() {
-            DataType::Float64 => Arc::new(make_function_scalar_inputs_return_type!(
-                &args[0],
-                self.name(),
-                Float64Array,
-                BooleanArray,
-                { f64::is_nan }
-            )),
-            DataType::Float32 => Arc::new(make_function_scalar_inputs_return_type!(
-                &args[0],
-                self.name(),
-                Float32Array,
-                BooleanArray,
-                { f32::is_nan }
-            )),
+            DataType::Float64 => Arc::new(BooleanArray::from_unary(
+                args[0].as_primitive::<Float64Type>(),
+                f64::is_nan,
+            )) as ArrayRef,
+
+            DataType::Float32 => Arc::new(BooleanArray::from_unary(
+                args[0].as_primitive::<Float32Type>(),
+                f32::is_nan,
+            )) as ArrayRef,
             other => {
                 return exec_err!(
                     "Unsupported data type {other:?} for function {}",


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to