(datafusion) branch main updated: Optimize performance of math::trunc (#12909)

alamb Wed, 16 Oct 2024 10:13:32 -0700

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git



The following commit(s) were added to refs/heads/main by this push:
     new caeabc1d19 Optimize performance of math::trunc (#12909)
caeabc1d19 is described below

commit caeabc1d198b54cd6a11d20f896ae880d8da2d50
Author: Tai Le Manh <[email protected]>
AuthorDate: Thu Oct 17 00:13:22 2024 +0700

    Optimize performance of math::trunc (#12909)
    
    Signed-off-by: Tai Le Manh <[email protected]>
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/functions/Cargo.toml        |  5 +++
 datafusion/functions/benches/trunc.rs  | 47 +++++++++++++++++++
 datafusion/functions/src/math/trunc.rs | 82 +++++++++++++++++++++-------------
 3 files changed, 104 insertions(+), 30 deletions(-)

diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml
index 4d7f14f9b1..6099ad62c1 100644
--- a/datafusion/functions/Cargo.toml
+++ b/datafusion/functions/Cargo.toml
@@ -201,3 +201,8 @@ required-features = ["math_expressions"]
 harness = false
 name = "strpos"
 required-features = ["unicode_expressions"]
+
+[[bench]]
+harness = false
+name = "trunc"
+required-features = ["math_expressions"]
diff --git a/datafusion/functions/benches/trunc.rs b/datafusion/functions/benches/trunc.rs
new file mode 100644
index 0000000000..92a08abf3d
--- /dev/null
+++ b/datafusion/functions/benches/trunc.rs
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate criterion;
+
+use arrow::{
+    datatypes::{Float32Type, Float64Type},
+    util::bench_util::create_primitive_array,
+};
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_expr::ColumnarValue;
+use datafusion_functions::math::trunc;
+
+use std::sync::Arc;
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let trunc = trunc();
+    for size in [1024, 4096, 8192] {
+        let f32_array = Arc::new(create_primitive_array::<Float32Type>(size, 0.2));
+        let f32_args = vec![ColumnarValue::Array(f32_array)];
+        c.bench_function(&format!("trunc f32 array: {}", size), |b| {
+            b.iter(|| black_box(trunc.invoke(&f32_args).unwrap()))
+        });
+        let f64_array = Arc::new(create_primitive_array::<Float64Type>(size, 0.2));
+        let f64_args = vec![ColumnarValue::Array(f64_array)];
+        c.bench_function(&format!("trunc f64 array: {}", size), |b| {
+            b.iter(|| black_box(trunc.invoke(&f64_args).unwrap()))
+        });
+    }
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/datafusion/functions/src/math/trunc.rs b/datafusion/functions/src/math/trunc.rs
index 355d1e52d6..17a8442031 100644
--- a/datafusion/functions/src/math/trunc.rs
+++ b/datafusion/functions/src/math/trunc.rs
@@ -20,11 +20,11 @@ use std::sync::{Arc, OnceLock};
 
 use crate::utils::make_scalar_function;
 
-use arrow::array::{ArrayRef, Float32Array, Float64Array, Int64Array};
-use arrow::datatypes::DataType;
+use arrow::array::{ArrayRef, AsArray, PrimitiveArray};
 use arrow::datatypes::DataType::{Float32, Float64};
+use arrow::datatypes::{DataType, Float32Type, Float64Type, Int64Type};
 use datafusion_common::ScalarValue::Int64;
-use datafusion_common::{exec_err, DataFusionError, Result};
+use datafusion_common::{exec_err, Result};
 use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH;
 use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
 use datafusion_expr::TypeSignature::Exact;
@@ -139,8 +139,8 @@ fn trunc(args: &[ArrayRef]) -> Result<ArrayRef> {
         );
     }
 
-    //if only one arg then invoke toolchain trunc(num) and precision = 0 by default
-    //or then invoke the compute_truncate method to process precision
+    // If only one arg then invoke toolchain trunc(num) and precision = 0 by default
+    // or then invoke the compute_truncate method to process precision
     let num = &args[0];
     let precision = if args.len() == 1 {
         ColumnarValue::Scalar(Int64(Some(0)))
@@ -148,35 +148,57 @@ fn trunc(args: &[ArrayRef]) -> Result<ArrayRef> {
         ColumnarValue::Array(Arc::clone(&args[1]))
     };
 
-    match args[0].data_type() {
+    match num.data_type() {
         Float64 => match precision {
-            ColumnarValue::Scalar(Int64(Some(0))) => Ok(Arc::new(
-                make_function_scalar_inputs!(num, "num", Float64Array, { f64::trunc }),
-            ) as ArrayRef),
-            ColumnarValue::Array(precision) => Ok(Arc::new(make_function_inputs2!(
-                num,
-                precision,
-                "x",
-                "y",
-                Float64Array,
-                Int64Array,
-                { compute_truncate64 }
-            )) as ArrayRef),
+            ColumnarValue::Scalar(Int64(Some(0))) => {
+                Ok(Arc::new(
+                    args[0]
+                        .as_primitive::<Float64Type>()
+                        .unary::<_, Float64Type>(|x: f64| {
+                            if x == 0_f64 {
+                                0_f64
+                            } else {
+                                x.trunc()
+                            }
+                        }),
+                ) as ArrayRef)
+            }
+            ColumnarValue::Array(precision) => {
+                let num_array = num.as_primitive::<Float64Type>();
+                let precision_array = precision.as_primitive::<Int64Type>();
+                let result: PrimitiveArray<Float64Type> =
+                    arrow::compute::binary(num_array, precision_array, |x, y| {
+                        compute_truncate64(x, y)
+                    })?;
+
+                Ok(Arc::new(result) as ArrayRef)
+            }
             _ => exec_err!("trunc function requires a scalar or array for precision"),
         },
         Float32 => match precision {
-            ColumnarValue::Scalar(Int64(Some(0))) => Ok(Arc::new(
-                make_function_scalar_inputs!(num, "num", Float32Array, { f32::trunc }),
-            ) as ArrayRef),
-            ColumnarValue::Array(precision) => Ok(Arc::new(make_function_inputs2!(
-                num,
-                precision,
-                "x",
-                "y",
-                Float32Array,
-                Int64Array,
-                { compute_truncate32 }
-            )) as ArrayRef),
+            ColumnarValue::Scalar(Int64(Some(0))) => {
+                Ok(Arc::new(
+                    args[0]
+                        .as_primitive::<Float32Type>()
+                        .unary::<_, Float32Type>(|x: f32| {
+                            if x == 0_f32 {
+                                0_f32
+                            } else {
+                                x.trunc()
+                            }
+                        }),
+                ) as ArrayRef)
+            }
+            ColumnarValue::Array(precision) => {
+                let num_array = num.as_primitive::<Float32Type>();
+                let precision_array = precision.as_primitive::<Int64Type>();
+                let result: PrimitiveArray<Float32Type> =
+                    arrow::compute::binary(num_array, precision_array, |x, y| {
+                        compute_truncate32(x, y)
+                    })?;
+
+                Ok(Arc::new(result) as ArrayRef)
+            }
             _ => exec_err!("trunc function requires a scalar or array for precision"),
         },
         other => exec_err!("Unsupported data type {other:?} for function trunc"),


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datafusion) branch main updated: Optimize performance of math::trunc (#12909)

Reply via email to