This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 2f68741  ARROW-11039: [Rust] Performance improvement for utf-8 to float cast
2f68741 is described below

commit 2f6874158e3d94bd5eb31765c2550dce8d015c19
Author: Daniël Heres <[email protected]>
AuthorDate: Sun Dec 27 10:41:12 2020 +0000

    ARROW-11039: [Rust] Performance improvement for utf-8 to float cast
    
    Utilize `lexical_core::parse` for faster parsing.
    
    ```
    cast utf8 to f32        time:   [25.840 us 25.878 us 25.921 us]
                            change: [-45.735% -45.590% -45.408%] (p = 0.00 < 0.05)
                            Performance has improved.
    Found 7 outliers among 100 measurements (7.00%)
      1 (1.00%) low mild
      3 (3.00%) high mild
      3 (3.00%) high severe
    ```
    
    Closes #9018 from Dandandan/perf_cast_float
    
    Lead-authored-by: Daniël Heres <[email protected]>
    Co-authored-by: Heres, Daniel <[email protected]>
    Signed-off-by: Jorge C. Leitao <[email protected]>
---
 rust/arrow/benches/cast_kernels.rs     |  6 ++++++
 rust/arrow/src/compute/kernels/cast.rs | 13 ++++++-------
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/rust/arrow/benches/cast_kernels.rs b/rust/arrow/benches/cast_kernels.rs
index 7f6acd2..b9e33cc 100644
--- a/rust/arrow/benches/cast_kernels.rs
+++ b/rust/arrow/benches/cast_kernels.rs
@@ -120,6 +120,8 @@ fn add_benchmark(c: &mut Criterion) {
     let i32_array = build_array::<Int32Type>(512);
     let i64_array = build_array::<Int64Type>(512);
     let f32_array = build_array::<Float32Type>(512);
+    let f32_utf8_array = cast(&build_array::<Float32Type>(512), &DataType::Utf8).unwrap();
+
     let f64_array = build_array::<Float64Type>(512);
     let date64_array = build_array::<Date64Type>(512);
     let date32_array = build_array::<Date32Type>(512);
@@ -188,6 +190,10 @@ fn add_benchmark(c: &mut Criterion) {
             )
         })
     });
+    c.bench_function("cast utf8 to f32", |b| {
+        b.iter(|| cast_array(&f32_utf8_array, DataType::Float32))
+    });
+
     c.bench_function("cast timestamp_ms to i64 512", |b| {
         b.iter(|| cast_array(&time_ms_array, DataType::Int64))
     });
diff --git a/rust/arrow/src/compute/kernels/cast.rs b/rust/arrow/src/compute/kernels/cast.rs
index 1dfcc1b..028f115 100644
--- a/rust/arrow/src/compute/kernels/cast.rs
+++ b/rust/arrow/src/compute/kernels/cast.rs
@@ -923,11 +923,12 @@ where
 }
 
 /// Cast numeric types to Utf8
-fn cast_string_to_numeric<TO>(from: &ArrayRef) -> Result<ArrayRef>
+fn cast_string_to_numeric<T>(from: &ArrayRef) -> Result<ArrayRef>
 where
-    TO: ArrowNumericType,
+    T: ArrowNumericType,
+    <T as ArrowPrimitiveType>::Native: lexical_core::FromLexical,
 {
-    Ok(Arc::new(string_to_numeric_cast::<TO>(
+    Ok(Arc::new(string_to_numeric_cast::<T>(
         from.as_any().downcast_ref::<StringArray>().unwrap(),
     )))
 }
@@ -935,16 +936,14 @@ where
 fn string_to_numeric_cast<T>(from: &StringArray) -> PrimitiveArray<T>
 where
     T: ArrowNumericType,
+    <T as ArrowPrimitiveType>::Native: lexical_core::FromLexical,
 {
     (0..from.len())
         .map(|i| {
             if from.is_null(i) {
                 None
             } else {
-                match from.value(i).parse::<T::Native>() {
-                    Ok(v) => Some(v),
-                    Err(_) => None,
-                }
+                lexical_core::parse(from.value(i).as_bytes()).ok()
             }
         })
         .collect()

Reply via email to