This is an automated email from the ASF dual-hosted git repository.
jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 2f68741 ARROW-11039: [Rust] Performance improvement for utf-8 to float cast
2f68741 is described below
commit 2f6874158e3d94bd5eb31765c2550dce8d015c19
Author: Daniël Heres <[email protected]>
AuthorDate: Sun Dec 27 10:41:12 2020 +0000
ARROW-11039: [Rust] Performance improvement for utf-8 to float cast
Utilize `lexical_core::parse` for faster parsing.
```
cast utf8 to f32 time: [25.840 us 25.878 us 25.921 us]
change: [-45.735% -45.590% -45.408%] (p = 0.00 < 0.05)
Performance has improved.
Found 7 outliers among 100 measurements (7.00%)
1 (1.00%) low mild
3 (3.00%) high mild
3 (3.00%) high severe
```
Closes #9018 from Dandandan/perf_cast_float
Lead-authored-by: Daniël Heres <[email protected]>
Co-authored-by: Heres, Daniel <[email protected]>
Signed-off-by: Jorge C. Leitao <[email protected]>
---
rust/arrow/benches/cast_kernels.rs | 6 ++++++
rust/arrow/src/compute/kernels/cast.rs | 13 ++++++-------
2 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/rust/arrow/benches/cast_kernels.rs b/rust/arrow/benches/cast_kernels.rs
index 7f6acd2..b9e33cc 100644
--- a/rust/arrow/benches/cast_kernels.rs
+++ b/rust/arrow/benches/cast_kernels.rs
@@ -120,6 +120,8 @@ fn add_benchmark(c: &mut Criterion) {
let i32_array = build_array::<Int32Type>(512);
let i64_array = build_array::<Int64Type>(512);
let f32_array = build_array::<Float32Type>(512);
+ let f32_utf8_array = cast(&build_array::<Float32Type>(512), &DataType::Utf8).unwrap();
+
let f64_array = build_array::<Float64Type>(512);
let date64_array = build_array::<Date64Type>(512);
let date32_array = build_array::<Date32Type>(512);
@@ -188,6 +190,10 @@ fn add_benchmark(c: &mut Criterion) {
)
})
});
+ c.bench_function("cast utf8 to f32", |b| {
+ b.iter(|| cast_array(&f32_utf8_array, DataType::Float32))
+ });
+
c.bench_function("cast timestamp_ms to i64 512", |b| {
b.iter(|| cast_array(&time_ms_array, DataType::Int64))
});
diff --git a/rust/arrow/src/compute/kernels/cast.rs b/rust/arrow/src/compute/kernels/cast.rs
index 1dfcc1b..028f115 100644
--- a/rust/arrow/src/compute/kernels/cast.rs
+++ b/rust/arrow/src/compute/kernels/cast.rs
@@ -923,11 +923,12 @@ where
}
/// Cast numeric types to Utf8
-fn cast_string_to_numeric<TO>(from: &ArrayRef) -> Result<ArrayRef>
+fn cast_string_to_numeric<T>(from: &ArrayRef) -> Result<ArrayRef>
where
- TO: ArrowNumericType,
+ T: ArrowNumericType,
+ <T as ArrowPrimitiveType>::Native: lexical_core::FromLexical,
{
- Ok(Arc::new(string_to_numeric_cast::<TO>(
+ Ok(Arc::new(string_to_numeric_cast::<T>(
from.as_any().downcast_ref::<StringArray>().unwrap(),
)))
}
@@ -935,16 +936,14 @@ where
fn string_to_numeric_cast<T>(from: &StringArray) -> PrimitiveArray<T>
where
T: ArrowNumericType,
+ <T as ArrowPrimitiveType>::Native: lexical_core::FromLexical,
{
(0..from.len())
.map(|i| {
if from.is_null(i) {
None
} else {
- match from.value(i).parse::<T::Native>() {
- Ok(v) => Some(v),
- Err(_) => None,
- }
+ lexical_core::parse(from.value(i).as_bytes()).ok()
}
})
.collect()