This is an automated email from the ASF dual-hosted git repository.

nevime pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 1f98388  ARROW-10785: [Rust] Optimize take string
1f98388 is described below

commit 1f98388f3bfe995d12f80abee9d307d2de723b63
Author: Heres, Daniel <[email protected]>
AuthorDate: Thu Dec 3 06:26:26 2020 +0200

    ARROW-10785: [Rust] Optimize take string
    
    Further optimizes take string to benefit from creating a buffer directly.
    
    Interestingly, it seems it is still faster to copy a `Vec`  than to use a 
buffer for the the string values. Presumably `extend_from_slice` is much slower 
than the `Vec` one.
    
    This PR:
    
    * pre allocates buffer for offsets
    * adds a special case for 0 null indices in which case we can skip a check
    * adding some benchmarks for string value arrays containing nulls
    
    Benchmark results
    
    ```
    take str 512            time:   [2.1942 us 2.1956 us 2.1971 us]
                            change: [-22.950% -22.847% -22.738%] (p = 0.00 < 
0.05)
                            Performance has improved.
    Found 4 outliers among 100 measurements (4.00%)
      2 (2.00%) low severe
      1 (1.00%) high mild
      1 (1.00%) high severe
    
    take str 1024           time:   [3.7476 us 3.7529 us 3.7605 us]
                            change: [-23.958% -23.825% -23.678%] (p = 0.00 < 
0.05)
                            Performance has improved.
    Found 7 outliers among 100 measurements (7.00%)
      1 (1.00%) low mild
      3 (3.00%) high mild
      3 (3.00%) high severe
    
    take str null indices 512
                            time:   [2.0983 us 2.1015 us 2.1056 us]
                            change: [-24.742% -24.387% -23.981%] (p = 0.00 < 
0.05)
                            Performance has improved.
    Found 8 outliers among 100 measurements (8.00%)
      5 (5.00%) high mild
      3 (3.00%) high severe
    
    take str null indices 1024
                            time:   [3.6767 us 3.6788 us 3.6812 us]
                            change: [-22.588% -22.497% -22.406%] (p = 0.00 < 
0.05)
                            Performance has improved.
    Found 2 outliers among 100 measurements (2.00%)
      2 (2.00%) high mild
    
    take str null values 1024
                            time:   [3.6732 us 3.6773 us 3.6836 us]
                            change: [-23.029% -22.857% -22.695%] (p = 0.00 < 
0.05)
                            Performance has improved.
    Found 3 outliers among 100 measurements (3.00%)
      2 (2.00%) high mild
      1 (1.00%) high severe
    
    take str null values null indices 1024
                            time:   [5.3789 us 5.3821 us 5.3853 us]
                            change: [-10.711% -10.610% -10.509%] (p = 0.00 < 
0.05)
                            Performance has improved.
    Found 1 outliers among 100 measurements (1.00%)
    ```
    
    Closes #8814 from Dandandan/opt_take_string
    
    Authored-by: Heres, Daniel <[email protected]>
    Signed-off-by: Neville Dipale <[email protected]>
---
 rust/arrow/benches/take_kernels.rs     | 52 ++++++++++++++++++++------------
 rust/arrow/src/compute/kernels/take.rs | 55 ++++++++++++++++++++++++----------
 2 files changed, 73 insertions(+), 34 deletions(-)

diff --git a/rust/arrow/benches/take_kernels.rs 
b/rust/arrow/benches/take_kernels.rs
index d20f175..4838494 100644
--- a/rust/arrow/benches/take_kernels.rs
+++ b/rust/arrow/benches/take_kernels.rs
@@ -47,19 +47,20 @@ where
     Arc::new(array) as ArrayRef
 }
 
-fn create_strings(size: usize) -> ArrayRef {
-    let v = (0..size)
-        .map(|_| {
-            seedable_rng()
-                .sample_iter(&Alphanumeric)
-                .take(5)
-                .collect::<String>()
-        })
-        .collect::<Vec<_>>();
-
-    Arc::new(StringArray::from(
-        v.iter().map(|x| &**x).collect::<Vec<&str>>(),
-    ))
+fn create_strings(size: usize, null_density: f32) -> ArrayRef {
+    let rng = &mut seedable_rng();
+
+    let mut builder = StringBuilder::new(size);
+    for _ in 0..size {
+        let x = rng.gen::<f32>();
+        if x < null_density {
+            let value = 
rng.sample_iter(&Alphanumeric).take(4).collect::<String>();
+            builder.append_value(&value).unwrap();
+        } else {
+            builder.append_null().unwrap()
+        }
+    }
+    Arc::new(builder.finish())
 }
 
 fn create_random_index(size: usize, null_density: f32) -> UInt32Array {
@@ -122,25 +123,38 @@ fn add_benchmark(c: &mut Criterion) {
         b.iter(|| bench_take(&values, &indices))
     });
 
-    let values = create_strings(512);
+    let values = create_strings(512, 0.0);
     let indices = create_random_index(512, 0.0);
     c.bench_function("take str 512", |b| b.iter(|| bench_take(&values, 
&indices)));
 
-    let values = create_strings(1024);
+    let values = create_strings(1024, 0.0);
     let indices = create_random_index(1024, 0.0);
     c.bench_function("take str 1024", |b| {
         b.iter(|| bench_take(&values, &indices))
     });
 
-    let values = create_strings(512);
+    let values = create_strings(512, 0.0);
     let indices = create_random_index(512, 0.5);
-    c.bench_function("take str nulls 512", |b| {
+    c.bench_function("take str null indices 512", |b| {
+        b.iter(|| bench_take(&values, &indices))
+    });
+
+    let values = create_strings(1024, 0.0);
+    let indices = create_random_index(1024, 0.5);
+    c.bench_function("take str null indices 1024", |b| {
+        b.iter(|| bench_take(&values, &indices))
+    });
+
+    let values = create_strings(1024, 0.5);
+
+    let indices = create_random_index(1024, 0.0);
+    c.bench_function("take str null values 1024", |b| {
         b.iter(|| bench_take(&values, &indices))
     });
 
-    let values = create_strings(1024);
+    let values = create_strings(1024, 0.5);
     let indices = create_random_index(1024, 0.5);
-    c.bench_function("take str nulls 1024", |b| {
+    c.bench_function("take str null values null indices 1024", |b| {
         b.iter(|| bench_take(&values, &indices))
     });
 }
diff --git a/rust/arrow/src/compute/kernels/take.rs 
b/rust/arrow/src/compute/kernels/take.rs
index 2d9deba..7d3c490 100644
--- a/rust/arrow/src/compute/kernels/take.rs
+++ b/rust/arrow/src/compute/kernels/take.rs
@@ -366,17 +366,18 @@ where
         .downcast_ref::<GenericStringArray<OffsetSize>>()
         .unwrap();
 
-    let num_bytes = bit_util::ceil(data_len, 8);
+    let bytes_offset = (data_len + 1) * std::mem::size_of::<OffsetSize>();
+    let mut offsets_buffer = MutableBuffer::new(bytes_offset);
+    offsets_buffer.resize(bytes_offset);
 
-    let null_count = array.null_count();
-    let mut offsets = Vec::with_capacity(data_len + 1);
-    let mut values = Vec::with_capacity(data_len);
+    let offsets = offsets_buffer.typed_data_mut();
+    let mut values = Vec::with_capacity(bytes_offset);
     let mut length_so_far = OffsetSize::zero();
-    offsets.push(length_so_far);
+    offsets[0] = length_so_far;
 
     let nulls;
-    if null_count == 0 && indices.null_count() == 0 {
-        for i in 0..data_len {
+    if array.null_count() == 0 && indices.null_count() == 0 {
+        for (i, offset) in offsets.iter_mut().skip(1).enumerate() {
             let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| 
{
                 ArrowError::ComputeError("Cast to usize failed".to_string())
             })?;
@@ -385,11 +386,33 @@ where
 
             length_so_far += OffsetSize::from_usize(s.len()).unwrap();
             values.extend_from_slice(s.as_bytes());
-            offsets.push(length_so_far);
+            *offset = length_so_far;
         }
         nulls = None
-    } else if null_count == 0 {
-        for i in 0..data_len {
+    } else if indices.null_count() == 0 {
+        let num_bytes = bit_util::ceil(data_len, 8);
+
+        let mut null_buf = 
MutableBuffer::new(num_bytes).with_bitset(num_bytes, true);
+        let null_slice = null_buf.data_mut();
+
+        for (i, offset) in offsets.iter_mut().skip(1).enumerate() {
+            let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| 
{
+                ArrowError::ComputeError("Cast to usize failed".to_string())
+            })?;
+
+            if array.is_valid(index) {
+                let s = array.value(index);
+
+                length_so_far += OffsetSize::from_usize(s.len()).unwrap();
+                values.extend_from_slice(s.as_bytes());
+            } else {
+                bit_util::unset_bit(null_slice, i);
+            }
+            *offset = length_so_far;
+        }
+        nulls = Some(null_buf.freeze());
+    } else if array.null_count() == 0 {
+        for (i, offset) in offsets.iter_mut().skip(1).enumerate() {
             if indices.is_valid(i) {
                 let index =
                     ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| {
@@ -401,14 +424,16 @@ where
                 length_so_far += OffsetSize::from_usize(s.len()).unwrap();
                 values.extend_from_slice(s.as_bytes());
             }
-            offsets.push(length_so_far);
+            *offset = length_so_far;
         }
         nulls = indices.data_ref().null_buffer().cloned();
     } else {
+        let num_bytes = bit_util::ceil(data_len, 8);
+
         let mut null_buf = 
MutableBuffer::new(num_bytes).with_bitset(num_bytes, true);
         let null_slice = null_buf.data_mut();
 
-        for i in 0..data_len {
+        for (i, offset) in offsets.iter_mut().skip(1).enumerate() {
             let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| 
{
                 ArrowError::ComputeError("Cast to usize failed".to_string())
             })?;
@@ -422,7 +447,7 @@ where
                 // set null bit
                 bit_util::unset_bit(null_slice, i);
             }
-            offsets.push(length_so_far);
+            *offset = length_so_far;
         }
 
         nulls = match indices.data_ref().null_buffer() {
@@ -435,8 +460,8 @@ where
 
     let mut data = ArrayData::builder(<OffsetSize as 
StringOffsetSizeTrait>::DATA_TYPE)
         .len(data_len)
-        .add_buffer(Buffer::from(offsets.to_byte_slice()))
-        .add_buffer(Buffer::from(&values[..]));
+        .add_buffer(offsets_buffer.freeze())
+        .add_buffer(Buffer::from(values));
     if let Some(null_buffer) = nulls {
         data = data.null_bit_buffer(null_buffer);
     }

Reply via email to