This is an automated email from the ASF dual-hosted git repository.
nevime pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 1f98388 ARROW-10785: [Rust] Optimize take string
1f98388 is described below
commit 1f98388f3bfe995d12f80abee9d307d2de723b63
Author: Heres, Daniel <[email protected]>
AuthorDate: Thu Dec 3 06:26:26 2020 +0200
ARROW-10785: [Rust] Optimize take string
Further optimizes take string to benefit from creating a buffer directly.
Interestingly, it seems it is still faster to copy a `Vec` than to use a
buffer for the the string values. Presumably `extend_from_slice` is much slower
than the `Vec` one.
This PR:
* pre allocates buffer for offsets
* adds a special case for 0 null indices in which case we can skip a check
* adding some benchmarks for string value arrays containing nulls
Benchmark results
```
take str 512 time: [2.1942 us 2.1956 us 2.1971 us]
change: [-22.950% -22.847% -22.738%] (p = 0.00 <
0.05)
Performance has improved.
Found 4 outliers among 100 measurements (4.00%)
2 (2.00%) low severe
1 (1.00%) high mild
1 (1.00%) high severe
take str 1024 time: [3.7476 us 3.7529 us 3.7605 us]
change: [-23.958% -23.825% -23.678%] (p = 0.00 <
0.05)
Performance has improved.
Found 7 outliers among 100 measurements (7.00%)
1 (1.00%) low mild
3 (3.00%) high mild
3 (3.00%) high severe
take str null indices 512
time: [2.0983 us 2.1015 us 2.1056 us]
change: [-24.742% -24.387% -23.981%] (p = 0.00 <
0.05)
Performance has improved.
Found 8 outliers among 100 measurements (8.00%)
5 (5.00%) high mild
3 (3.00%) high severe
take str null indices 1024
time: [3.6767 us 3.6788 us 3.6812 us]
change: [-22.588% -22.497% -22.406%] (p = 0.00 <
0.05)
Performance has improved.
Found 2 outliers among 100 measurements (2.00%)
2 (2.00%) high mild
take str null values 1024
time: [3.6732 us 3.6773 us 3.6836 us]
change: [-23.029% -22.857% -22.695%] (p = 0.00 <
0.05)
Performance has improved.
Found 3 outliers among 100 measurements (3.00%)
2 (2.00%) high mild
1 (1.00%) high severe
take str null values null indices 1024
time: [5.3789 us 5.3821 us 5.3853 us]
change: [-10.711% -10.610% -10.509%] (p = 0.00 <
0.05)
Performance has improved.
Found 1 outliers among 100 measurements (1.00%)
```
Closes #8814 from Dandandan/opt_take_string
Authored-by: Heres, Daniel <[email protected]>
Signed-off-by: Neville Dipale <[email protected]>
---
rust/arrow/benches/take_kernels.rs | 52 ++++++++++++++++++++------------
rust/arrow/src/compute/kernels/take.rs | 55 ++++++++++++++++++++++++----------
2 files changed, 73 insertions(+), 34 deletions(-)
diff --git a/rust/arrow/benches/take_kernels.rs
b/rust/arrow/benches/take_kernels.rs
index d20f175..4838494 100644
--- a/rust/arrow/benches/take_kernels.rs
+++ b/rust/arrow/benches/take_kernels.rs
@@ -47,19 +47,20 @@ where
Arc::new(array) as ArrayRef
}
-fn create_strings(size: usize) -> ArrayRef {
- let v = (0..size)
- .map(|_| {
- seedable_rng()
- .sample_iter(&Alphanumeric)
- .take(5)
- .collect::<String>()
- })
- .collect::<Vec<_>>();
-
- Arc::new(StringArray::from(
- v.iter().map(|x| &**x).collect::<Vec<&str>>(),
- ))
+fn create_strings(size: usize, null_density: f32) -> ArrayRef {
+ let rng = &mut seedable_rng();
+
+ let mut builder = StringBuilder::new(size);
+ for _ in 0..size {
+ let x = rng.gen::<f32>();
+ if x < null_density {
+ let value =
rng.sample_iter(&Alphanumeric).take(4).collect::<String>();
+ builder.append_value(&value).unwrap();
+ } else {
+ builder.append_null().unwrap()
+ }
+ }
+ Arc::new(builder.finish())
}
fn create_random_index(size: usize, null_density: f32) -> UInt32Array {
@@ -122,25 +123,38 @@ fn add_benchmark(c: &mut Criterion) {
b.iter(|| bench_take(&values, &indices))
});
- let values = create_strings(512);
+ let values = create_strings(512, 0.0);
let indices = create_random_index(512, 0.0);
c.bench_function("take str 512", |b| b.iter(|| bench_take(&values,
&indices)));
- let values = create_strings(1024);
+ let values = create_strings(1024, 0.0);
let indices = create_random_index(1024, 0.0);
c.bench_function("take str 1024", |b| {
b.iter(|| bench_take(&values, &indices))
});
- let values = create_strings(512);
+ let values = create_strings(512, 0.0);
let indices = create_random_index(512, 0.5);
- c.bench_function("take str nulls 512", |b| {
+ c.bench_function("take str null indices 512", |b| {
+ b.iter(|| bench_take(&values, &indices))
+ });
+
+ let values = create_strings(1024, 0.0);
+ let indices = create_random_index(1024, 0.5);
+ c.bench_function("take str null indices 1024", |b| {
+ b.iter(|| bench_take(&values, &indices))
+ });
+
+ let values = create_strings(1024, 0.5);
+
+ let indices = create_random_index(1024, 0.0);
+ c.bench_function("take str null values 1024", |b| {
b.iter(|| bench_take(&values, &indices))
});
- let values = create_strings(1024);
+ let values = create_strings(1024, 0.5);
let indices = create_random_index(1024, 0.5);
- c.bench_function("take str nulls 1024", |b| {
+ c.bench_function("take str null values null indices 1024", |b| {
b.iter(|| bench_take(&values, &indices))
});
}
diff --git a/rust/arrow/src/compute/kernels/take.rs
b/rust/arrow/src/compute/kernels/take.rs
index 2d9deba..7d3c490 100644
--- a/rust/arrow/src/compute/kernels/take.rs
+++ b/rust/arrow/src/compute/kernels/take.rs
@@ -366,17 +366,18 @@ where
.downcast_ref::<GenericStringArray<OffsetSize>>()
.unwrap();
- let num_bytes = bit_util::ceil(data_len, 8);
+ let bytes_offset = (data_len + 1) * std::mem::size_of::<OffsetSize>();
+ let mut offsets_buffer = MutableBuffer::new(bytes_offset);
+ offsets_buffer.resize(bytes_offset);
- let null_count = array.null_count();
- let mut offsets = Vec::with_capacity(data_len + 1);
- let mut values = Vec::with_capacity(data_len);
+ let offsets = offsets_buffer.typed_data_mut();
+ let mut values = Vec::with_capacity(bytes_offset);
let mut length_so_far = OffsetSize::zero();
- offsets.push(length_so_far);
+ offsets[0] = length_so_far;
let nulls;
- if null_count == 0 && indices.null_count() == 0 {
- for i in 0..data_len {
+ if array.null_count() == 0 && indices.null_count() == 0 {
+ for (i, offset) in offsets.iter_mut().skip(1).enumerate() {
let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(||
{
ArrowError::ComputeError("Cast to usize failed".to_string())
})?;
@@ -385,11 +386,33 @@ where
length_so_far += OffsetSize::from_usize(s.len()).unwrap();
values.extend_from_slice(s.as_bytes());
- offsets.push(length_so_far);
+ *offset = length_so_far;
}
nulls = None
- } else if null_count == 0 {
- for i in 0..data_len {
+ } else if indices.null_count() == 0 {
+ let num_bytes = bit_util::ceil(data_len, 8);
+
+ let mut null_buf =
MutableBuffer::new(num_bytes).with_bitset(num_bytes, true);
+ let null_slice = null_buf.data_mut();
+
+ for (i, offset) in offsets.iter_mut().skip(1).enumerate() {
+ let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(||
{
+ ArrowError::ComputeError("Cast to usize failed".to_string())
+ })?;
+
+ if array.is_valid(index) {
+ let s = array.value(index);
+
+ length_so_far += OffsetSize::from_usize(s.len()).unwrap();
+ values.extend_from_slice(s.as_bytes());
+ } else {
+ bit_util::unset_bit(null_slice, i);
+ }
+ *offset = length_so_far;
+ }
+ nulls = Some(null_buf.freeze());
+ } else if array.null_count() == 0 {
+ for (i, offset) in offsets.iter_mut().skip(1).enumerate() {
if indices.is_valid(i) {
let index =
ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| {
@@ -401,14 +424,16 @@ where
length_so_far += OffsetSize::from_usize(s.len()).unwrap();
values.extend_from_slice(s.as_bytes());
}
- offsets.push(length_so_far);
+ *offset = length_so_far;
}
nulls = indices.data_ref().null_buffer().cloned();
} else {
+ let num_bytes = bit_util::ceil(data_len, 8);
+
let mut null_buf =
MutableBuffer::new(num_bytes).with_bitset(num_bytes, true);
let null_slice = null_buf.data_mut();
- for i in 0..data_len {
+ for (i, offset) in offsets.iter_mut().skip(1).enumerate() {
let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(||
{
ArrowError::ComputeError("Cast to usize failed".to_string())
})?;
@@ -422,7 +447,7 @@ where
// set null bit
bit_util::unset_bit(null_slice, i);
}
- offsets.push(length_so_far);
+ *offset = length_so_far;
}
nulls = match indices.data_ref().null_buffer() {
@@ -435,8 +460,8 @@ where
let mut data = ArrayData::builder(<OffsetSize as
StringOffsetSizeTrait>::DATA_TYPE)
.len(data_len)
- .add_buffer(Buffer::from(offsets.to_byte_slice()))
- .add_buffer(Buffer::from(&values[..]));
+ .add_buffer(offsets_buffer.freeze())
+ .add_buffer(Buffer::from(values));
if let Some(null_buffer) = nulls {
data = data.null_bit_buffer(null_buffer);
}