This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 63a6209b8 Reduce bounds check in `RowIter`, add `unsafe
Rows::row_unchecked` (#6142)
63a6209b8 is described below
commit 63a6209b87d9fb2d06265fa5d4c72817b6f47394
Author: Xiangpeng Hao <[email protected]>
AuthorDate: Tue Aug 6 21:01:46 2024 +0800
Reduce bounds check in `RowIter`, add `unsafe Rows::row_unchecked` (#6142)
* update
* update comment
* update row-iter bench
* make clippy happy
---
arrow-row/src/lib.rs | 23 ++++++++++++++++++-----
arrow/benches/row_format.rs | 18 ++++++++++++++++++
2 files changed, 36 insertions(+), 5 deletions(-)
diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs
index a6fd03b5b..2d9af7575 100644
--- a/arrow-row/src/lib.rs
+++ b/arrow-row/src/lib.rs
@@ -835,10 +835,20 @@ impl Rows {
/// Returns the row at index `row`
pub fn row(&self, row: usize) -> Row<'_> {
- let end = self.offsets[row + 1];
- let start = self.offsets[row];
+ assert!(row + 1 < self.offsets.len());
+ unsafe { self.row_unchecked(row) }
+ }
+
+ /// Returns the row at `index` without bounds checking
+ ///
+ /// # Safety
+ /// Caller must ensure that `index + 1` is less than the number of offsets
(#rows + 1)
+ pub unsafe fn row_unchecked(&self, index: usize) -> Row<'_> {
+ let end = unsafe { self.offsets.get_unchecked(index + 1) };
+ let start = unsafe { self.offsets.get_unchecked(index) };
+ let data = unsafe { self.buffer.get_unchecked(*start..*end) };
Row {
- data: &self.buffer[start..end],
+ data,
config: &self.config,
}
}
@@ -898,7 +908,9 @@ impl<'a> Iterator for RowsIter<'a> {
if self.end == self.start {
return None;
}
- let row = self.rows.row(self.start);
+
+ // SAFETY: We have checked that `start` is less than `end`
+ let row = unsafe { self.rows.row_unchecked(self.start) };
self.start += 1;
Some(row)
}
@@ -920,7 +932,8 @@ impl<'a> DoubleEndedIterator for RowsIter<'a> {
if self.end == self.start {
return None;
}
- let row = self.rows.row(self.end);
+ // Safety: We have checked that `start` is less than `end`
+ let row = unsafe { self.rows.row_unchecked(self.end) };
self.end -= 1;
Some(row)
}
diff --git a/arrow/benches/row_format.rs b/arrow/benches/row_format.rs
index 0fb63b5b3..773cdc652 100644
--- a/arrow/benches/row_format.rs
+++ b/arrow/benches/row_format.rs
@@ -56,6 +56,22 @@ fn do_bench(c: &mut Criterion, name: &str, cols:
Vec<ArrayRef>) {
});
}
+fn bench_iter(c: &mut Criterion) {
+ let col = create_string_view_array_with_len(40960, 0., 100, false);
+ let converter =
RowConverter::new(vec![SortField::new(col.data_type().clone())]).unwrap();
+ let rows = converter
+ .convert_columns(&[Arc::new(col) as ArrayRef])
+ .unwrap();
+
+ c.bench_function("iterate rows", |b| {
+ b.iter(|| {
+ for r in rows.iter() {
+ std::hint::black_box(r.as_ref());
+ }
+ })
+ });
+}
+
fn row_bench(c: &mut Criterion) {
let cols = vec![Arc::new(create_primitive_array::<UInt64Type>(4096, 0.))
as ArrayRef];
do_bench(c, "4096 u64(0)", cols);
@@ -145,6 +161,8 @@ fn row_bench(c: &mut Criterion) {
Arc::new(create_primitive_array::<Int64Type>(4096, 0.)) as ArrayRef,
];
do_bench(c, "4096 4096 string_dictionary(20, 0.5), string_dictionary(30,
0), string_dictionary(100, 0), i64(0)", cols);
+
+ bench_iter(c);
}
criterion_group!(benches, row_bench);