This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 63a6209b8 Reduce bounds check in `RowIter`, add `unsafe Rows::row_unchecked` (#6142)
63a6209b8 is described below

commit 63a6209b87d9fb2d06265fa5d4c72817b6f47394
Author: Xiangpeng Hao <[email protected]>
AuthorDate: Tue Aug 6 21:01:46 2024 +0800

    Reduce bounds check in `RowIter`, add `unsafe Rows::row_unchecked` (#6142)
    
    * update
    
    * update comment
    
    * update row-iter bench
    
    * make clippy happy
---
 arrow-row/src/lib.rs        | 23 ++++++++++++++++++-----
 arrow/benches/row_format.rs | 18 ++++++++++++++++++
 2 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs
index a6fd03b5b..2d9af7575 100644
--- a/arrow-row/src/lib.rs
+++ b/arrow-row/src/lib.rs
@@ -835,10 +835,20 @@ impl Rows {
 
     /// Returns the row at index `row`
     pub fn row(&self, row: usize) -> Row<'_> {
-        let end = self.offsets[row + 1];
-        let start = self.offsets[row];
+        assert!(row + 1 < self.offsets.len());
+        unsafe { self.row_unchecked(row) }
+    }
+
+    /// Returns the row at `index` without bounds checking
+    ///
+    /// # Safety
+    /// Caller must ensure that `index` is less than the number of offsets (#rows + 1)
+    pub unsafe fn row_unchecked(&self, index: usize) -> Row<'_> {
+        let end = unsafe { self.offsets.get_unchecked(index + 1) };
+        let start = unsafe { self.offsets.get_unchecked(index) };
+        let data = unsafe { self.buffer.get_unchecked(*start..*end) };
         Row {
-            data: &self.buffer[start..end],
+            data,
             config: &self.config,
         }
     }
@@ -898,7 +908,9 @@ impl<'a> Iterator for RowsIter<'a> {
         if self.end == self.start {
             return None;
         }
-        let row = self.rows.row(self.start);
+
+        // SAFETY: We have checked that `start` is less than `end`
+        let row = unsafe { self.rows.row_unchecked(self.start) };
         self.start += 1;
         Some(row)
     }
@@ -920,7 +932,8 @@ impl<'a> DoubleEndedIterator for RowsIter<'a> {
         if self.end == self.start {
             return None;
         }
-        let row = self.rows.row(self.end);
+        // Safety: We have checked that `start` is less than `end`
+        let row = unsafe { self.rows.row_unchecked(self.end) };
         self.end -= 1;
         Some(row)
     }
diff --git a/arrow/benches/row_format.rs b/arrow/benches/row_format.rs
index 0fb63b5b3..773cdc652 100644
--- a/arrow/benches/row_format.rs
+++ b/arrow/benches/row_format.rs
@@ -56,6 +56,22 @@ fn do_bench(c: &mut Criterion, name: &str, cols: Vec<ArrayRef>) {
     });
 }
 
+fn bench_iter(c: &mut Criterion) {
+    let col = create_string_view_array_with_len(40960, 0., 100, false);
+    let converter = RowConverter::new(vec![SortField::new(col.data_type().clone())]).unwrap();
+    let rows = converter
+        .convert_columns(&[Arc::new(col) as ArrayRef])
+        .unwrap();
+
+    c.bench_function("iterate rows", |b| {
+        b.iter(|| {
+            for r in rows.iter() {
+                std::hint::black_box(r.as_ref());
+            }
+        })
+    });
+}
+
 fn row_bench(c: &mut Criterion) {
     let cols = vec![Arc::new(create_primitive_array::<UInt64Type>(4096, 0.)) as ArrayRef];
     do_bench(c, "4096 u64(0)", cols);
@@ -145,6 +161,8 @@ fn row_bench(c: &mut Criterion) {
         Arc::new(create_primitive_array::<Int64Type>(4096, 0.)) as ArrayRef,
     ];
     do_bench(c, "4096 4096 string_dictionary(20, 0.5), string_dictionary(30, 0), string_dictionary(100, 0), i64(0)", cols);
+
+    bench_iter(c);
 }
 
 criterion_group!(benches, row_bench);

Reply via email to