This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 93e4eb25c Faster `GenericByteView` construction (#6102)
93e4eb25c is described below

commit 93e4eb25c7a09e3d1eb0210f47639b3c836cd5af
Author: Xiangpeng Hao <[email protected]>
AuthorDate: Tue Jul 23 15:04:34 2024 -0400

    Faster `GenericByteView` construction (#6102)
    
    * add benchmark to track performance
    
    * fast byte view construction
    
    * make doc happy
    
    * fix clippy
    
    * update comments
---
 .../src/builder/generic_bytes_view_builder.rs      | 56 ++++++++++++++++------
 1 file changed, 41 insertions(+), 15 deletions(-)

diff --git a/arrow-array/src/builder/generic_bytes_view_builder.rs b/arrow-array/src/builder/generic_bytes_view_builder.rs
index 587255cc6..7726ee352 100644
--- a/arrow-array/src/builder/generic_bytes_view_builder.rs
+++ b/arrow-array/src/builder/generic_bytes_view_builder.rs
@@ -420,23 +420,49 @@ pub type StringViewBuilder = GenericByteViewBuilder<StringViewType>;
 /// [`GenericByteViewBuilder::append_null`] as normal.
 pub type BinaryViewBuilder = GenericByteViewBuilder<BinaryViewType>;
 
+/// Creates a view from a fixed length input (the compiler can generate
+/// specialized code for this)
+fn make_inlined_view<const LEN: usize>(data: &[u8]) -> u128 {
+    let mut view_buffer = [0; 16];
+    view_buffer[0..4].copy_from_slice(&(LEN as u32).to_le_bytes());
+    view_buffer[4..4 + LEN].copy_from_slice(&data[..LEN]);
+    u128::from_le_bytes(view_buffer)
+}
+
 /// Create a view based on the given data, block id and offset
-#[inline(always)]
+/// Note that the code below is carefully examined with x86_64 assembly code: <https://godbolt.org/z/685YPsd5G>
+/// The goal is to avoid calling into `ptr::copy_non_interleave`, which makes function call (i.e., not inlined),
+/// which slows down things.
+#[inline(never)]
 pub fn make_view(data: &[u8], block_id: u32, offset: u32) -> u128 {
-    let len = data.len() as u32;
-    if len <= 12 {
-        let mut view_buffer = [0; 16];
-        view_buffer[0..4].copy_from_slice(&len.to_le_bytes());
-        view_buffer[4..4 + data.len()].copy_from_slice(data);
-        u128::from_le_bytes(view_buffer)
-    } else {
-        let view = ByteView {
-            length: len,
-            prefix: u32::from_le_bytes(data[0..4].try_into().unwrap()),
-            buffer_index: block_id,
-            offset,
-        };
-        view.into()
+    let len = data.len();
+
+    // Generate specialized code for each potential small string length
+    // to improve performance
+    match len {
+        0 => make_inlined_view::<0>(data),
+        1 => make_inlined_view::<1>(data),
+        2 => make_inlined_view::<2>(data),
+        3 => make_inlined_view::<3>(data),
+        4 => make_inlined_view::<4>(data),
+        5 => make_inlined_view::<5>(data),
+        6 => make_inlined_view::<6>(data),
+        7 => make_inlined_view::<7>(data),
+        8 => make_inlined_view::<8>(data),
+        9 => make_inlined_view::<9>(data),
+        10 => make_inlined_view::<10>(data),
+        11 => make_inlined_view::<11>(data),
+        12 => make_inlined_view::<12>(data),
+        // When string is longer than 12 bytes, it can't be inlined, we create a ByteView instead.
+        _ => {
+            let view = ByteView {
+                length: len as u32,
+                prefix: u32::from_le_bytes(data[0..4].try_into().unwrap()),
+                buffer_index: block_id,
+                offset,
+            };
+            view.as_u128()
+        }
     }
 }
 

Reply via email to