This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 93e4eb25c Faster `GenericByteView` construction (#6102)
93e4eb25c is described below
commit 93e4eb25c7a09e3d1eb0210f47639b3c836cd5af
Author: Xiangpeng Hao <[email protected]>
AuthorDate: Tue Jul 23 15:04:34 2024 -0400
Faster `GenericByteView` construction (#6102)
* add benchmark to track performance
* fast byte view construction
* make doc happy
* fix clippy
* update comments
---
.../src/builder/generic_bytes_view_builder.rs | 56 ++++++++++++++++------
1 file changed, 41 insertions(+), 15 deletions(-)
diff --git a/arrow-array/src/builder/generic_bytes_view_builder.rs b/arrow-array/src/builder/generic_bytes_view_builder.rs
index 587255cc6..7726ee352 100644
--- a/arrow-array/src/builder/generic_bytes_view_builder.rs
+++ b/arrow-array/src/builder/generic_bytes_view_builder.rs
@@ -420,23 +420,49 @@ pub type StringViewBuilder = GenericByteViewBuilder<StringViewType>;
/// [`GenericByteViewBuilder::append_null`] as normal.
pub type BinaryViewBuilder = GenericByteViewBuilder<BinaryViewType>;
+/// Creates a view from a fixed length input (the compiler can generate
+/// specialized code for this)
+fn make_inlined_view<const LEN: usize>(data: &[u8]) -> u128 {
+ let mut view_buffer = [0; 16];
+ view_buffer[0..4].copy_from_slice(&(LEN as u32).to_le_bytes());
+ view_buffer[4..4 + LEN].copy_from_slice(&data[..LEN]);
+ u128::from_le_bytes(view_buffer)
+}
+
/// Create a view based on the given data, block id and offset
-#[inline(always)]
+/// Note that the code below was carefully examined against its x86_64 assembly: <https://godbolt.org/z/685YPsd5G>
+/// The goal is to avoid calling into `ptr::copy_nonoverlapping`, which results in a function call (i.e., is not inlined)
+/// and slows things down.
+#[inline(never)]
pub fn make_view(data: &[u8], block_id: u32, offset: u32) -> u128 {
- let len = data.len() as u32;
- if len <= 12 {
- let mut view_buffer = [0; 16];
- view_buffer[0..4].copy_from_slice(&len.to_le_bytes());
- view_buffer[4..4 + data.len()].copy_from_slice(data);
- u128::from_le_bytes(view_buffer)
- } else {
- let view = ByteView {
- length: len,
- prefix: u32::from_le_bytes(data[0..4].try_into().unwrap()),
- buffer_index: block_id,
- offset,
- };
- view.into()
+ let len = data.len();
+
+ // Generate specialized code for each potential small string length
+ // to improve performance
+ match len {
+ 0 => make_inlined_view::<0>(data),
+ 1 => make_inlined_view::<1>(data),
+ 2 => make_inlined_view::<2>(data),
+ 3 => make_inlined_view::<3>(data),
+ 4 => make_inlined_view::<4>(data),
+ 5 => make_inlined_view::<5>(data),
+ 6 => make_inlined_view::<6>(data),
+ 7 => make_inlined_view::<7>(data),
+ 8 => make_inlined_view::<8>(data),
+ 9 => make_inlined_view::<9>(data),
+ 10 => make_inlined_view::<10>(data),
+ 11 => make_inlined_view::<11>(data),
+ 12 => make_inlined_view::<12>(data),
+ // When the string is longer than 12 bytes it can't be inlined, so we create a ByteView instead.
+ _ => {
+ let view = ByteView {
+ length: len as u32,
+ prefix: u32::from_le_bytes(data[0..4].try_into().unwrap()),
+ buffer_index: block_id,
+ offset,
+ };
+ view.as_u128()
+ }
}
}