crepererum commented on code in PR #6288: URL: https://github.com/apache/arrow-rs/pull/6288#discussion_r1749934148
########## arrow-buffer/src/util/bit_mask.rs: ########## @@ -32,33 +31,118 @@ pub fn set_bits( ) -> usize { let mut null_count = 0; - let mut bits_to_align = offset_write % 8; - if bits_to_align > 0 { - bits_to_align = std::cmp::min(len, 8 - bits_to_align); + let mut acc = 0; + while len > acc { + let (n, len_set) = set_upto_64bits( + write_data, + data, + offset_write + acc, + offset_read + acc, + len - acc, + ); + null_count += n; + acc += len_set; } - let mut write_byte_index = ceil(offset_write + bits_to_align, 8); - - // Set full bytes provided by bit chunk iterator (which iterates in 64 bits at a time) - let chunks = BitChunks::new(data, offset_read + bits_to_align, len - bits_to_align); - chunks.iter().for_each(|chunk| { - null_count += chunk.count_zeros(); - write_data[write_byte_index..write_byte_index + 8].copy_from_slice(&chunk.to_le_bytes()); - write_byte_index += 8; - }); - - // Set individual bits both to align write_data to a byte offset and the remainder bits not covered by the bit chunk iterator - let remainder_offset = len - chunks.remainder_len(); - (0..bits_to_align) - .chain(remainder_offset..len) - .for_each(|i| { - if get_bit(data, offset_read + i) { - set_bit(write_data, offset_write + i); + + null_count +} + +#[inline] +fn set_upto_64bits( + write_data: &mut [u8], + data: &[u8], + offset_write: usize, + offset_read: usize, + len: usize, Review Comment: It has to be `assert` (always executed), not `debug_assert` (only executed in debug builds). If the overhead of that check is too expensive, make the function unsafe and add a `SAFETY` comment where you call it stating why you think the parameters are always in range. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org