mbutrovich commented on code in PR #3659:
URL: https://github.com/apache/datafusion-comet/pull/3659#discussion_r3267395119
##########
native/shuffle/src/spark_unsafe/list.rs:
##########
@@ -45,40 +45,43 @@ macro_rules! impl_append_to_builder {
return;
}
+ // SAFETY: element_offset points to contiguous element data of
length num_elements.
+ debug_assert!(self.element_offset != 0, "element_offset is null");
+ let ptr = self.element_offset as *const $element_type;
+ let aligned = (ptr as
usize).is_multiple_of(std::mem::align_of::<$element_type>());
+
if NULLABLE {
- let mut ptr = self.element_offset as *const $element_type;
let null_words = self.null_bitset_ptr();
debug_assert!(!null_words.is_null(), "null_bitset_ptr is
null");
- debug_assert!(!ptr.is_null(), "element_offset pointer is
null");
- for idx in 0..num_elements {
- // SAFETY: null_words has ceil(num_elements/64) words, idx
< num_elements
- let is_null = unsafe { Self::is_null_in_bitset(null_words,
idx) };
-
- if is_null {
- builder.append_null();
- } else {
- // SAFETY: ptr is within element data bounds
- builder.append_value(unsafe { ptr.read_unaligned() });
+ if aligned {
+ let values = unsafe { std::slice::from_raw_parts(ptr,
num_elements) };
+ for (idx, &val) in values.iter().enumerate() {
+ if unsafe { Self::is_null_in_bitset(null_words, idx) }
{
+ builder.append_null();
+ } else {
+ builder.append_value(val);
+ }
}
- // SAFETY: ptr stays within bounds, iterating num_elements
times
- ptr = unsafe { ptr.add(1) };
- }
- } else {
- // SAFETY: element_offset points to contiguous data of length
num_elements
- debug_assert!(self.element_offset != 0, "element_offset is
null");
- let ptr = self.element_offset as *const $element_type;
- // Use bulk copy when data is properly aligned, fall back to
- // per-element unaligned reads otherwise
- if (ptr as
usize).is_multiple_of(std::mem::align_of::<$element_type>()) {
- let slice = unsafe { std::slice::from_raw_parts(ptr,
num_elements) };
- builder.append_slice(slice);
} else {
let mut ptr = ptr;
- for _ in 0..num_elements {
- builder.append_value(unsafe { ptr.read_unaligned() });
+ for idx in 0..num_elements {
+ if unsafe { Self::is_null_in_bitset(null_words, idx) }
{
Review Comment:
The refactor turns the manual `ptr.add(1)` into `values.iter().enumerate()`,
which is cleaner, but the hot path still branches on `is_null_in_bitset` per
element. Would you be open to pushing this further in the same PR? Two options
I can think of:
- `PrimitiveBuilder::append_values(values: &[T::Native], is_valid: &[bool])`
after materializing a `&[bool]` from the null bitset.
- Build a `BooleanBuffer` from the Spark null bitmap and feed validity to
the builder directly, so we skip the bool materialization too.
Either should remove the per-element branch. Given there is already a
microbenchmark covering with-nulls cases, it should be easy to check whether
this actually helps. Fine to keep it for a follow-up if you would rather land
the loop reshape first, but it feels like it belongs with this work.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]