comphead commented on code in PR #3289:
URL: https://github.com/apache/datafusion-comet/pull/3289#discussion_r2914908717
##########
native/core/src/execution/shuffle/spark_unsafe/list.rs:
##########
@@ -32,17 +32,72 @@ use arrow::array::{
};
use arrow::datatypes::{DataType, TimeUnit};
+/// Generates bulk append methods for primitive types in SparkUnsafeArray.
+///
+/// # Safety invariants for all generated methods:
+/// - `element_offset` points to contiguous element data of length `num_elements`
+/// - `null_bitset_ptr()` returns a pointer to `ceil(num_elements/64)` i64 words
+/// - These invariants are guaranteed by the SparkUnsafeArray layout from the JVM
+macro_rules! impl_append_to_builder {
+ ($method_name:ident, $builder_type:ty, $element_type:ty) => {
+ pub(crate) fn $method_name<const NULLABLE: bool>(&self, builder: &mut
$builder_type) {
+ let num_elements = self.num_elements;
+ if num_elements == 0 {
+ return;
+ }
+
+ if NULLABLE {
+ let mut ptr = self.element_offset as *const $element_type;
+ let null_words = self.null_bitset_ptr();
+ debug_assert!(!null_words.is_null(), "null_bitset_ptr is
null");
+ debug_assert!(!ptr.is_null(), "element_offset pointer is
null");
+ for idx in 0..num_elements {
+ // SAFETY: null_words has ceil(num_elements/64) words, idx
< num_elements
+ let is_null = unsafe { Self::is_null_in_bitset(null_words,
idx) };
+
+ if is_null {
+ builder.append_null();
+ } else {
+ // SAFETY: ptr is within element data bounds
+ builder.append_value(unsafe { ptr.read_unaligned() });
+ }
+ // SAFETY: ptr stays within bounds, iterating num_elements
times
+ ptr = unsafe { ptr.add(1) };
+ }
+ } else {
+ // SAFETY: element_offset points to contiguous data of length
num_elements
+ debug_assert!(self.element_offset != 0, "element_offset is
null");
+ let ptr = self.element_offset as *const $element_type;
+ // Use bulk copy when data is properly aligned, fall back to
+ // per-element unaligned reads otherwise
+ if (ptr as
usize).is_multiple_of(std::mem::align_of::<$element_type>()) {
Review Comment:
👍
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]