rluvaton commented on code in PR #9711:
URL: https://github.com/apache/arrow-rs/pull/9711#discussion_r3143244888
##########
arrow-buffer/src/buffer/offset.rs:
##########
@@ -238,6 +238,97 @@ impl<O: ArrowNativeType> OffsetBuffer<O> {
pub fn ptr_eq(&self, other: &Self) -> bool {
self.0.ptr_eq(&other.0)
}
+
+ /// Check if any null positions in the `null_buffer` correspond to
+ /// non-empty ranges in this [`OffsetBuffer`].
+ ///
+ /// In variable-length array types (e.g., `StringArray`, `ListArray`),
+ /// null entries may or may not have empty offset ranges. This method
+ /// detects cases where a null entry has a non-empty range
+ /// (i.e., `offsets[i] != offsets[i+1]`), which means the underlying
+ /// data buffer contains data behind nulls.
+ ///
+ /// This matters because unwrapping (flattening) a list array exposes
+ /// the child values, including those behind null entries. If null
+ /// entries point to non-empty ranges, the unwrapped values will
+ /// contain data that may not be meaningful to operate on and could
+ /// cause errors (e.g., division by zero in the child values).
+ ///
+ /// Returns `false` if `null_buffer` is `None` or contains no nulls.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # use arrow_buffer::{OffsetBuffer, ScalarBuffer, NullBuffer};
+ /// // Offsets where null at index 1 has an empty range (3..3)
+ /// let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3,
3, 6]));
+ /// let nulls = NullBuffer::from(vec![true, false, true]);
+ ///
assert!(!offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls)));
+ ///
+ /// // Offsets where null at index 1 has a non-empty range (3..7)
+ /// let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3,
7, 10]));
+ /// let nulls = NullBuffer::from(vec![true, false, true]);
+ ///
assert!(offsets.is_there_null_pointing_to_non_empty_value(Some(&nulls)));
+ /// ```
+ ///
+ /// # Panics
+ ///
+ /// Panics if the length of the `null_buffer` does not equal `self.len() -
1`.
+ pub fn is_there_null_pointing_to_non_empty_value(
+ &self,
+ null_buffer: Option<&NullBuffer>,
+ ) -> bool {
+ let Some(null_buffer) = null_buffer else {
+ return false;
+ };
+
+ assert_eq!(
+ self.len() - 1,
+ null_buffer.len(),
+ "The length of the offsets should be 1 more than the length of the
null buffer"
+ );
+
+ if null_buffer.null_count() == 0 {
+ return false;
+ }
+
+ // Offsets always have at least 1 value
+ let initial_offset = self[0];
+ let last_offset = self[self.len() - 1];
+
+ // If all the values are null (offsets have 1 more value than the
length of the array)
+ if null_buffer.null_count() == self.len() - 1 {
+ return last_offset != initial_offset;
+ }
+
+ let mut valid_slices_iter = null_buffer.valid_slices();
Review Comment:
yes but I feel the code is pretty straight forward and this have comments as
well that explaining the logic
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]