This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new de9a90b1e Cleanup uses of Array::data_ref (#3880) (#3918)
de9a90b1e is described below

commit de9a90b1e09a43a6e8d2d3f3375f02a755d0cde1
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Thu Mar 23 22:01:35 2023 +0000

    Cleanup uses of Array::data_ref (#3880) (#3918)
    
    * Cleanup uses of Array::data_ref (#3880)
    
    * Further cleanup and fixes
---
 arrow-arith/src/arithmetic.rs                    |  54 +---
 arrow-arith/src/boolean.rs                       | 351 +++++++++--------------
 arrow-array/src/array/dictionary_array.rs        |   2 +-
 arrow-array/src/array/fixed_size_binary_array.rs |   2 +-
 arrow-array/src/array/fixed_size_list_array.rs   |   2 +-
 arrow-array/src/array/list_array.rs              |   2 +-
 arrow-array/src/array/null_array.rs              |   2 +-
 arrow-ord/src/sort.rs                            |  21 +-
 arrow-select/src/filter.rs                       |  22 +-
 arrow/benches/mutable_array.rs                   |   3 +-
 arrow/tests/array_equal.rs                       |   6 +-
 parquet/src/arrow/array_reader/struct_array.rs   |   2 +-
 12 files changed, 170 insertions(+), 299 deletions(-)

diff --git a/arrow-arith/src/arithmetic.rs b/arrow-arith/src/arithmetic.rs
index de4b0ccb8..7d60d131b 100644
--- a/arrow-arith/src/arithmetic.rs
+++ b/arrow-arith/src/arithmetic.rs
@@ -103,14 +103,13 @@ fn math_checked_divide_op_on_iters<T, F>(
     left: impl Iterator<Item = Option<T::Native>>,
     right: impl Iterator<Item = Option<T::Native>>,
     op: F,
-    len: usize,
-    null_bit_buffer: Option<arrow_buffer::Buffer>,
+    nulls: Option<arrow_buffer::NullBuffer>,
 ) -> Result<PrimitiveArray<T>, ArrowError>
 where
     T: ArrowNumericType,
     F: Fn(T::Native, T::Native) -> Result<T::Native, ArrowError>,
 {
-    let buffer = if null_bit_buffer.is_some() {
+    let buffer = if nulls.is_some() {
         let values = left.zip(right).map(|(left, right)| {
             if let (Some(l), Some(r)) = (left, right) {
                 op(l, r)
@@ -130,18 +129,7 @@ where
         unsafe { arrow_buffer::Buffer::try_from_trusted_len_iter(values) }
     }?;
 
-    let data = unsafe {
-        arrow_data::ArrayData::new_unchecked(
-            T::DATA_TYPE,
-            len,
-            None,
-            null_bit_buffer,
-            0,
-            vec![buffer],
-            vec![],
-        )
-    };
-    Ok(PrimitiveArray::<T>::from(data))
+    Ok(PrimitiveArray::new(T::DATA_TYPE, buffer.into(), nulls))
 }
 
 /// Calculates the modulus operation `left % right` on two SIMD inputs.
@@ -284,20 +272,16 @@ where
     }
 
     // Create the combined `Bitmap`
-    let null_bit_buffer = arrow_data::bit_mask::combine_option_bitmap(
-        &[left.data_ref(), right.data_ref()],
-        left.len(),
-    );
+    let nulls = arrow_buffer::NullBuffer::union(left.nulls(), right.nulls());
 
     let lanes = T::lanes();
     let buffer_size = left.len() * std::mem::size_of::<T::Native>();
     let mut result =
         arrow_buffer::MutableBuffer::new(buffer_size).with_bitset(buffer_size, 
false);
 
-    match &null_bit_buffer {
+    match &nulls {
         Some(b) => {
-            // combine_option_bitmap returns a slice or new buffer starting at 0
-            let valid_chunks = b.bit_chunks(0, left.len());
+            let valid_chunks = b.inner().bit_chunks();
 
             // process data in chunks of 64 elements since we also get 64 bits 
of validity information at a time
 
@@ -372,18 +356,7 @@ where
         }
     }
 
-    let data = unsafe {
-        arrow_data::ArrayData::new_unchecked(
-            T::DATA_TYPE,
-            left.len(),
-            None,
-            null_bit_buffer,
-            0,
-            vec![result.into()],
-            vec![],
-        )
-    };
-    Ok(PrimitiveArray::<T>::from(data))
+    Ok(PrimitiveArray::new(T::DATA_TYPE, result.into(), nulls))
 }
 
 /// Applies $OP to $LEFT and $RIGHT which are two dictionaries which have (the 
same) key type $KT
@@ -628,10 +601,7 @@ where
         )));
     }
 
-    let null_bit_buffer = arrow_data::bit_mask::combine_option_bitmap(
-        &[left.data_ref(), right.data_ref()],
-        left.len(),
-    );
+    let nulls = arrow_buffer::NullBuffer::union(left.nulls(), right.nulls());
 
     // Safety justification: Since the inputs are valid Arrow arrays, all 
values are
     // valid indexes into the dictionary (which is verified during 
construction)
@@ -653,13 +623,7 @@ where
             .take_iter_unchecked(right.keys_iter())
     };
 
-    math_checked_divide_op_on_iters(
-        left_iter,
-        right_iter,
-        op,
-        left.len(),
-        null_bit_buffer,
-    )
+    math_checked_divide_op_on_iters(left_iter, right_iter, op, nulls)
 }
 
 #[cfg(feature = "dyn_arith_dict")]
diff --git a/arrow-arith/src/boolean.rs b/arrow-arith/src/boolean.rs
index 3e21c2f1b..eaef13782 100644
--- a/arrow-arith/src/boolean.rs
+++ b/arrow-arith/src/boolean.rs
@@ -24,30 +24,55 @@
 
 use arrow_array::*;
 use arrow_buffer::bit_util::ceil;
-use arrow_buffer::buffer::{
-    bitwise_bin_op_helper, bitwise_quaternary_op_helper, buffer_bin_and, 
buffer_bin_or,
-    buffer_unary_not,
-};
-use arrow_buffer::{Buffer, MutableBuffer};
-use arrow_data::bit_mask::combine_option_bitmap;
+use arrow_buffer::buffer::{bitwise_bin_op_helper, 
bitwise_quaternary_op_helper};
+use arrow_buffer::{BooleanBuffer, MutableBuffer, NullBuffer};
 use arrow_data::ArrayData;
 use arrow_schema::{ArrowError, DataType};
 
-/// Updates null buffer based on data buffer and null buffer of the operand at 
other side
-/// in boolean AND kernel with Kleene logic. In short, because for AND kernel, 
null AND false
-/// results false. So we cannot simply AND two null buffers. This function 
updates null buffer
-/// of one side if other side is a false value.
-pub(crate) fn build_null_buffer_for_and_kleene(
-    left_data: &ArrayData,
-    right_data: &ArrayData,
-) -> Option<Buffer> {
-    let left_buffer = &left_data.buffers()[0];
-    let right_buffer = &right_data.buffers()[0];
-
-    let left_null_buffer = left_data.nulls();
-    let right_null_buffer = right_data.nulls();
-
-    match (left_null_buffer, right_null_buffer) {
+/// Logical 'and' boolean values with Kleene logic
+///
+/// # Behavior
+///
+/// This function behaves as follows with nulls:
+///
+/// * `true` and `null` = `null`
+/// * `null` and `true` = `null`
+/// * `false` and `null` = `false`
+/// * `null` and `false` = `false`
+/// * `null` and `null` = `null`
+///
+/// In other words, in this context a null value really means \"unknown\",
+/// and an unknown value 'and' false is always false.
+/// For a different null behavior, see function \"and\".
+///
+/// # Example
+///
+/// ```rust
+/// # use arrow_array::BooleanArray;
+/// # use arrow_arith::boolean::and_kleene;
+/// let a = BooleanArray::from(vec![Some(true), Some(false), None]);
+/// let b = BooleanArray::from(vec![None, None, None]);
+/// let and_ab = and_kleene(&a, &b).unwrap();
+/// assert_eq!(and_ab, BooleanArray::from(vec![None, Some(false), None]));
+/// ```
+///
+/// # Fails
+///
+/// If the operands have different lengths
+pub fn and_kleene(
+    left: &BooleanArray,
+    right: &BooleanArray,
+) -> Result<BooleanArray, ArrowError> {
+    if left.len() != right.len() {
+        return Err(ArrowError::ComputeError(
+            "Cannot perform bitwise operation on arrays of different 
length".to_string(),
+        ));
+    }
+
+    let left_values = left.values();
+    let right_values = right.values();
+
+    let buffer = match (left.nulls(), right.nulls()) {
         (None, None) => None,
         (Some(left_null_buffer), None) => {
             // The right side has no null values.
@@ -57,9 +82,9 @@ pub(crate) fn build_null_buffer_for_and_kleene(
             Some(bitwise_bin_op_helper(
                 left_null_buffer.buffer(),
                 left_null_buffer.offset(),
-                right_buffer,
-                right_data.offset(),
-                left_data.len(),
+                right_values.inner(),
+                right_values.offset(),
+                left.len(),
                 |a, b| a | !b,
             ))
         }
@@ -68,9 +93,9 @@ pub(crate) fn build_null_buffer_for_and_kleene(
             Some(bitwise_bin_op_helper(
                 right_null_buffer.buffer(),
                 right_null_buffer.offset(),
-                left_buffer,
-                left_data.offset(),
-                left_data.len(),
+                left_values.inner(),
+                left_values.offset(),
+                left.len(),
                 |a, b| a | !b,
             ))
         }
@@ -83,44 +108,69 @@ pub(crate) fn build_null_buffer_for_and_kleene(
             Some(bitwise_quaternary_op_helper(
                 [
                     left_null_buffer.buffer(),
-                    left_buffer,
+                    left_values.inner(),
                     right_null_buffer.buffer(),
-                    right_buffer,
+                    right_values.inner(),
                 ],
                 [
                     left_null_buffer.offset(),
-                    left_data.offset(),
+                    left_values.offset(),
                     right_null_buffer.offset(),
-                    right_data.offset(),
+                    right_values.offset(),
                 ],
-                left_data.len(),
+                left.len(),
                 |a, b, c, d| (a | (c & !d)) & (c | (a & !b)),
             ))
         }
-    }
+    };
+    let nulls = buffer.map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, 
left.len())));
+    Ok(BooleanArray::new(left_values & right_values, nulls))
 }
 
-/// For AND/OR kernels, the result of null buffer is simply a bitwise `and` 
operation.
-pub(crate) fn build_null_buffer_for_and_or(
-    left_data: &ArrayData,
-    right_data: &ArrayData,
-) -> Option<Buffer> {
-    // `arrays` are not empty, so safely do `unwrap` directly.
-    combine_option_bitmap(&[left_data, right_data], left_data.len())
-}
+/// Logical 'or' boolean values with Kleene logic
+///
+/// # Behavior
+///
+/// This function behaves as follows with nulls:
+///
+/// * `true` or `null` = `true`
+/// * `null` or `true` = `true`
+/// * `false` or `null` = `null`
+/// * `null` or `false` = `null`
+/// * `null` or `null` = `null`
+///
+/// In other words, in this context a null value really means \"unknown\",
+/// and an unknown value 'or' true is always true.
+/// For a different null behavior, see function \"or\".
+///
+/// # Example
+///
+/// ```rust
+/// # use arrow_array::BooleanArray;
+/// # use arrow_arith::boolean::or_kleene;
+/// let a = BooleanArray::from(vec![Some(true), Some(false), None]);
+/// let b = BooleanArray::from(vec![None, None, None]);
+/// let or_ab = or_kleene(&a, &b).unwrap();
+/// assert_eq!(or_ab, BooleanArray::from(vec![Some(true), None, None]));
+/// ```
+///
+/// # Fails
+///
+/// If the operands have different lengths
+pub fn or_kleene(
+    left: &BooleanArray,
+    right: &BooleanArray,
+) -> Result<BooleanArray, ArrowError> {
+    if left.len() != right.len() {
+        return Err(ArrowError::ComputeError(
+            "Cannot perform bitwise operation on arrays of different 
length".to_string(),
+        ));
+    }
+
+    let left_values = left.values();
+    let right_values = right.values();
 
-/// Updates null buffer based on data buffer and null buffer of the operand at 
other side
-/// in boolean OR kernel with Kleene logic. In short, because for OR kernel, 
null OR true
-/// results true. So we cannot simply AND two null buffers. This function 
updates null
-/// buffer of one side if other side is a true value.
-pub(crate) fn build_null_buffer_for_or_kleene(
-    left_data: &ArrayData,
-    right_data: &ArrayData,
-) -> Option<Buffer> {
-    let left_buffer = &left_data.buffers()[0];
-    let right_buffer = &right_data.buffers()[0];
-
-    match (left_data.nulls(), right_data.nulls()) {
+    let buffer = match (left.nulls(), right.nulls()) {
         (None, None) => None,
         (Some(left_nulls), None) => {
             // The right side has no null values.
@@ -130,9 +180,9 @@ pub(crate) fn build_null_buffer_for_or_kleene(
             Some(bitwise_bin_op_helper(
                 left_nulls.buffer(),
                 left_nulls.offset(),
-                right_buffer,
-                right_data.offset(),
-                right_data.len(),
+                right_values.inner(),
+                right_values.offset(),
+                left.len(),
                 |a, b| a | b,
             ))
         }
@@ -141,9 +191,9 @@ pub(crate) fn build_null_buffer_for_or_kleene(
             Some(bitwise_bin_op_helper(
                 right_nulls.buffer(),
                 right_nulls.offset(),
-                left_buffer,
-                left_data.offset(),
-                left_data.len(),
+                left_values.inner(),
+                left_values.offset(),
+                left.len(),
                 |a, b| a | b,
             ))
         }
@@ -156,33 +206,34 @@ pub(crate) fn build_null_buffer_for_or_kleene(
             Some(bitwise_quaternary_op_helper(
                 [
                     left_nulls.buffer(),
-                    left_buffer,
+                    left_values.inner(),
                     right_nulls.buffer(),
-                    right_buffer,
+                    right_values.inner(),
                 ],
                 [
                     left_nulls.offset(),
-                    left_data.offset(),
+                    left_values.offset(),
                     right_nulls.offset(),
-                    right_data.offset(),
+                    right_values.offset(),
                 ],
-                left_data.len(),
+                left.len(),
                 |a, b, c, d| (a | (c & d)) & (c | (a & b)),
             ))
         }
-    }
+    };
+
+    let nulls = buffer.map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, 
left.len())));
+    Ok(BooleanArray::new(left_values | right_values, nulls))
 }
 
 /// Helper function to implement binary kernels
-pub(crate) fn binary_boolean_kernel<F, U>(
+pub(crate) fn binary_boolean_kernel<F>(
     left: &BooleanArray,
     right: &BooleanArray,
     op: F,
-    null_op: U,
 ) -> Result<BooleanArray, ArrowError>
 where
-    F: Fn(&Buffer, usize, &Buffer, usize, usize) -> Buffer,
-    U: Fn(&ArrayData, &ArrayData) -> Option<Buffer>,
+    F: Fn(&BooleanBuffer, &BooleanBuffer) -> BooleanBuffer,
 {
     if left.len() != right.len() {
         return Err(ArrowError::ComputeError(
@@ -190,32 +241,9 @@ where
         ));
     }
 
-    let len = left.len();
-
-    let left_data = left.data_ref();
-    let right_data = right.data_ref();
-
-    let left_buffer = &left_data.buffers()[0];
-    let right_buffer = &right_data.buffers()[0];
-    let left_offset = left.offset();
-    let right_offset = right.offset();
-
-    let null_bit_buffer = null_op(left_data, right_data);
-
-    let values = op(left_buffer, left_offset, right_buffer, right_offset, len);
-
-    let data = unsafe {
-        ArrayData::new_unchecked(
-            DataType::Boolean,
-            len,
-            None,
-            null_bit_buffer,
-            0,
-            vec![values],
-            vec![],
-        )
-    };
-    Ok(BooleanArray::from(data))
+    let nulls = NullBuffer::union(left.nulls(), right.nulls());
+    let values = op(left.values(), right.values());
+    Ok(BooleanArray::new(values, nulls))
 }
 
 /// Performs `AND` operation on two arrays. If either left or right value is 
null then the
@@ -235,49 +263,7 @@ pub fn and(
     left: &BooleanArray,
     right: &BooleanArray,
 ) -> Result<BooleanArray, ArrowError> {
-    binary_boolean_kernel(left, right, buffer_bin_and, 
build_null_buffer_for_and_or)
-}
-
-/// Logical 'and' boolean values with Kleene logic
-///
-/// # Behavior
-///
-/// This function behaves as follows with nulls:
-///
-/// * `true` and `null` = `null`
-/// * `null` and `true` = `null`
-/// * `false` and `null` = `false`
-/// * `null` and `false` = `false`
-/// * `null` and `null` = `null`
-///
-/// In other words, in this context a null value really means \"unknown\",
-/// and an unknown value 'and' false is always false.
-/// For a different null behavior, see function \"and\".
-///
-/// # Example
-///
-/// ```rust
-/// # use arrow_array::BooleanArray;
-/// # use arrow_arith::boolean::and_kleene;
-/// let a = BooleanArray::from(vec![Some(true), Some(false), None]);
-/// let b = BooleanArray::from(vec![None, None, None]);
-/// let and_ab = and_kleene(&a, &b).unwrap();
-/// assert_eq!(and_ab, BooleanArray::from(vec![None, Some(false), None]));
-/// ```
-///
-/// # Fails
-///
-/// If the operands have different lengths
-pub fn and_kleene(
-    left: &BooleanArray,
-    right: &BooleanArray,
-) -> Result<BooleanArray, ArrowError> {
-    binary_boolean_kernel(
-        left,
-        right,
-        buffer_bin_and,
-        build_null_buffer_for_and_kleene,
-    )
+    binary_boolean_kernel(left, right, |a, b| a & b)
 }
 
 /// Performs `OR` operation on two arrays. If either left or right value is 
null then the
@@ -294,44 +280,7 @@ pub fn and_kleene(
 /// assert_eq!(or_ab, BooleanArray::from(vec![Some(true), Some(true), None]));
 /// ```
 pub fn or(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray, 
ArrowError> {
-    binary_boolean_kernel(left, right, buffer_bin_or, 
build_null_buffer_for_and_or)
-}
-
-/// Logical 'or' boolean values with Kleene logic
-///
-/// # Behavior
-///
-/// This function behaves as follows with nulls:
-///
-/// * `true` or `null` = `true`
-/// * `null` or `true` = `true`
-/// * `false` or `null` = `null`
-/// * `null` or `false` = `null`
-/// * `null` or `null` = `null`
-///
-/// In other words, in this context a null value really means \"unknown\",
-/// and an unknown value 'or' true is always true.
-/// For a different null behavior, see function \"or\".
-///
-/// # Example
-///
-/// ```rust
-/// # use arrow_array::BooleanArray;
-/// # use arrow_arith::boolean::or_kleene;
-/// let a = BooleanArray::from(vec![Some(true), Some(false), None]);
-/// let b = BooleanArray::from(vec![None, None, None]);
-/// let or_ab = or_kleene(&a, &b).unwrap();
-/// assert_eq!(or_ab, BooleanArray::from(vec![Some(true), None, None]));
-/// ```
-///
-/// # Fails
-///
-/// If the operands have different lengths
-pub fn or_kleene(
-    left: &BooleanArray,
-    right: &BooleanArray,
-) -> Result<BooleanArray, ArrowError> {
-    binary_boolean_kernel(left, right, buffer_bin_or, 
build_null_buffer_for_or_kleene)
+    binary_boolean_kernel(left, right, |a, b| a | b)
 }
 
 /// Performs unary `NOT` operation on an arrays. If value is null then the 
result is also
@@ -347,26 +296,9 @@ pub fn or_kleene(
 /// assert_eq!(not_a, BooleanArray::from(vec![Some(true), Some(false), None]));
 /// ```
 pub fn not(left: &BooleanArray) -> Result<BooleanArray, ArrowError> {
-    let left_offset = left.offset();
-    let len = left.len();
-
-    let data = left.data_ref();
-    let null_bit_buffer = data.nulls().map(|b| b.inner().sliced());
-
-    let values = buffer_unary_not(data.buffers()[0], left_offset, len);
-
-    let data = unsafe {
-        ArrayData::new_unchecked(
-            DataType::Boolean,
-            len,
-            None,
-            null_bit_buffer,
-            0,
-            vec![values],
-            vec![],
-        )
-    };
-    Ok(BooleanArray::from(data))
+    let nulls = left.nulls().cloned();
+    let values = !left.values();
+    Ok(BooleanArray::new(values, nulls))
 }
 
 /// Returns a non-null [BooleanArray] with whether each value of the array is 
null.
@@ -381,29 +313,12 @@ pub fn not(left: &BooleanArray) -> Result<BooleanArray, 
ArrowError> {
 /// assert_eq!(a_is_null, BooleanArray::from(vec![false, false, true]));
 /// ```
 pub fn is_null(input: &dyn Array) -> Result<BooleanArray, ArrowError> {
-    let len = input.len();
-
-    let output = match input.data_ref().nulls() {
-        None => {
-            let len_bytes = ceil(len, 8);
-            MutableBuffer::from_len_zeroed(len_bytes).into()
-        }
-        Some(nulls) => buffer_unary_not(nulls.buffer(), nulls.offset(), 
nulls.len()),
+    let values = match input.nulls() {
+        None => NullBuffer::new_null(input.len()).into_inner(),
+        Some(nulls) => !nulls.inner(),
     };
 
-    let data = unsafe {
-        ArrayData::new_unchecked(
-            DataType::Boolean,
-            len,
-            None,
-            None,
-            0,
-            vec![output],
-            vec![],
-        )
-    };
-
-    Ok(BooleanArray::from(data))
+    Ok(BooleanArray::new(values, None))
 }
 
 /// Returns a non-null [BooleanArray] with whether each value of the array is 
not null.
@@ -420,7 +335,7 @@ pub fn is_null(input: &dyn Array) -> Result<BooleanArray, 
ArrowError> {
 pub fn is_not_null(input: &dyn Array) -> Result<BooleanArray, ArrowError> {
     let len = input.len();
 
-    let output = match input.data_ref().nulls() {
+    let output = match input.nulls() {
         None => {
             let len_bytes = ceil(len, 8);
             MutableBuffer::new(len_bytes)
diff --git a/arrow-array/src/array/dictionary_array.rs 
b/arrow-array/src/array/dictionary_array.rs
index 0862230a4..49a184369 100644
--- a/arrow-array/src/array/dictionary_array.rs
+++ b/arrow-array/src/array/dictionary_array.rs
@@ -294,7 +294,7 @@ impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
 
     /// Returns a clone of the value type of this list.
     pub fn value_type(&self) -> DataType {
-        self.values.data_ref().data_type().clone()
+        self.values.data_type().clone()
     }
 
     /// The length of the dictionary is the length of the keys array.
diff --git a/arrow-array/src/array/fixed_size_binary_array.rs 
b/arrow-array/src/array/fixed_size_binary_array.rs
index 062961a20..af51ff787 100644
--- a/arrow-array/src/array/fixed_size_binary_array.rs
+++ b/arrow-array/src/array/fixed_size_binary_array.rs
@@ -425,7 +425,7 @@ impl From<FixedSizeListArray> for FixedSizeBinaryArray {
             .len(v.len())
             .offset(v.offset())
             .add_buffer(child_data.buffers()[0].slice(child_data.offset()))
-            .nulls(v.data_ref().nulls().cloned());
+            .nulls(v.nulls().cloned());
 
         let data = unsafe { builder.build_unchecked() };
         Self::from(data)
diff --git a/arrow-array/src/array/fixed_size_list_array.rs 
b/arrow-array/src/array/fixed_size_list_array.rs
index 7d65927cd..0910e2944 100644
--- a/arrow-array/src/array/fixed_size_list_array.rs
+++ b/arrow-array/src/array/fixed_size_list_array.rs
@@ -76,7 +76,7 @@ impl FixedSizeListArray {
 
     /// Returns a clone of the value type of this list.
     pub fn value_type(&self) -> DataType {
-        self.values.data_ref().data_type().clone()
+        self.values.data_type().clone()
     }
 
     /// Returns ith value of this list array.
diff --git a/arrow-array/src/array/list_array.rs 
b/arrow-array/src/array/list_array.rs
index dca256008..c7e2a817b 100644
--- a/arrow-array/src/array/list_array.rs
+++ b/arrow-array/src/array/list_array.rs
@@ -85,7 +85,7 @@ impl<OffsetSize: OffsetSizeTrait> 
GenericListArray<OffsetSize> {
 
     /// Returns a clone of the value type of this list.
     pub fn value_type(&self) -> DataType {
-        self.values.data_ref().data_type().clone()
+        self.values.data_type().clone()
     }
 
     /// Returns ith value of this list array.
diff --git a/arrow-array/src/array/null_array.rs 
b/arrow-array/src/array/null_array.rs
index fba6e41e8..3d65e9e9e 100644
--- a/arrow-array/src/array/null_array.rs
+++ b/arrow-array/src/array/null_array.rs
@@ -97,7 +97,7 @@ impl Array for NullArray {
     /// Returns the total number of null values in this array.
     /// The null count of a `NullArray` always equals its length.
     fn null_count(&self) -> usize {
-        self.data_ref().len()
+        self.len()
     }
 }
 
diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs
index ab6460e83..c6fedb960 100644
--- a/arrow-ord/src/sort.rs
+++ b/arrow-ord/src/sort.rs
@@ -22,7 +22,7 @@ use arrow_array::builder::BufferBuilder;
 use arrow_array::cast::*;
 use arrow_array::types::*;
 use arrow_array::*;
-use arrow_buffer::{ArrowNativeType, MutableBuffer};
+use arrow_buffer::{ArrowNativeType, MutableBuffer, NullBuffer};
 use arrow_data::ArrayData;
 use arrow_data::ArrayDataBuilder;
 use arrow_schema::{ArrowError, DataType, IntervalUnit, TimeUnit};
@@ -1145,9 +1145,9 @@ where
 }
 
 type LexicographicalCompareItem<'a> = (
-    &'a ArrayData, // data
-    DynComparator, // comparator
-    SortOptions,   // sort_option
+    Option<&'a NullBuffer>, // nulls
+    DynComparator,          // comparator
+    SortOptions,            // sort_option
 );
 
 /// A lexicographical comparator that wraps given array data (columns) and can 
lexicographically compare data
@@ -1159,8 +1159,13 @@ pub struct LexicographicalComparator<'a> {
 impl LexicographicalComparator<'_> {
     /// lexicographically compare values at the wrapped columns with given 
indices.
     pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
-        for (data, comparator, sort_option) in &self.compare_items {
-            match (data.is_valid(a_idx), data.is_valid(b_idx)) {
+        for (nulls, comparator, sort_option) in &self.compare_items {
+            let (lhs_valid, rhs_valid) = match nulls {
+                Some(n) => (n.is_valid(a_idx), n.is_valid(b_idx)),
+                None => (true, true),
+            };
+
+            match (lhs_valid, rhs_valid) {
                 (true, true) => {
                     match (comparator)(a_idx, b_idx) {
                         // equal, move on to next column
@@ -1205,11 +1210,9 @@ impl LexicographicalComparator<'_> {
             .iter()
             .map(|column| {
                 // flatten and convert build comparators
-                // use ArrayData for is_valid checks later to avoid dynamic 
call
                 let values = column.values.as_ref();
-                let data = values.data_ref();
                 Ok((
-                    data,
+                    values.nulls(),
                     build_compare(values, values)?,
                     column.options.unwrap_or_default(),
                 ))
diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs
index 14fd5d9d1..567aaa58e 100644
--- a/arrow-select/src/filter.rs
+++ b/arrow-select/src/filter.rs
@@ -53,11 +53,7 @@ pub struct SlicesIterator<'a>(BitSliceIterator<'a>);
 
 impl<'a> SlicesIterator<'a> {
     pub fn new(filter: &'a BooleanArray) -> Self {
-        let values = &filter.data_ref().buffers()[0];
-        let len = filter.len();
-        let offset = filter.offset();
-
-        Self(BitSliceIterator::new(values, offset, len))
+        Self(filter.values().set_slices())
     }
 }
 
@@ -149,18 +145,9 @@ pub fn build_filter(filter: &BooleanArray) -> 
Result<Filter, ArrowError> {
 
 /// Remove null values by do a bitmask AND operation with null bits and the 
boolean bits.
 pub fn prep_null_mask_filter(filter: &BooleanArray) -> BooleanArray {
-    let array_data = filter.data_ref();
-    let nulls = array_data.nulls().unwrap();
+    let nulls = filter.nulls().unwrap();
     let mask = filter.values() & nulls.inner();
-
-    let array_data = ArrayData::builder(DataType::Boolean)
-        .len(mask.len())
-        .offset(mask.offset())
-        .add_buffer(mask.into_inner());
-
-    let array_data = unsafe { array_data.build_unchecked() };
-
-    BooleanArray::from(array_data)
+    BooleanArray::new(mask, None)
 }
 
 /// Filters an [Array], returning elements matching the filter (i.e. where the 
values are true).
@@ -365,9 +352,10 @@ fn filter_array(
                 t => unimplemented!("Filter not supported for dictionary type 
{:?}", t)
             }
             _ => {
+                let data = values.to_data();
                 // fallback to using MutableArrayData
                 let mut mutable = MutableArrayData::new(
-                    vec![values.data_ref()],
+                    vec![&data],
                     false,
                     predicate.count,
                 );
diff --git a/arrow/benches/mutable_array.rs b/arrow/benches/mutable_array.rs
index 3a42ec1be..b04e5cd84 100644
--- a/arrow/benches/mutable_array.rs
+++ b/arrow/benches/mutable_array.rs
@@ -39,7 +39,8 @@ fn create_slices(size: usize) -> Vec<(usize, usize)> {
 }
 
 fn bench<T: Array>(v1: &T, slices: &[(usize, usize)]) {
-    let mut mutable = MutableArrayData::new(vec![v1.data_ref()], false, 5);
+    let data = v1.to_data();
+    let mut mutable = MutableArrayData::new(vec![&data], false, 5);
     for (start, end) in slices {
         mutable.extend(0, *start, *end)
     }
diff --git a/arrow/tests/array_equal.rs b/arrow/tests/array_equal.rs
index b6f81f6a4..af81b17e4 100644
--- a/arrow/tests/array_equal.rs
+++ b/arrow/tests/array_equal.rs
@@ -856,7 +856,7 @@ fn test_struct_equal_null() {
     )]))
     .null_bit_buffer(Some(Buffer::from(vec![0b00011110])))
     .len(5)
-    .add_child_data(a.data_ref().clone())
+    .add_child_data(a.to_data())
     .build()
     .unwrap();
     let a = make_array(a);
@@ -920,7 +920,7 @@ fn test_struct_equal_null_variable_size() {
     )]))
     .null_bit_buffer(Some(Buffer::from(vec![0b00001010])))
     .len(5)
-    .add_child_data(strings1.data_ref().clone())
+    .add_child_data(strings1.to_data())
     .build()
     .unwrap();
     let a = make_array(a);
@@ -932,7 +932,7 @@ fn test_struct_equal_null_variable_size() {
     )]))
     .null_bit_buffer(Some(Buffer::from(vec![0b00001010])))
     .len(5)
-    .add_child_data(strings2.data_ref().clone())
+    .add_child_data(strings2.to_data())
     .build()
     .unwrap();
     let b = make_array(b);
diff --git a/parquet/src/arrow/array_reader/struct_array.rs 
b/parquet/src/arrow/array_reader/struct_array.rs
index b470be5ad..91e839fc1 100644
--- a/parquet/src/arrow/array_reader/struct_array.rs
+++ b/parquet/src/arrow/array_reader/struct_array.rs
@@ -257,7 +257,7 @@ mod tests {
         assert_eq!(
             vec![true, false, false, false, false],
             (0..5)
-                .map(|idx| struct_array.data_ref().is_null(idx))
+                .map(|idx| struct_array.is_null(idx))
                 .collect::<Vec<bool>>()
         );
         assert_eq!(

Reply via email to