This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new de9a90b1e Cleanup uses of Array::data_ref (#3880) (#3918)
de9a90b1e is described below
commit de9a90b1e09a43a6e8d2d3f3375f02a755d0cde1
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Thu Mar 23 22:01:35 2023 +0000
Cleanup uses of Array::data_ref (#3880) (#3918)
* Cleanup uses of Array::data_ref (#3880)
* Further cleanup and fixes
---
arrow-arith/src/arithmetic.rs | 54 +---
arrow-arith/src/boolean.rs | 351 +++++++++--------------
arrow-array/src/array/dictionary_array.rs | 2 +-
arrow-array/src/array/fixed_size_binary_array.rs | 2 +-
arrow-array/src/array/fixed_size_list_array.rs | 2 +-
arrow-array/src/array/list_array.rs | 2 +-
arrow-array/src/array/null_array.rs | 2 +-
arrow-ord/src/sort.rs | 21 +-
arrow-select/src/filter.rs | 22 +-
arrow/benches/mutable_array.rs | 3 +-
arrow/tests/array_equal.rs | 6 +-
parquet/src/arrow/array_reader/struct_array.rs | 2 +-
12 files changed, 170 insertions(+), 299 deletions(-)
diff --git a/arrow-arith/src/arithmetic.rs b/arrow-arith/src/arithmetic.rs
index de4b0ccb8..7d60d131b 100644
--- a/arrow-arith/src/arithmetic.rs
+++ b/arrow-arith/src/arithmetic.rs
@@ -103,14 +103,13 @@ fn math_checked_divide_op_on_iters<T, F>(
left: impl Iterator<Item = Option<T::Native>>,
right: impl Iterator<Item = Option<T::Native>>,
op: F,
- len: usize,
- null_bit_buffer: Option<arrow_buffer::Buffer>,
+ nulls: Option<arrow_buffer::NullBuffer>,
) -> Result<PrimitiveArray<T>, ArrowError>
where
T: ArrowNumericType,
F: Fn(T::Native, T::Native) -> Result<T::Native, ArrowError>,
{
- let buffer = if null_bit_buffer.is_some() {
+ let buffer = if nulls.is_some() {
let values = left.zip(right).map(|(left, right)| {
if let (Some(l), Some(r)) = (left, right) {
op(l, r)
@@ -130,18 +129,7 @@ where
unsafe { arrow_buffer::Buffer::try_from_trusted_len_iter(values) }
}?;
- let data = unsafe {
- arrow_data::ArrayData::new_unchecked(
- T::DATA_TYPE,
- len,
- None,
- null_bit_buffer,
- 0,
- vec![buffer],
- vec![],
- )
- };
- Ok(PrimitiveArray::<T>::from(data))
+ Ok(PrimitiveArray::new(T::DATA_TYPE, buffer.into(), nulls))
}
/// Calculates the modulus operation `left % right` on two SIMD inputs.
@@ -284,20 +272,16 @@ where
}
// Create the combined `Bitmap`
- let null_bit_buffer = arrow_data::bit_mask::combine_option_bitmap(
- &[left.data_ref(), right.data_ref()],
- left.len(),
- );
+ let nulls = arrow_buffer::NullBuffer::union(left.nulls(), right.nulls());
let lanes = T::lanes();
let buffer_size = left.len() * std::mem::size_of::<T::Native>();
let mut result =
arrow_buffer::MutableBuffer::new(buffer_size).with_bitset(buffer_size,
false);
- match &null_bit_buffer {
+ match &nulls {
Some(b) => {
- // combine_option_bitmap returns a slice or new buffer starting at 0
- let valid_chunks = b.bit_chunks(0, left.len());
+ let valid_chunks = b.inner().bit_chunks();
// process data in chunks of 64 elements since we also get 64 bits
of validity information at a time
@@ -372,18 +356,7 @@ where
}
}
- let data = unsafe {
- arrow_data::ArrayData::new_unchecked(
- T::DATA_TYPE,
- left.len(),
- None,
- null_bit_buffer,
- 0,
- vec![result.into()],
- vec![],
- )
- };
- Ok(PrimitiveArray::<T>::from(data))
+ Ok(PrimitiveArray::new(T::DATA_TYPE, result.into(), nulls))
}
/// Applies $OP to $LEFT and $RIGHT which are two dictionaries which have (the
same) key type $KT
@@ -628,10 +601,7 @@ where
)));
}
- let null_bit_buffer = arrow_data::bit_mask::combine_option_bitmap(
- &[left.data_ref(), right.data_ref()],
- left.len(),
- );
+ let nulls = arrow_buffer::NullBuffer::union(left.nulls(), right.nulls());
// Safety justification: Since the inputs are valid Arrow arrays, all
values are
// valid indexes into the dictionary (which is verified during
construction)
@@ -653,13 +623,7 @@ where
.take_iter_unchecked(right.keys_iter())
};
- math_checked_divide_op_on_iters(
- left_iter,
- right_iter,
- op,
- left.len(),
- null_bit_buffer,
- )
+ math_checked_divide_op_on_iters(left_iter, right_iter, op, nulls)
}
#[cfg(feature = "dyn_arith_dict")]
diff --git a/arrow-arith/src/boolean.rs b/arrow-arith/src/boolean.rs
index 3e21c2f1b..eaef13782 100644
--- a/arrow-arith/src/boolean.rs
+++ b/arrow-arith/src/boolean.rs
@@ -24,30 +24,55 @@
use arrow_array::*;
use arrow_buffer::bit_util::ceil;
-use arrow_buffer::buffer::{
- bitwise_bin_op_helper, bitwise_quaternary_op_helper, buffer_bin_and,
buffer_bin_or,
- buffer_unary_not,
-};
-use arrow_buffer::{Buffer, MutableBuffer};
-use arrow_data::bit_mask::combine_option_bitmap;
+use arrow_buffer::buffer::{bitwise_bin_op_helper,
bitwise_quaternary_op_helper};
+use arrow_buffer::{BooleanBuffer, MutableBuffer, NullBuffer};
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
-/// Updates null buffer based on data buffer and null buffer of the operand at
other side
-/// in boolean AND kernel with Kleene logic. In short, because for AND kernel,
null AND false
-/// results false. So we cannot simply AND two null buffers. This function
updates null buffer
-/// of one side if other side is a false value.
-pub(crate) fn build_null_buffer_for_and_kleene(
- left_data: &ArrayData,
- right_data: &ArrayData,
-) -> Option<Buffer> {
- let left_buffer = &left_data.buffers()[0];
- let right_buffer = &right_data.buffers()[0];
-
- let left_null_buffer = left_data.nulls();
- let right_null_buffer = right_data.nulls();
-
- match (left_null_buffer, right_null_buffer) {
+/// Logical 'and' boolean values with Kleene logic
+///
+/// # Behavior
+///
+/// This function behaves as follows with nulls:
+///
+/// * `true` and `null` = `null`
+/// * `null` and `true` = `null`
+/// * `false` and `null` = `false`
+/// * `null` and `false` = `false`
+/// * `null` and `null` = `null`
+///
+/// In other words, in this context a null value really means \"unknown\",
+/// and an unknown value 'and' false is always false.
+/// For a different null behavior, see function \"and\".
+///
+/// # Example
+///
+/// ```rust
+/// # use arrow_array::BooleanArray;
+/// # use arrow_arith::boolean::and_kleene;
+/// let a = BooleanArray::from(vec![Some(true), Some(false), None]);
+/// let b = BooleanArray::from(vec![None, None, None]);
+/// let and_ab = and_kleene(&a, &b).unwrap();
+/// assert_eq!(and_ab, BooleanArray::from(vec![None, Some(false), None]));
+/// ```
+///
+/// # Fails
+///
+/// If the operands have different lengths
+pub fn and_kleene(
+ left: &BooleanArray,
+ right: &BooleanArray,
+) -> Result<BooleanArray, ArrowError> {
+ if left.len() != right.len() {
+ return Err(ArrowError::ComputeError(
+ "Cannot perform bitwise operation on arrays of different
length".to_string(),
+ ));
+ }
+
+ let left_values = left.values();
+ let right_values = right.values();
+
+ let buffer = match (left.nulls(), right.nulls()) {
(None, None) => None,
(Some(left_null_buffer), None) => {
// The right side has no null values.
@@ -57,9 +82,9 @@ pub(crate) fn build_null_buffer_for_and_kleene(
Some(bitwise_bin_op_helper(
left_null_buffer.buffer(),
left_null_buffer.offset(),
- right_buffer,
- right_data.offset(),
- left_data.len(),
+ right_values.inner(),
+ right_values.offset(),
+ left.len(),
|a, b| a | !b,
))
}
@@ -68,9 +93,9 @@ pub(crate) fn build_null_buffer_for_and_kleene(
Some(bitwise_bin_op_helper(
right_null_buffer.buffer(),
right_null_buffer.offset(),
- left_buffer,
- left_data.offset(),
- left_data.len(),
+ left_values.inner(),
+ left_values.offset(),
+ left.len(),
|a, b| a | !b,
))
}
@@ -83,44 +108,69 @@ pub(crate) fn build_null_buffer_for_and_kleene(
Some(bitwise_quaternary_op_helper(
[
left_null_buffer.buffer(),
- left_buffer,
+ left_values.inner(),
right_null_buffer.buffer(),
- right_buffer,
+ right_values.inner(),
],
[
left_null_buffer.offset(),
- left_data.offset(),
+ left_values.offset(),
right_null_buffer.offset(),
- right_data.offset(),
+ right_values.offset(),
],
- left_data.len(),
+ left.len(),
|a, b, c, d| (a | (c & !d)) & (c | (a & !b)),
))
}
- }
+ };
+ let nulls = buffer.map(|b| NullBuffer::new(BooleanBuffer::new(b, 0,
left.len())));
+ Ok(BooleanArray::new(left_values & right_values, nulls))
}
-/// For AND/OR kernels, the result of null buffer is simply a bitwise `and`
operation.
-pub(crate) fn build_null_buffer_for_and_or(
- left_data: &ArrayData,
- right_data: &ArrayData,
-) -> Option<Buffer> {
- // `arrays` are not empty, so safely do `unwrap` directly.
- combine_option_bitmap(&[left_data, right_data], left_data.len())
-}
+/// Logical 'or' boolean values with Kleene logic
+///
+/// # Behavior
+///
+/// This function behaves as follows with nulls:
+///
+/// * `true` or `null` = `true`
+/// * `null` or `true` = `true`
+/// * `false` or `null` = `null`
+/// * `null` or `false` = `null`
+/// * `null` or `null` = `null`
+///
+/// In other words, in this context a null value really means \"unknown\",
+/// and an unknown value 'or' true is always true.
+/// For a different null behavior, see function \"or\".
+///
+/// # Example
+///
+/// ```rust
+/// # use arrow_array::BooleanArray;
+/// # use arrow_arith::boolean::or_kleene;
+/// let a = BooleanArray::from(vec![Some(true), Some(false), None]);
+/// let b = BooleanArray::from(vec![None, None, None]);
+/// let or_ab = or_kleene(&a, &b).unwrap();
+/// assert_eq!(or_ab, BooleanArray::from(vec![Some(true), None, None]));
+/// ```
+///
+/// # Fails
+///
+/// If the operands have different lengths
+pub fn or_kleene(
+ left: &BooleanArray,
+ right: &BooleanArray,
+) -> Result<BooleanArray, ArrowError> {
+ if left.len() != right.len() {
+ return Err(ArrowError::ComputeError(
+ "Cannot perform bitwise operation on arrays of different
length".to_string(),
+ ));
+ }
+
+ let left_values = left.values();
+ let right_values = right.values();
-/// Updates null buffer based on data buffer and null buffer of the operand at
other side
-/// in boolean OR kernel with Kleene logic. In short, because for OR kernel,
null OR true
-/// results true. So we cannot simply AND two null buffers. This function
updates null
-/// buffer of one side if other side is a true value.
-pub(crate) fn build_null_buffer_for_or_kleene(
- left_data: &ArrayData,
- right_data: &ArrayData,
-) -> Option<Buffer> {
- let left_buffer = &left_data.buffers()[0];
- let right_buffer = &right_data.buffers()[0];
-
- match (left_data.nulls(), right_data.nulls()) {
+ let buffer = match (left.nulls(), right.nulls()) {
(None, None) => None,
(Some(left_nulls), None) => {
// The right side has no null values.
@@ -130,9 +180,9 @@ pub(crate) fn build_null_buffer_for_or_kleene(
Some(bitwise_bin_op_helper(
left_nulls.buffer(),
left_nulls.offset(),
- right_buffer,
- right_data.offset(),
- right_data.len(),
+ right_values.inner(),
+ right_values.offset(),
+ left.len(),
|a, b| a | b,
))
}
@@ -141,9 +191,9 @@ pub(crate) fn build_null_buffer_for_or_kleene(
Some(bitwise_bin_op_helper(
right_nulls.buffer(),
right_nulls.offset(),
- left_buffer,
- left_data.offset(),
- left_data.len(),
+ left_values.inner(),
+ left_values.offset(),
+ left.len(),
|a, b| a | b,
))
}
@@ -156,33 +206,34 @@ pub(crate) fn build_null_buffer_for_or_kleene(
Some(bitwise_quaternary_op_helper(
[
left_nulls.buffer(),
- left_buffer,
+ left_values.inner(),
right_nulls.buffer(),
- right_buffer,
+ right_values.inner(),
],
[
left_nulls.offset(),
- left_data.offset(),
+ left_values.offset(),
right_nulls.offset(),
- right_data.offset(),
+ right_values.offset(),
],
- left_data.len(),
+ left.len(),
|a, b, c, d| (a | (c & d)) & (c | (a & b)),
))
}
- }
+ };
+
+ let nulls = buffer.map(|b| NullBuffer::new(BooleanBuffer::new(b, 0,
left.len())));
+ Ok(BooleanArray::new(left_values | right_values, nulls))
}
/// Helper function to implement binary kernels
-pub(crate) fn binary_boolean_kernel<F, U>(
+pub(crate) fn binary_boolean_kernel<F>(
left: &BooleanArray,
right: &BooleanArray,
op: F,
- null_op: U,
) -> Result<BooleanArray, ArrowError>
where
- F: Fn(&Buffer, usize, &Buffer, usize, usize) -> Buffer,
- U: Fn(&ArrayData, &ArrayData) -> Option<Buffer>,
+ F: Fn(&BooleanBuffer, &BooleanBuffer) -> BooleanBuffer,
{
if left.len() != right.len() {
return Err(ArrowError::ComputeError(
@@ -190,32 +241,9 @@ where
));
}
- let len = left.len();
-
- let left_data = left.data_ref();
- let right_data = right.data_ref();
-
- let left_buffer = &left_data.buffers()[0];
- let right_buffer = &right_data.buffers()[0];
- let left_offset = left.offset();
- let right_offset = right.offset();
-
- let null_bit_buffer = null_op(left_data, right_data);
-
- let values = op(left_buffer, left_offset, right_buffer, right_offset, len);
-
- let data = unsafe {
- ArrayData::new_unchecked(
- DataType::Boolean,
- len,
- None,
- null_bit_buffer,
- 0,
- vec![values],
- vec![],
- )
- };
- Ok(BooleanArray::from(data))
+ let nulls = NullBuffer::union(left.nulls(), right.nulls());
+ let values = op(left.values(), right.values());
+ Ok(BooleanArray::new(values, nulls))
}
/// Performs `AND` operation on two arrays. If either left or right value is
null then the
@@ -235,49 +263,7 @@ pub fn and(
left: &BooleanArray,
right: &BooleanArray,
) -> Result<BooleanArray, ArrowError> {
- binary_boolean_kernel(left, right, buffer_bin_and,
build_null_buffer_for_and_or)
-}
-
-/// Logical 'and' boolean values with Kleene logic
-///
-/// # Behavior
-///
-/// This function behaves as follows with nulls:
-///
-/// * `true` and `null` = `null`
-/// * `null` and `true` = `null`
-/// * `false` and `null` = `false`
-/// * `null` and `false` = `false`
-/// * `null` and `null` = `null`
-///
-/// In other words, in this context a null value really means \"unknown\",
-/// and an unknown value 'and' false is always false.
-/// For a different null behavior, see function \"and\".
-///
-/// # Example
-///
-/// ```rust
-/// # use arrow_array::BooleanArray;
-/// # use arrow_arith::boolean::and_kleene;
-/// let a = BooleanArray::from(vec![Some(true), Some(false), None]);
-/// let b = BooleanArray::from(vec![None, None, None]);
-/// let and_ab = and_kleene(&a, &b).unwrap();
-/// assert_eq!(and_ab, BooleanArray::from(vec![None, Some(false), None]));
-/// ```
-///
-/// # Fails
-///
-/// If the operands have different lengths
-pub fn and_kleene(
- left: &BooleanArray,
- right: &BooleanArray,
-) -> Result<BooleanArray, ArrowError> {
- binary_boolean_kernel(
- left,
- right,
- buffer_bin_and,
- build_null_buffer_for_and_kleene,
- )
+ binary_boolean_kernel(left, right, |a, b| a & b)
}
/// Performs `OR` operation on two arrays. If either left or right value is
null then the
@@ -294,44 +280,7 @@ pub fn and_kleene(
/// assert_eq!(or_ab, BooleanArray::from(vec![Some(true), Some(true), None]));
/// ```
pub fn or(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray,
ArrowError> {
- binary_boolean_kernel(left, right, buffer_bin_or,
build_null_buffer_for_and_or)
-}
-
-/// Logical 'or' boolean values with Kleene logic
-///
-/// # Behavior
-///
-/// This function behaves as follows with nulls:
-///
-/// * `true` or `null` = `true`
-/// * `null` or `true` = `true`
-/// * `false` or `null` = `null`
-/// * `null` or `false` = `null`
-/// * `null` or `null` = `null`
-///
-/// In other words, in this context a null value really means \"unknown\",
-/// and an unknown value 'or' true is always true.
-/// For a different null behavior, see function \"or\".
-///
-/// # Example
-///
-/// ```rust
-/// # use arrow_array::BooleanArray;
-/// # use arrow_arith::boolean::or_kleene;
-/// let a = BooleanArray::from(vec![Some(true), Some(false), None]);
-/// let b = BooleanArray::from(vec![None, None, None]);
-/// let or_ab = or_kleene(&a, &b).unwrap();
-/// assert_eq!(or_ab, BooleanArray::from(vec![Some(true), None, None]));
-/// ```
-///
-/// # Fails
-///
-/// If the operands have different lengths
-pub fn or_kleene(
- left: &BooleanArray,
- right: &BooleanArray,
-) -> Result<BooleanArray, ArrowError> {
- binary_boolean_kernel(left, right, buffer_bin_or,
build_null_buffer_for_or_kleene)
+ binary_boolean_kernel(left, right, |a, b| a | b)
}
/// Performs unary `NOT` operation on an arrays. If value is null then the
result is also
@@ -347,26 +296,9 @@ pub fn or_kleene(
/// assert_eq!(not_a, BooleanArray::from(vec![Some(true), Some(false), None]));
/// ```
pub fn not(left: &BooleanArray) -> Result<BooleanArray, ArrowError> {
- let left_offset = left.offset();
- let len = left.len();
-
- let data = left.data_ref();
- let null_bit_buffer = data.nulls().map(|b| b.inner().sliced());
-
- let values = buffer_unary_not(data.buffers()[0], left_offset, len);
-
- let data = unsafe {
- ArrayData::new_unchecked(
- DataType::Boolean,
- len,
- None,
- null_bit_buffer,
- 0,
- vec![values],
- vec![],
- )
- };
- Ok(BooleanArray::from(data))
+ let nulls = left.nulls().cloned();
+ let values = !left.values();
+ Ok(BooleanArray::new(values, nulls))
}
/// Returns a non-null [BooleanArray] with whether each value of the array is
null.
@@ -381,29 +313,12 @@ pub fn not(left: &BooleanArray) -> Result<BooleanArray,
ArrowError> {
/// assert_eq!(a_is_null, BooleanArray::from(vec![false, false, true]));
/// ```
pub fn is_null(input: &dyn Array) -> Result<BooleanArray, ArrowError> {
- let len = input.len();
-
- let output = match input.data_ref().nulls() {
- None => {
- let len_bytes = ceil(len, 8);
- MutableBuffer::from_len_zeroed(len_bytes).into()
- }
- Some(nulls) => buffer_unary_not(nulls.buffer(), nulls.offset(),
nulls.len()),
+ let values = match input.nulls() {
+ None => NullBuffer::new_null(input.len()).into_inner(),
+ Some(nulls) => !nulls.inner(),
};
- let data = unsafe {
- ArrayData::new_unchecked(
- DataType::Boolean,
- len,
- None,
- None,
- 0,
- vec![output],
- vec![],
- )
- };
-
- Ok(BooleanArray::from(data))
+ Ok(BooleanArray::new(values, None))
}
/// Returns a non-null [BooleanArray] with whether each value of the array is
not null.
@@ -420,7 +335,7 @@ pub fn is_null(input: &dyn Array) -> Result<BooleanArray,
ArrowError> {
pub fn is_not_null(input: &dyn Array) -> Result<BooleanArray, ArrowError> {
let len = input.len();
- let output = match input.data_ref().nulls() {
+ let output = match input.nulls() {
None => {
let len_bytes = ceil(len, 8);
MutableBuffer::new(len_bytes)
diff --git a/arrow-array/src/array/dictionary_array.rs
b/arrow-array/src/array/dictionary_array.rs
index 0862230a4..49a184369 100644
--- a/arrow-array/src/array/dictionary_array.rs
+++ b/arrow-array/src/array/dictionary_array.rs
@@ -294,7 +294,7 @@ impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
/// Returns a clone of the value type of this list.
pub fn value_type(&self) -> DataType {
- self.values.data_ref().data_type().clone()
+ self.values.data_type().clone()
}
/// The length of the dictionary is the length of the keys array.
diff --git a/arrow-array/src/array/fixed_size_binary_array.rs
b/arrow-array/src/array/fixed_size_binary_array.rs
index 062961a20..af51ff787 100644
--- a/arrow-array/src/array/fixed_size_binary_array.rs
+++ b/arrow-array/src/array/fixed_size_binary_array.rs
@@ -425,7 +425,7 @@ impl From<FixedSizeListArray> for FixedSizeBinaryArray {
.len(v.len())
.offset(v.offset())
.add_buffer(child_data.buffers()[0].slice(child_data.offset()))
- .nulls(v.data_ref().nulls().cloned());
+ .nulls(v.nulls().cloned());
let data = unsafe { builder.build_unchecked() };
Self::from(data)
diff --git a/arrow-array/src/array/fixed_size_list_array.rs
b/arrow-array/src/array/fixed_size_list_array.rs
index 7d65927cd..0910e2944 100644
--- a/arrow-array/src/array/fixed_size_list_array.rs
+++ b/arrow-array/src/array/fixed_size_list_array.rs
@@ -76,7 +76,7 @@ impl FixedSizeListArray {
/// Returns a clone of the value type of this list.
pub fn value_type(&self) -> DataType {
- self.values.data_ref().data_type().clone()
+ self.values.data_type().clone()
}
/// Returns ith value of this list array.
diff --git a/arrow-array/src/array/list_array.rs
b/arrow-array/src/array/list_array.rs
index dca256008..c7e2a817b 100644
--- a/arrow-array/src/array/list_array.rs
+++ b/arrow-array/src/array/list_array.rs
@@ -85,7 +85,7 @@ impl<OffsetSize: OffsetSizeTrait>
GenericListArray<OffsetSize> {
/// Returns a clone of the value type of this list.
pub fn value_type(&self) -> DataType {
- self.values.data_ref().data_type().clone()
+ self.values.data_type().clone()
}
/// Returns ith value of this list array.
diff --git a/arrow-array/src/array/null_array.rs
b/arrow-array/src/array/null_array.rs
index fba6e41e8..3d65e9e9e 100644
--- a/arrow-array/src/array/null_array.rs
+++ b/arrow-array/src/array/null_array.rs
@@ -97,7 +97,7 @@ impl Array for NullArray {
/// Returns the total number of null values in this array.
/// The null count of a `NullArray` always equals its length.
fn null_count(&self) -> usize {
- self.data_ref().len()
+ self.len()
}
}
diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs
index ab6460e83..c6fedb960 100644
--- a/arrow-ord/src/sort.rs
+++ b/arrow-ord/src/sort.rs
@@ -22,7 +22,7 @@ use arrow_array::builder::BufferBuilder;
use arrow_array::cast::*;
use arrow_array::types::*;
use arrow_array::*;
-use arrow_buffer::{ArrowNativeType, MutableBuffer};
+use arrow_buffer::{ArrowNativeType, MutableBuffer, NullBuffer};
use arrow_data::ArrayData;
use arrow_data::ArrayDataBuilder;
use arrow_schema::{ArrowError, DataType, IntervalUnit, TimeUnit};
@@ -1145,9 +1145,9 @@ where
}
type LexicographicalCompareItem<'a> = (
- &'a ArrayData, // data
- DynComparator, // comparator
- SortOptions, // sort_option
+ Option<&'a NullBuffer>, // nulls
+ DynComparator, // comparator
+ SortOptions, // sort_option
);
/// A lexicographical comparator that wraps given array data (columns) and can
lexicographically compare data
@@ -1159,8 +1159,13 @@ pub struct LexicographicalComparator<'a> {
impl LexicographicalComparator<'_> {
/// lexicographically compare values at the wrapped columns with given
indices.
pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
- for (data, comparator, sort_option) in &self.compare_items {
- match (data.is_valid(a_idx), data.is_valid(b_idx)) {
+ for (nulls, comparator, sort_option) in &self.compare_items {
+ let (lhs_valid, rhs_valid) = match nulls {
+ Some(n) => (n.is_valid(a_idx), n.is_valid(b_idx)),
+ None => (true, true),
+ };
+
+ match (lhs_valid, rhs_valid) {
(true, true) => {
match (comparator)(a_idx, b_idx) {
// equal, move on to next column
@@ -1205,11 +1210,9 @@ impl LexicographicalComparator<'_> {
.iter()
.map(|column| {
// flatten and convert build comparators
- // use ArrayData for is_valid checks later to avoid dynamic
call
let values = column.values.as_ref();
- let data = values.data_ref();
Ok((
- data,
+ values.nulls(),
build_compare(values, values)?,
column.options.unwrap_or_default(),
))
diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs
index 14fd5d9d1..567aaa58e 100644
--- a/arrow-select/src/filter.rs
+++ b/arrow-select/src/filter.rs
@@ -53,11 +53,7 @@ pub struct SlicesIterator<'a>(BitSliceIterator<'a>);
impl<'a> SlicesIterator<'a> {
pub fn new(filter: &'a BooleanArray) -> Self {
- let values = &filter.data_ref().buffers()[0];
- let len = filter.len();
- let offset = filter.offset();
-
- Self(BitSliceIterator::new(values, offset, len))
+ Self(filter.values().set_slices())
}
}
@@ -149,18 +145,9 @@ pub fn build_filter(filter: &BooleanArray) ->
Result<Filter, ArrowError> {
/// Remove null values by do a bitmask AND operation with null bits and the
boolean bits.
pub fn prep_null_mask_filter(filter: &BooleanArray) -> BooleanArray {
- let array_data = filter.data_ref();
- let nulls = array_data.nulls().unwrap();
+ let nulls = filter.nulls().unwrap();
let mask = filter.values() & nulls.inner();
-
- let array_data = ArrayData::builder(DataType::Boolean)
- .len(mask.len())
- .offset(mask.offset())
- .add_buffer(mask.into_inner());
-
- let array_data = unsafe { array_data.build_unchecked() };
-
- BooleanArray::from(array_data)
+ BooleanArray::new(mask, None)
}
/// Filters an [Array], returning elements matching the filter (i.e. where the
values are true).
@@ -365,9 +352,10 @@ fn filter_array(
t => unimplemented!("Filter not supported for dictionary type
{:?}", t)
}
_ => {
+ let data = values.to_data();
// fallback to using MutableArrayData
let mut mutable = MutableArrayData::new(
- vec![values.data_ref()],
+ vec![&data],
false,
predicate.count,
);
diff --git a/arrow/benches/mutable_array.rs b/arrow/benches/mutable_array.rs
index 3a42ec1be..b04e5cd84 100644
--- a/arrow/benches/mutable_array.rs
+++ b/arrow/benches/mutable_array.rs
@@ -39,7 +39,8 @@ fn create_slices(size: usize) -> Vec<(usize, usize)> {
}
fn bench<T: Array>(v1: &T, slices: &[(usize, usize)]) {
- let mut mutable = MutableArrayData::new(vec![v1.data_ref()], false, 5);
+ let data = v1.to_data();
+ let mut mutable = MutableArrayData::new(vec![&data], false, 5);
for (start, end) in slices {
mutable.extend(0, *start, *end)
}
diff --git a/arrow/tests/array_equal.rs b/arrow/tests/array_equal.rs
index b6f81f6a4..af81b17e4 100644
--- a/arrow/tests/array_equal.rs
+++ b/arrow/tests/array_equal.rs
@@ -856,7 +856,7 @@ fn test_struct_equal_null() {
)]))
.null_bit_buffer(Some(Buffer::from(vec![0b00011110])))
.len(5)
- .add_child_data(a.data_ref().clone())
+ .add_child_data(a.to_data())
.build()
.unwrap();
let a = make_array(a);
@@ -920,7 +920,7 @@ fn test_struct_equal_null_variable_size() {
)]))
.null_bit_buffer(Some(Buffer::from(vec![0b00001010])))
.len(5)
- .add_child_data(strings1.data_ref().clone())
+ .add_child_data(strings1.to_data())
.build()
.unwrap();
let a = make_array(a);
@@ -932,7 +932,7 @@ fn test_struct_equal_null_variable_size() {
)]))
.null_bit_buffer(Some(Buffer::from(vec![0b00001010])))
.len(5)
- .add_child_data(strings2.data_ref().clone())
+ .add_child_data(strings2.to_data())
.build()
.unwrap();
let b = make_array(b);
diff --git a/parquet/src/arrow/array_reader/struct_array.rs
b/parquet/src/arrow/array_reader/struct_array.rs
index b470be5ad..91e839fc1 100644
--- a/parquet/src/arrow/array_reader/struct_array.rs
+++ b/parquet/src/arrow/array_reader/struct_array.rs
@@ -257,7 +257,7 @@ mod tests {
assert_eq!(
vec![true, false, false, false, false],
(0..5)
- .map(|idx| struct_array.data_ref().is_null(idx))
+ .map(|idx| struct_array.is_null(idx))
.collect::<Vec<bool>>()
);
assert_eq!(