This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new d38f8e0a0 Add BooleanArray::new (#3879) (#3898)
d38f8e0a0 is described below
commit d38f8e0a0a5af134a585686c109d022eae981574
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Thu Mar 23 12:39:00 2023 +0000
Add BooleanArray::new (#3879) (#3898)
* Add BooleanArray::new (#3879)
* Review feedback
---
arrow-array/src/array/boolean_array.rs | 57 ++++++++++++++++++----------------
arrow-buffer/src/buffer/boolean.rs | 10 +++++-
2 files changed, 39 insertions(+), 28 deletions(-)
diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs
index c5775ad3b..98de62da0 100644
--- a/arrow-array/src/array/boolean_array.rs
+++ b/arrow-array/src/array/boolean_array.rs
@@ -19,8 +19,8 @@ use crate::array::print_long_array;
use crate::builder::BooleanBuilder;
use crate::iterator::BooleanIter;
use crate::{Array, ArrayAccessor, ArrayRef};
-use arrow_buffer::{bit_util, BooleanBuffer, Buffer, MutableBuffer, NullBuffer};
-use arrow_data::{ArrayData, ArrayDataBuilder};
+use arrow_buffer::{bit_util, BooleanBuffer, MutableBuffer, NullBuffer};
+use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;
use std::sync::Arc;
@@ -81,6 +81,28 @@ impl std::fmt::Debug for BooleanArray {
}
impl BooleanArray {
+ /// Create a new [`BooleanArray`] from the provided values and nulls
+ ///
+ /// # Panics
+ ///
+ /// Panics if `values.len() != nulls.len()`
+ pub fn new(values: BooleanBuffer, nulls: Option<NullBuffer>) -> Self {
+ if let Some(n) = nulls.as_ref() {
+ assert_eq!(values.len(), n.len());
+ }
+
+ // TODO: Don't store ArrayData inside arrays (#3880)
+ let data = unsafe {
+ ArrayData::builder(DataType::Boolean)
+ .len(values.len())
+ .offset(values.offset())
+ .nulls(nulls)
+ .buffers(vec![values.inner().clone()])
+ .build_unchecked()
+ };
+ Self { data, values }
+ }
+
/// Returns the length of this array.
pub fn len(&self) -> usize {
self.data.len()
@@ -182,24 +204,12 @@ impl BooleanArray {
where
F: FnMut(T::Item) -> bool,
{
- let null_bit_buffer = left.nulls().map(|x| x.inner().sliced());
- let buffer = MutableBuffer::collect_bool(left.len(), |i| unsafe {
+ let nulls = left.nulls().cloned();
+ let values = BooleanBuffer::collect_bool(left.len(), |i| unsafe {
// SAFETY: i in range 0..len
op(left.value_unchecked(i))
});
-
- let data = unsafe {
- ArrayData::new_unchecked(
- DataType::Boolean,
- left.len(),
- None,
- null_bit_buffer,
- 0,
- vec![Buffer::from(buffer)],
- vec![],
- )
- };
- Self::from(data)
+ Self::new(values, nulls)
}
/// Create a [`BooleanArray`] by evaluating the binary operation for
@@ -229,19 +239,11 @@ impl BooleanArray {
assert_eq!(left.len(), right.len());
let nulls = NullBuffer::union(left.nulls(), right.nulls());
- let buffer = MutableBuffer::collect_bool(left.len(), |i| unsafe {
+ let values = BooleanBuffer::collect_bool(left.len(), |i| unsafe {
// SAFETY: i in range 0..len
op(left.value_unchecked(i), right.value_unchecked(i))
});
-
- let data = unsafe {
- ArrayDataBuilder::new(DataType::Boolean)
- .len(left.len())
- .nulls(nulls)
- .buffers(vec![buffer.into()])
- .build_unchecked()
- };
- Self::from(data)
+ Self::new(values, nulls)
}
}
@@ -393,6 +395,7 @@ impl<Ptr: std::borrow::Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray
#[cfg(test)]
mod tests {
use super::*;
+ use arrow_buffer::Buffer;
use rand::{thread_rng, Rng};
#[test]
diff --git a/arrow-buffer/src/buffer/boolean.rs b/arrow-buffer/src/buffer/boolean.rs
index fea04cc79..53ead4573 100644
--- a/arrow-buffer/src/buffer/boolean.rs
+++ b/arrow-buffer/src/buffer/boolean.rs
@@ -17,7 +17,9 @@
use crate::bit_chunk_iterator::BitChunks;
use crate::bit_iterator::{BitIndexIterator, BitIterator, BitSliceIterator};
-use crate::{bit_util, buffer_bin_and, buffer_bin_or, buffer_unary_not, Buffer};
+use crate::{
+ bit_util, buffer_bin_and, buffer_bin_or, buffer_unary_not, Buffer,
MutableBuffer,
+};
use std::ops::{BitAnd, BitOr, Not};
/// A slice-able [`Buffer`] containing bit-packed booleans
@@ -61,6 +63,12 @@ impl BooleanBuffer {
}
}
+ /// Invokes `f` with indexes `0..len` collecting the boolean results into a new `BooleanBuffer`
+ pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, f: F) -> Self {
+ let buffer = MutableBuffer::collect_bool(len, f);
+ Self::new(buffer.into(), 0, len)
+ }
+
/// Returns the number of set bits in this buffer
pub fn count_set_bits(&self) -> usize {
self.buffer.count_set_bits_offset(self.offset, self.len)