This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new d38f8e0a0 Add BooleanArray::new (#3879) (#3898)
d38f8e0a0 is described below

commit d38f8e0a0a5af134a585686c109d022eae981574
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Thu Mar 23 12:39:00 2023 +0000

    Add BooleanArray::new (#3879) (#3898)
    
    * Add BooleanArray::new (#3879)
    
    * Review feedback
---
 arrow-array/src/array/boolean_array.rs | 57 ++++++++++++++++++----------------
 arrow-buffer/src/buffer/boolean.rs     | 10 +++++-
 2 files changed, 39 insertions(+), 28 deletions(-)

diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs
index c5775ad3b..98de62da0 100644
--- a/arrow-array/src/array/boolean_array.rs
+++ b/arrow-array/src/array/boolean_array.rs
@@ -19,8 +19,8 @@ use crate::array::print_long_array;
 use crate::builder::BooleanBuilder;
 use crate::iterator::BooleanIter;
 use crate::{Array, ArrayAccessor, ArrayRef};
-use arrow_buffer::{bit_util, BooleanBuffer, Buffer, MutableBuffer, NullBuffer};
-use arrow_data::{ArrayData, ArrayDataBuilder};
+use arrow_buffer::{bit_util, BooleanBuffer, MutableBuffer, NullBuffer};
+use arrow_data::ArrayData;
 use arrow_schema::DataType;
 use std::any::Any;
 use std::sync::Arc;
@@ -81,6 +81,28 @@ impl std::fmt::Debug for BooleanArray {
 }
 
 impl BooleanArray {
+    /// Create a new [`BooleanArray`] from the provided values and nulls
+    ///
+    /// # Panics
+    ///
+    /// Panics if `values.len() != nulls.len()`
+    pub fn new(values: BooleanBuffer, nulls: Option<NullBuffer>) -> Self {
+        if let Some(n) = nulls.as_ref() {
+            assert_eq!(values.len(), n.len());
+        }
+
+        // TODO: Don't store ArrayData inside arrays (#3880)
+        let data = unsafe {
+            ArrayData::builder(DataType::Boolean)
+                .len(values.len())
+                .offset(values.offset())
+                .nulls(nulls)
+                .buffers(vec![values.inner().clone()])
+                .build_unchecked()
+        };
+        Self { data, values }
+    }
+
     /// Returns the length of this array.
     pub fn len(&self) -> usize {
         self.data.len()
@@ -182,24 +204,12 @@ impl BooleanArray {
     where
         F: FnMut(T::Item) -> bool,
     {
-        let null_bit_buffer = left.nulls().map(|x| x.inner().sliced());
-        let buffer = MutableBuffer::collect_bool(left.len(), |i| unsafe {
+        let nulls = left.nulls().cloned();
+        let values = BooleanBuffer::collect_bool(left.len(), |i| unsafe {
             // SAFETY: i in range 0..len
             op(left.value_unchecked(i))
         });
-
-        let data = unsafe {
-            ArrayData::new_unchecked(
-                DataType::Boolean,
-                left.len(),
-                None,
-                null_bit_buffer,
-                0,
-                vec![Buffer::from(buffer)],
-                vec![],
-            )
-        };
-        Self::from(data)
+        Self::new(values, nulls)
     }
 
     /// Create a [`BooleanArray`] by evaluating the binary operation for
@@ -229,19 +239,11 @@ impl BooleanArray {
         assert_eq!(left.len(), right.len());
 
         let nulls = NullBuffer::union(left.nulls(), right.nulls());
-        let buffer = MutableBuffer::collect_bool(left.len(), |i| unsafe {
+        let values = BooleanBuffer::collect_bool(left.len(), |i| unsafe {
             // SAFETY: i in range 0..len
             op(left.value_unchecked(i), right.value_unchecked(i))
         });
-
-        let data = unsafe {
-            ArrayDataBuilder::new(DataType::Boolean)
-                .len(left.len())
-                .nulls(nulls)
-                .buffers(vec![buffer.into()])
-                .build_unchecked()
-        };
-        Self::from(data)
+        Self::new(values, nulls)
     }
 }
 
@@ -393,6 +395,7 @@ impl<Ptr: std::borrow::Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray
 #[cfg(test)]
 mod tests {
     use super::*;
+    use arrow_buffer::Buffer;
     use rand::{thread_rng, Rng};
 
     #[test]
diff --git a/arrow-buffer/src/buffer/boolean.rs b/arrow-buffer/src/buffer/boolean.rs
index fea04cc79..53ead4573 100644
--- a/arrow-buffer/src/buffer/boolean.rs
+++ b/arrow-buffer/src/buffer/boolean.rs
@@ -17,7 +17,9 @@
 
 use crate::bit_chunk_iterator::BitChunks;
 use crate::bit_iterator::{BitIndexIterator, BitIterator, BitSliceIterator};
-use crate::{bit_util, buffer_bin_and, buffer_bin_or, buffer_unary_not, Buffer};
+use crate::{
+    bit_util, buffer_bin_and, buffer_bin_or, buffer_unary_not, Buffer, MutableBuffer,
+};
 use std::ops::{BitAnd, BitOr, Not};
 
 /// A slice-able [`Buffer`] containing bit-packed booleans
@@ -61,6 +63,12 @@ impl BooleanBuffer {
         }
     }
 
+    /// Invokes `f` with indexes `0..len` collecting the boolean results into a new `BooleanBuffer`
+    pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, f: F) -> Self {
+        let buffer = MutableBuffer::collect_bool(len, f);
+        Self::new(buffer.into(), 0, len)
+    }
+
     /// Returns the number of set bits in this buffer
     pub fn count_set_bits(&self) -> usize {
         self.buffer.count_set_bits_offset(self.offset, self.len)

Reply via email to