This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new ec43d6fd5 Provide `into_builder` for bytearray (#3326)
ec43d6fd5 is described below

commit ec43d6fd5ebdd5f64b3556790d44bf96829e8ae8
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Sat Dec 31 10:28:21 2022 -0800

    Provide `into_builder` for bytearray (#3326)
    
    * Provide into_builder for bytearray
    
    * For review
    
    * Remove slices_mut
    
    * Modify test and remove values_slice_mut
---
 arrow-array/src/array/byte_array.rs              | 86 ++++++++++++++++++++++++
 arrow-array/src/array/string_array.rs            | 25 +++++++
 arrow-array/src/builder/generic_bytes_builder.rs | 40 ++++++++++-
 arrow-array/src/builder/primitive_builder.rs     |  4 +-
 4 files changed, 152 insertions(+), 3 deletions(-)

diff --git a/arrow-array/src/array/byte_array.rs b/arrow-array/src/array/byte_array.rs
index eb528384e..2cb04efb8 100644
--- a/arrow-array/src/array/byte_array.rs
+++ b/arrow-array/src/array/byte_array.rs
@@ -16,6 +16,7 @@
 // under the License.
 
 use crate::array::{empty_offsets, print_long_array};
+use crate::builder::GenericByteBuilder;
 use crate::iterator::ArrayIter;
 use crate::raw_pointer::RawPtrBox;
 use crate::types::bytes::ByteArrayNativeType;
@@ -139,6 +140,91 @@ impl<T: ByteArrayType> GenericByteArray<T> {
     pub fn iter(&self) -> ArrayIter<&Self> {
         ArrayIter::new(self)
     }
+
+    /// Returns `GenericByteBuilder` of this byte array for mutating its values if the underlying
+    /// offset and data buffers are not shared by others.
+    pub fn into_builder(self) -> Result<GenericByteBuilder<T>, Self> {
+        let len = self.len();
+        let null_bit_buffer = self
+            .data
+            .null_buffer()
+            .map(|b| b.bit_slice(self.data.offset(), len));
+
+        let element_len = std::mem::size_of::<T::Offset>();
+        let offset_buffer = self.data.buffers()[0]
+            .slice_with_length(self.data.offset() * element_len, (len + 1) * element_len);
+
+        let element_len = std::mem::size_of::<u8>();
+        let value_len =
+            T::Offset::as_usize(self.value_offsets()[len] - self.value_offsets()[0]);
+        let value_buffer = self.data.buffers()[1]
+            .slice_with_length(self.data.offset() * element_len, value_len * element_len);
+
+        drop(self.data);
+
+        let try_mutable_null_buffer = match null_bit_buffer {
+            None => Ok(None),
+            Some(null_buffer) => {
+                // Null buffer exists, tries to make it mutable
+                null_buffer.into_mutable().map(Some)
+            }
+        };
+
+        let try_mutable_buffers = match try_mutable_null_buffer {
+            Ok(mutable_null_buffer) => {
+                // Got mutable null buffer, tries to get mutable value buffer
+                let try_mutable_offset_buffer = offset_buffer.into_mutable();
+                let try_mutable_value_buffer = value_buffer.into_mutable();
+
+                // try_mutable_offset_buffer.map(...).map_err(...) doesn't work as the compiler complains
+                // mutable_null_buffer is moved into map closure.
+                match (try_mutable_offset_buffer, try_mutable_value_buffer) {
+                    (Ok(mutable_offset_buffer), Ok(mutable_value_buffer)) => unsafe {
+                        Ok(GenericByteBuilder::<T>::new_from_buffer(
+                            mutable_offset_buffer,
+                            mutable_value_buffer,
+                            mutable_null_buffer,
+                        ))
+                    },
+                    (Ok(mutable_offset_buffer), Err(value_buffer)) => Err((
+                        mutable_offset_buffer.into(),
+                        value_buffer,
+                        mutable_null_buffer.map(|b| b.into()),
+                    )),
+                    (Err(offset_buffer), Ok(mutable_value_buffer)) => Err((
+                        offset_buffer,
+                        mutable_value_buffer.into(),
+                        mutable_null_buffer.map(|b| b.into()),
+                    )),
+                    (Err(offset_buffer), Err(value_buffer)) => Err((
+                        offset_buffer,
+                        value_buffer,
+                        mutable_null_buffer.map(|b| b.into()),
+                    )),
+                }
+            }
+            Err(mutable_null_buffer) => {
+                // Unable to get mutable null buffer
+                Err((offset_buffer, value_buffer, Some(mutable_null_buffer)))
+            }
+        };
+
+        match try_mutable_buffers {
+            Ok(builder) => Ok(builder),
+            Err((offset_buffer, value_buffer, null_bit_buffer)) => {
+                let builder = ArrayData::builder(T::DATA_TYPE)
+                    .len(len)
+                    .add_buffer(offset_buffer)
+                    .add_buffer(value_buffer)
+                    .null_bit_buffer(null_bit_buffer);
+
+                let array_data = unsafe { builder.build_unchecked() };
+                let array = GenericByteArray::<T>::from(array_data);
+
+                Err(array)
+            }
+        }
+    }
 }
 
 impl<T: ByteArrayType> std::fmt::Debug for GenericByteArray<T> {
diff --git a/arrow-array/src/array/string_array.rs b/arrow-array/src/array/string_array.rs
index c8db589e3..4a4152adc 100644
--- a/arrow-array/src/array/string_array.rs
+++ b/arrow-array/src/array/string_array.rs
@@ -697,4 +697,29 @@ mod tests {
         assert_eq!(string.len(), 0);
         assert_eq!(string.value_offsets(), &[0]);
     }
+
+    #[test]
+    fn test_into_builder() {
+        let array: StringArray = vec!["hello", "arrow"].into();
+
+        // Append values
+        let mut builder = array.into_builder().unwrap();
+
+        builder.append_value("rust");
+
+        let expected: StringArray = vec!["hello", "arrow", "rust"].into();
+        let array = builder.finish();
+        assert_eq!(expected, array);
+    }
+
+    #[test]
+    fn test_into_builder_err() {
+        let array: StringArray = vec!["hello", "arrow"].into();
+
+        // Clone it, so we cannot get a mutable builder back
+        let shared_array = array.clone();
+
+        let err_return = array.into_builder().unwrap_err();
+        assert_eq!(&err_return, &shared_array);
+    }
 }
diff --git a/arrow-array/src/builder/generic_bytes_builder.rs b/arrow-array/src/builder/generic_bytes_builder.rs
index 9f9078c70..195628f47 100644
--- a/arrow-array/src/builder/generic_bytes_builder.rs
+++ b/arrow-array/src/builder/generic_bytes_builder.rs
@@ -19,7 +19,7 @@ use crate::builder::null_buffer_builder::NullBufferBuilder;
 use crate::builder::{ArrayBuilder, BufferBuilder, UInt8BufferBuilder};
 use crate::types::{ByteArrayType, GenericBinaryType, GenericStringType};
 use crate::{ArrayRef, GenericByteArray, OffsetSizeTrait};
-use arrow_buffer::{ArrowNativeType, Buffer};
+use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer};
 use arrow_data::ArrayDataBuilder;
 use std::any::Any;
 use std::sync::Arc;
@@ -53,6 +53,34 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
         }
     }
 
+    /// Creates a new  [`GenericByteBuilder`] from buffers.
+    ///
+    /// # Safety
+    /// This doesn't verify buffer contents as it assumes the buffers are from existing and
+    /// valid [`GenericByteArray`].
+    pub unsafe fn new_from_buffer(
+        offsets_buffer: MutableBuffer,
+        value_buffer: MutableBuffer,
+        null_buffer: Option<MutableBuffer>,
+    ) -> Self {
+        let offsets_builder = BufferBuilder::<T::Offset>::new_from_buffer(offsets_buffer);
+        let value_builder = BufferBuilder::<u8>::new_from_buffer(value_buffer);
+
+        let null_buffer_builder = null_buffer
+            .map(|buffer| {
+                NullBufferBuilder::new_from_buffer(buffer, offsets_builder.len() - 1)
+            })
+            .unwrap_or_else(|| {
+                NullBufferBuilder::new_with_len(offsets_builder.len() - 1)
+            });
+
+        Self {
+            offsets_builder,
+            value_builder,
+            null_buffer_builder,
+        }
+    }
+
     /// Appends a value into the builder.
     #[inline]
     pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
@@ -122,6 +150,16 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
     pub fn offsets_slice(&self) -> &[T::Offset] {
         self.offsets_builder.as_slice()
     }
+
+    /// Returns the current null buffer as a slice
+    pub fn validity_slice(&self) -> Option<&[u8]> {
+        self.null_buffer_builder.as_slice()
+    }
+
+    /// Returns the current null buffer as a mutable slice
+    pub fn validity_slice_mut(&mut self) -> Option<&mut [u8]> {
+        self.null_buffer_builder.as_slice_mut()
+    }
 }
 
 impl<T: ByteArrayType> std::fmt::Debug for GenericByteBuilder<T> {
diff --git a/arrow-array/src/builder/primitive_builder.rs b/arrow-array/src/builder/primitive_builder.rs
index fa1dc3ad1..f3f3f3728 100644
--- a/arrow-array/src/builder/primitive_builder.rs
+++ b/arrow-array/src/builder/primitive_builder.rs
@@ -286,12 +286,12 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
         self.values_builder.as_slice_mut()
     }
 
-    /// Returns the current values buffer as a slice
+    /// Returns the current null buffer as a slice
     pub fn validity_slice(&self) -> Option<&[u8]> {
         self.null_buffer_builder.as_slice()
     }
 
-    /// Returns the current values buffer as a mutable slice
+    /// Returns the current null buffer as a mutable slice
     pub fn validity_slice_mut(&mut self) -> Option<&mut [u8]> {
         self.null_buffer_builder.as_slice_mut()
     }

Reply via email to