zhuqi-lucas commented on code in PR #7873:
URL: https://github.com/apache/arrow-rs/pull/7873#discussion_r2188364217


##########
arrow-array/src/array/byte_view_array.rs:
##########
@@ -473,10 +473,25 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
     /// Note: this function does not attempt to canonicalize / deduplicate 
values. For this
     /// feature see  [`GenericByteViewBuilder::with_deduplicate_strings`].
     pub fn gc(&self) -> Self {
-        let mut builder = 
GenericByteViewBuilder::<T>::with_capacity(self.len());
+        let len = self.len();
+        let mut builder = GenericByteViewBuilder::<T>::with_capacity(len);

Review Comment:
   There seems to be no improvement when I change the code to remove the builder:
   
   ```rust
   diff --git a/arrow-array/src/array/byte_view_array.rs 
b/arrow-array/src/array/byte_view_array.rs
   index b749459f9f..8605eb8108 100644
   --- a/arrow-array/src/array/byte_view_array.rs
   +++ b/arrow-array/src/array/byte_view_array.rs
   @@ -21,7 +21,7 @@ use crate::iterator::ArrayIter;
    use crate::types::bytes::ByteArrayNativeType;
    use crate::types::{BinaryViewType, ByteViewType, StringViewType};
    use crate::{Array, ArrayAccessor, ArrayRef, GenericByteArray, 
OffsetSizeTrait, Scalar};
   -use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, ScalarBuffer};
   +use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, NullBufferBuilder, 
ScalarBuffer};
    use arrow_data::{ArrayData, ArrayDataBuilder, ByteView, 
MAX_INLINE_VIEW_LEN};
    use arrow_schema::{ArrowError, DataType};
    use core::str;
   @@ -474,27 +474,65 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> 
{
        /// feature see  [`GenericByteViewBuilder::with_deduplicate_strings`].
        pub fn gc(&self) -> Self {
            let len = self.len();
   -        let mut builder = GenericByteViewBuilder::<T>::with_capacity(len);
            let views = self.views();
    
   +        let mut total_large_bytes = 0;
   +        for i in 0..len {
   +            if !self.is_null(i) {
   +                let length = views[i] as u32;
   +                if length > MAX_INLINE_VIEW_LEN {
   +                    total_large_bytes += length as usize;
   +                }
   +            }
   +        }
   +
   +        let mut data_buf = Vec::with_capacity(total_large_bytes);
   +        let mut views_buf = Vec::with_capacity(len);
   +        let mut null_builder = NullBufferBuilder::new(len);
   +
            for i in 0..len {
                if self.is_null(i) {
   -                builder.append_null();
   +                // null
   +                views_buf.push(0);
   +                null_builder.append_null();
                    continue;
                }
    
                let native: &T::Native = unsafe { self.value_unchecked(i) };
   -            let bytes: &[u8] = native.as_ref();
   +            let v: &[u8] = native.as_ref();
   +            let length = v.len() as u32;
    
   -            let length = views[i] as u32;
                if length <= MAX_INLINE_VIEW_LEN {
   -                builder.append_inlined(bytes, length);
   +                let mut view_bytes = [0u8; 16];
   +                view_bytes[0..4].copy_from_slice(&length.to_le_bytes());
   +                view_bytes[4..4 + v.len()].copy_from_slice(v);
   +                views_buf.push(u128::from_le_bytes(view_bytes));
                } else {
   -                builder.append_bytes(bytes, length);
   +                let offset = data_buf.len() as u32;
   +                data_buf.extend_from_slice(v);
   +
   +                let prefix = 
u32::from_le_bytes(v[0..4].try_into().unwrap());
   +                let bv = ByteView {
   +                    length,
   +                    prefix,
   +                    buffer_index: 0,
   +                    offset,
   +                };
   +                views_buf.push(bv.into());
                }
   +
   +            null_builder.append_non_null();
            }
    
   -        builder.finish()
   +        let data_block = Buffer::from_vec(data_buf);
   +        let nulls = null_builder.finish();
   +        unsafe {
   +            GenericByteViewArray::new_unchecked(
   +                ScalarBuffer::new(Buffer::from_slice_ref(&views_buf), 0, 
len),
   +                vec![data_block],
   +                nulls,
   +            )
   +        }
        }
    
        /// Returns the total number of bytes used by all non inlined views in 
all
   
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to