Dandandan commented on code in PR #7513:
URL: https://github.com/apache/arrow-rs/pull/7513#discussion_r2122243611


##########
arrow-array/src/builder/generic_bytes_view_builder.rs:
##########
@@ -406,6 +459,122 @@ impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
         };
         buffer_size + in_progress + tracker + views + null
     }
+
+    /// Appends every view of `array`, along with its validity, to this in-progress builder.
+    ///
+    /// Whether the source buffers are reused or their values copied depends on 
target_buffer_load_factor
+    pub fn append_array(&mut self, array: &GenericByteViewArray<T>) {
+        let num_rows = array.len();
+        if num_rows == 0 {
+            return; // empty input: return before touching any builder state
+        }
+
+        let null_buffer_builder = &mut self.null_buffer_builder;
+        let views = &mut self.views_builder;
+
+        // Extend validity: append the source null buffer, or mark every row non-null if there is none
+        if let Some(nulls) = array.nulls() {
+            null_buffer_builder.append_buffer(nulls);
+        } else {
+            null_buffer_builder.append_n_non_nulls(array.len());
+        }
+
+        // Copy the source views as-is, recording where they start so they can be fixed up below
+        let starting_view = views.len();
+        views.append_slice(array.views());
+
+        // SAFETY: views[starting_view..] were appended from `array` just above and nothing else was added
+        unsafe {
+            self.finalize_copied_views(starting_view, array);
+        }
+    }
+
+    /// Finalizes the views and buffers of the array
+    ///
+    /// This must be called after appending views from `array` to the builder.
+    ///
+    /// The views from `array` will point to the old buffers. This function
+    /// updates all views starting at `starting_view` to point to the new
+    /// buffers or copies the values into a new buffer if the array is sparse.
+    ///
+    /// # Safety
+    ///
+    /// * self.views[starting_view..] must be valid views from `array`.
+    pub unsafe fn finalize_copied_views(
+        &mut self,
+        starting_view: usize,
+        array: &GenericByteViewArray<T>,
+    ) {
+        // Flush the in-progress buffer
+        self.flush_in_progress();
+
+        let buffers = &mut self.completed;
+        let views = &mut self.views_builder;
+
+        let mut used_buffer_size = 0;
+        let use_exising_buffers = match self.target_buffer_load_factor {
+            None => true,
+            Some(load_factor) => {
+                used_buffer_size = array.minimum_buffer_size();
+                let actual_buffer_size = array.get_buffer_memory_size();
+                // If the total size of the buffers is less than the load 
factor, copy them existing buffers
+                used_buffer_size >= (actual_buffer_size as f32 * load_factor) 
as usize
+            }
+        };
+
+        if use_exising_buffers {
+            let num_buffers_before: u32 = 
buffers.len().try_into().expect("buffer count overflow");
+            buffers.extend_from_slice(array.data_buffers()); //
+
+            // If there were no existing buffers, the views do not need to be 
updated
+            // as the buffers of `array` are the same
+            if num_buffers_before == 0 {
+                return;
+            }
+
+            // Update any views that point to the old buffers
+            for v in views.as_slice_mut()[starting_view..].iter_mut() {
+                let view_len = *v as u32;
+                // if view_len is 12 or less, data is inlined and doesn't need 
an update
+                // if view is 12 or more, need to update the buffer offset
+                if view_len > 12 {
+                    let mut view = ByteView::from(*v);
+                    let new_buffer_index = num_buffers_before + 
view.buffer_index;
+                    view.buffer_index = new_buffer_index;
+                    *v = view.into(); // update view
+                }
+            }
+        } else {
+            // otherwise the array is sparse so copy the data into a single new
+            // buffer as well as updating the views
+            let mut new_buffer: Vec<u8> = Vec::with_capacity(used_buffer_size);
+            let new_buffer_index = buffers.len() as u32; // making one new 
buffer
+                                                         // Update any views 
that point to the old buffers.
+            for v in views.as_slice_mut()[starting_view..].iter_mut() {

Review Comment:
   Same here: it would be better to use `extend`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to