Dandandan commented on code in PR #7614:
URL: https://github.com/apache/arrow-rs/pull/7614#discussion_r2133670542
##########
arrow-select/src/coalesce.rs:
##########
@@ -264,42 +268,73 @@ fn gc_string_view_batch(batch: &RecordBatch) ->
RecordBatch {
})
.sum();
let actual_buffer_size = s.get_buffer_memory_size();
+ let buffers = s.data_buffers();
// Re-creating the array copies data and can be time consuming.
// We only do it if the array is sparse
if actual_buffer_size > (ideal_buffer_size * 2) {
+ if ideal_buffer_size == 0 {
+ // If the ideal buffer size is 0, all views are inlined
+ // so just reuse the views
+ return Arc::new(unsafe {
+ StringViewArray::new_unchecked(
+ s.views().clone(),
+ vec![],
+ s.nulls().cloned(),
+ )
+ });
+ }
// We set the block size to `ideal_buffer_size` so that the
new StringViewArray only has one buffer, which accelerate later concat_batches.
// See https://github.com/apache/arrow-rs/issues/6094 for more
details.
- let mut builder = StringViewBuilder::with_capacity(s.len());
- if ideal_buffer_size > 0 {
- builder = builder.with_fixed_block_size(ideal_buffer_size
as u32);
- }
-
- for v in s.iter() {
- builder.append_option(v);
- }
-
- let gc_string = builder.finish();
-
- debug_assert!(gc_string.data_buffers().len() <= 1); // buffer
count can be 0 if the `ideal_buffer_size` is 0
+ let mut buffer: Vec<u8> =
Vec::with_capacity(ideal_buffer_size);
+
+ let views: Vec<u128> = s
+ .views()
+ .as_ref()
+ .iter()
+ .cloned()
+ .map(|v| {
+ let mut b: ByteView = ByteView::from(v);
+
+ if b.length > 12 {
+ let offset = buffer.len() as u32;
+ buffer.extend_from_slice(
+ buffers[b.buffer_index as usize]
+ .get(b.offset as usize..b.offset as usize
+ b.length as usize)
+ .expect("Invalid buffer slice"),
+ );
+ b.offset = offset;
+ b.buffer_index = 0; // Set buffer index to 0, as
we only have one buffer
+ }
+
+ b.into()
+ })
+ .collect();
Review Comment:
No that is generally worse - `.iter().collect()` on rust core types
optimize to better code:
* it knows the exact final length based on slice and does some extra
optimizations based on that (`TrustedLen`
https://doc.rust-lang.org/std/iter/trait.TrustedLen.html)
* `push` generally needs to check whether the item fits capacity on each
call (so adds some branching).
the `cloned()` you see on `iter().cloned()` will generally not make a
difference on primitive types for the generated code.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]