alamb commented on code in PR #22907:
URL: https://github.com/apache/datafusion/pull/22907#discussion_r3403533460
##########
datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs:
##########
@@ -230,25 +203,33 @@ impl<B: ByteViewType> ByteViewGroupValueBuilder<B> {
where
B: ByteViewType,
{
- let value: &[u8] = array.value(row).as_ref();
+ // SAFETY: the caller ensures `row` is valid
+ let view = unsafe { *array.views().get_unchecked(row) };
+ let len = view as u32;
- let value_len = value.len();
- let view = if value_len <= 12 {
- make_view(value, 0, 0)
+ if len <= 12 {
+ // Inline value: the view is already self-contained, push as-is.
+ self.views.push(view);
} else {
- // Ensure big enough block to hold the value firstly
- self.ensure_in_progress_big_enough(value_len);
-
- // Append value
- let buffer_index = self.completed.len();
- let offset = self.in_progress.len();
- self.in_progress.extend_from_slice(value);
-
- make_view(value, buffer_index as u32, offset as u32)
- };
-
- // Append view
- self.views.push(view);
+ // Non-inline value: copy the buffer data and construct a new view
+ // that points into our own buffers, reusing the source prefix.
+ let src = ByteView::from(view);
+ self.ensure_in_progress_big_enough(len as usize);
+ let new_buffer_index = self.completed.len() as u32;
+ let new_offset = self.in_progress.len() as u32;
+ let src_buf = &array.data_buffers()[src.buffer_index as usize];
+ self.in_progress.extend_from_slice(
+ &src_buf[src.offset as usize..(src.offset + src.length) as
usize],
+ );
+ let new_view = ByteView {
Review Comment:
I am not sure if it matters, but you could make this more consice by just
modifying buffer_index and offset
```rust
// Update buffer/offset and copy to output
view.buffer_index = new_buffer_index;
view.offset = new_offset;
self.views.push(view.as_u128);
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]