Re: [PR] perf(physical-plan): optimize byte view append [datafusion]

via GitHub Sun, 19 Apr 2026 07:22:51 -0700


alamb commented on code in PR #21586:
URL: https://github.com/apache/datafusion/pull/21586#discussion_r3106965696



##########
datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs:
##########
@@ -30,6 +30,12 @@ use std::sync::Arc;
 
 const BYTE_VIEW_MAX_BLOCK_SIZE: usize = 2 * 1024 * 1024;
 
+#[derive(Clone, Copy)]

Review Comment:
   I think it would help to add some explaining what this is and what it is 
used for so a future reader doesn't have to read the rest of the code to figure 
it out



##########
datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs:
##########
@@ -179,6 +181,133 @@ impl<B: ByteViewType> ByteViewGroupValueBuilder<B> {
         }
     }
 
+    fn vectorized_append_non_null_views(
+        &mut self,
+        array: &GenericByteViewArray<B>,
+        rows: &[usize],
+    ) {
+        let source_views = array.views();
+        self.views.reserve(rows.len());
+
+        if array.data_buffers().is_empty() {
+            self.views.extend(rows.iter().map(|&row| source_views[row]));
+            return;
+        }
+
+        let start_idx = self.views.len();
+        let mut pending = Vec::with_capacity(rows.len().saturating_add(1) / 2);

Review Comment:
   You could probably save this allocation by using some sort of temp  scratch 
space on ByteViewGroupValueBuilder
   
   Something lik
   
   ```rust
   pub struct ByteViewGroupValueBuilder<B: ByteViewType> {
   ...
     scratch: Vec<PendingByteViewCopy>,
   ```
   }
   
   So then it wouldn't reallocate on each batch



##########
datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs:
##########
@@ -179,6 +181,133 @@ impl<B: ByteViewType> ByteViewGroupValueBuilder<B> {
         }
     }
 
+    fn vectorized_append_non_null_views(
+        &mut self,
+        array: &GenericByteViewArray<B>,
+        rows: &[usize],
+    ) {
+        let source_views = array.views();
+        self.views.reserve(rows.len());
+
+        if array.data_buffers().is_empty() {
+            self.views.extend(rows.iter().map(|&row| source_views[row]));
+            return;
+        }
+
+        let start_idx = self.views.len();
+        let mut pending = Vec::with_capacity(rows.len().saturating_add(1) / 2);
+        for (idx, &row) in rows.iter().enumerate() {
+            let view = source_views[row];
+            self.views.push(view);
+            if (view as u32) > 12 {
+                pending.push(PendingByteViewCopy {
+                    dest_index: start_idx + idx,
+                    source: ByteView::from(view),
+                });
+            }
+        }
+
+        self.batch_copy_long_views(array.data_buffers(), &pending);
+    }
+
+    fn vectorized_append_views_with_nulls(
+        &mut self,
+        array: &GenericByteViewArray<B>,
+        rows: &[usize],
+    ) {
+        let source_views = array.views();
+        let mut pending = Vec::with_capacity(rows.len().saturating_add(1) / 2);
+        self.views.reserve(rows.len());
+
+        for &row in rows {
+            if array.is_null(row) {
+                self.nulls.append(true);
+                self.views.push(0);
+                continue;
+            }
+
+            self.nulls.append(false);
+
+            let view = source_views[row];
+            let dest_index = self.views.len();
+            self.views.push(view);
+
+            if (view as u32) > 12 {
+                pending.push(PendingByteViewCopy {
+                    dest_index,
+                    source: ByteView::from(view),
+                });
+            }
+        }
+
+        self.batch_copy_long_views(array.data_buffers(), &pending);
+    }
+
+    fn batch_copy_long_views(

Review Comment:
   it would help me to understand this if it has some more comments explaining 
the rationale for postponing the copy (to detect contiguous views?)



##########
datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs:
##########
@@ -179,6 +181,133 @@ impl<B: ByteViewType> ByteViewGroupValueBuilder<B> {
         }
     }
 
+    fn vectorized_append_non_null_views(
+        &mut self,
+        array: &GenericByteViewArray<B>,
+        rows: &[usize],
+    ) {
+        let source_views = array.views();
+        self.views.reserve(rows.len());
+
+        if array.data_buffers().is_empty() {
+            self.views.extend(rows.iter().map(|&row| source_views[row]));
+            return;
+        }
+
+        let start_idx = self.views.len();
+        let mut pending = Vec::with_capacity(rows.len().saturating_add(1) / 2);
+        for (idx, &row) in rows.iter().enumerate() {
+            let view = source_views[row];
+            self.views.push(view);
+            if (view as u32) > 12 {

Review Comment:
   we culd replace 12 with MAX_INLINE_VIEW_LEN maybe to make it clearer what is 
going on here



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] perf(physical-plan): optimize byte view append [datafusion]

Reply via email to