alamb commented on code in PR #7463:
URL: https://github.com/apache/arrow-rs/pull/7463#discussion_r2073584836


##########
arrow-select/src/filter.rs:
##########
@@ -656,29 +653,44 @@ where
         (start, end, len)
     }
 
-    /// Extends the in-progress array by the indexes in the provided iterator
-    fn extend_idx(&mut self, iter: impl Iterator<Item = usize>) {
+    fn extend_offsets_idx(&mut self, iter: impl Iterator<Item = usize>) {
         self.dst_offsets.extend(iter.map(|idx| {
             let start = self.src_offsets[idx].as_usize();
             let end = self.src_offsets[idx + 1].as_usize();
             let len = OffsetSize::from_usize(end - start).expect("illegal 
offset range");
             self.cur_offset += len;
-            self.dst_values
-                .extend_from_slice(&self.src_values[start..end]);
+
             self.cur_offset
         }));
+        self.dst_values.reserve_exact(self.cur_offset.as_usize());
     }
 
-    /// Extends the in-progress array by the ranges in the provided iterator
-    fn extend_slices(&mut self, iter: impl Iterator<Item = (usize, usize)>) {
+    /// Extends the in-progress array by the indexes in the provided iterator
+    fn extend_idx(&mut self, iter: impl Iterator<Item = usize>) {
+        for idx in iter {
+            let start = self.src_offsets[idx].as_usize();
+            let end = self.src_offsets[idx + 1].as_usize();
+            self.dst_values
+                .extend_from_slice(&self.src_values[start..end]);
+        }
+    }
+
+    fn extend_offsets_slices(&mut self, iter: impl Iterator<Item = (usize, 
usize)>, count: usize) {
+        self.dst_offsets.reserve_exact(count);
         for (start, end) in iter {
             // These can only fail if `array` contains invalid data
             for idx in start..end {
                 let (_, _, len) = self.get_value_range(idx);
                 self.cur_offset += len;
-                self.dst_offsets.push(self.cur_offset); // push_unchecked?
+                self.dst_offsets.push(self.cur_offset);
             }
+        }
+        self.dst_values.reserve_exact(self.cur_offset.as_usize());

Review Comment:
   I think it would make sense for this call to reserve_exact to be a part of 
`extend_idx` (which is what actually extends the values). Otherwise I think it 
is quite tricky to understand that the performance of `extend_idx` is relying 
on calling `extend_offsts_slices` first
   
   Since they are now always called in a pair `extend_offsets_slices` and 
`extend_slices` I don't think this would change anything functionally but it 
would make the code easier for me to understand (even though you would also 
have to pass `count` to the extend_idx was well
   
   Another alternative might be to make a function `fn reserve_exact` that to 
make the allocations explicit



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to