This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new f2ac2c6afb Remove custom doubling strategy + add examples to 
`VecAllocEx` (#9058)
f2ac2c6afb is described below

commit f2ac2c6afbbe1a1af2e721f87558439a1e8099df
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Jan 31 11:42:53 2024 -0500

    Remove custom doubling strategy + add examples to `VecAllocEx` (#9058)
    
    * Add examples to VecAllocExt and remove custom doubling strategy
    
    * Add comment and add another example of using accounted push
---
 datafusion/execution/src/memory_pool/proxy.rs | 66 +++++++++++++++++++++++----
 1 file changed, 56 insertions(+), 10 deletions(-)

diff --git a/datafusion/execution/src/memory_pool/proxy.rs 
b/datafusion/execution/src/memory_pool/proxy.rs
index 75e6de2715..29874fdaed 100644
--- a/datafusion/execution/src/memory_pool/proxy.rs
+++ b/datafusion/execution/src/memory_pool/proxy.rs
@@ -28,6 +28,35 @@ pub trait VecAllocExt {
     /// `accounting` by any newly allocated bytes.
     ///
     /// Note that allocation counts  capacity, not size
+    ///
+    /// # Example:
+    /// ```
+    /// # use datafusion_execution::memory_pool::proxy::VecAllocExt;
+    /// // use allocated to incrementally track how much memory is allocated 
in the vec
+    /// let mut allocated = 0;
+    /// let mut vec = Vec::new();
+    /// // Push data into the vec and the accounting will be updated to reflect
+    /// // memory allocation
+    /// vec.push_accounted(1, &mut allocated);
+    /// assert_eq!(allocated, 16); // space for 4 u32s
+    /// vec.push_accounted(1, &mut allocated);
+    /// assert_eq!(allocated, 16); // no new allocation needed
+    ///
+    /// // push more data into the vec
+    /// for _ in 0..10 { vec.push_accounted(1, &mut allocated); }
+    /// assert_eq!(allocated, 64); // underlying vec has space for 10 u32s
+    /// assert_eq!(vec.allocated_size(), 64);
+    /// ```
+    /// # Example with other allocations:
+    /// ```
+    /// # use datafusion_execution::memory_pool::proxy::VecAllocExt;
+    /// // You can use the same allocated size to track memory allocated by
+    /// // another source. For example
+    /// let mut allocated = 27;
+    /// let mut vec = Vec::new();
+    /// vec.push_accounted(1, &mut allocated); // allocates 16 bytes for vec
+    /// assert_eq!(allocated, 43); // 16 bytes for vec, 27 bytes for other
+    /// ```
     fn push_accounted(&mut self, x: Self::T, accounting: &mut usize);
 
     /// Return the amount of memory allocated by this Vec to store elements
@@ -36,6 +65,22 @@ pub trait VecAllocExt {
     /// Note this calculation is not recursive, and does not include any heap
     /// allocations contained within the Vec's elements. Does not include the
     /// size of `self`
+    ///
+    /// # Example:
+    /// ```
+    /// # use datafusion_execution::memory_pool::proxy::VecAllocExt;
+    /// let mut vec = Vec::new();
+    /// // Push data into the vec and the accounting will be updated to reflect
+    /// // memory allocation
+    /// vec.push(1);
+    /// assert_eq!(vec.allocated_size(), 16); // space for 4 u32s
+    /// vec.push(1);
+    /// assert_eq!(vec.allocated_size(), 16); // no new allocation needed
+    ///
+    /// // push more data into the vec
+    /// for _ in 0..10 { vec.push(1); }
+    /// assert_eq!(vec.allocated_size(), 64); // space for 64 now
+    /// ```
     fn allocated_size(&self) -> usize;
 }
 
@@ -43,17 +88,18 @@ impl<T> VecAllocExt for Vec<T> {
     type T = T;
 
     fn push_accounted(&mut self, x: Self::T, accounting: &mut usize) {
-        if self.capacity() == self.len() {
-            // allocate more
-
-            // growth factor: 2, but at least 2 elements
-            let bump_elements = (self.capacity() * 2).max(2);
-            let bump_size = std::mem::size_of::<u32>() * bump_elements;
-            self.reserve(bump_elements);
+        let prev_capacty = self.capacity();
+        self.push(x);
+        let new_capacity = self.capacity();
+        if new_capacity > prev_capacty {
+            // capacity changed, so we allocated more
+            let bump_size = (new_capacity - prev_capacty) * 
std::mem::size_of::<T>();
+            // Note multiplication should never overflow because `push` would
+            // have panic'd first, but the checked_add could potentially
+            // overflow since accounting could be tracking additional values, 
and
+            // could be greater than what is stored in the Vec
             *accounting = 
(*accounting).checked_add(bump_size).expect("overflow");
         }
-
-        self.push(x);
     }
     fn allocated_size(&self) -> usize {
         std::mem::size_of::<T>() * self.capacity()
@@ -69,7 +115,7 @@ pub trait RawTableAllocExt {
     /// `accounting` by any newly allocated bytes.
     ///
     /// Returns the bucket where the element was inserted.
-    /// Note that allocation counts  capacity, not size.
+    /// Note that allocation counts capacity, not size.
     ///
     /// # Example:
     /// ```

Reply via email to