This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new f2ac2c6afb Remove custom doubling strategy + add examples to
`VecAllocEx` (#9058)
f2ac2c6afb is described below
commit f2ac2c6afbbe1a1af2e721f87558439a1e8099df
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Jan 31 11:42:53 2024 -0500
Remove custom doubling strategy + add examples to `VecAllocEx` (#9058)
* Add examples to VecAllocExt and remove custom doubling strategy
* Add comment and add another example of using accounted push
---
datafusion/execution/src/memory_pool/proxy.rs | 66 +++++++++++++++++++++++----
1 file changed, 56 insertions(+), 10 deletions(-)
diff --git a/datafusion/execution/src/memory_pool/proxy.rs
b/datafusion/execution/src/memory_pool/proxy.rs
index 75e6de2715..29874fdaed 100644
--- a/datafusion/execution/src/memory_pool/proxy.rs
+++ b/datafusion/execution/src/memory_pool/proxy.rs
@@ -28,6 +28,35 @@ pub trait VecAllocExt {
/// `accounting` by any newly allocated bytes.
///
/// Note that allocation counts capacity, not size
+ ///
+ /// # Example:
+ /// ```
+ /// # use datafusion_execution::memory_pool::proxy::VecAllocExt;
+ /// // use allocated to incrementally track how much memory is allocated
in the vec
+ /// let mut allocated = 0;
+ /// let mut vec = Vec::new();
+ /// // Push data into the vec and the accounting will be updated to reflect
+ /// // memory allocation
+ /// vec.push_accounted(1, &mut allocated);
+ /// assert_eq!(allocated, 16); // space for 4 u32s
+ /// vec.push_accounted(1, &mut allocated);
+ /// assert_eq!(allocated, 16); // no new allocation needed
+ ///
+ /// // push more data into the vec
+ /// for _ in 0..10 { vec.push_accounted(1, &mut allocated); }
+ /// assert_eq!(allocated, 64); // underlying vec has space for 10 u32s
+ /// assert_eq!(vec.allocated_size(), 64);
+ /// ```
+ /// # Example with other allocations:
+ /// ```
+ /// # use datafusion_execution::memory_pool::proxy::VecAllocExt;
+ /// // You can use the same allocated size to track memory allocated by
+ /// // another source. For example
+ /// let mut allocated = 27;
+ /// let mut vec = Vec::new();
+ /// vec.push_accounted(1, &mut allocated); // allocates 16 bytes for vec
+ /// assert_eq!(allocated, 43); // 16 bytes for vec, 27 bytes for other
+ /// ```
fn push_accounted(&mut self, x: Self::T, accounting: &mut usize);
/// Return the amount of memory allocated by this Vec to store elements
@@ -36,6 +65,22 @@ pub trait VecAllocExt {
/// Note this calculation is not recursive, and does not include any heap
/// allocations contained within the Vec's elements. Does not include the
/// size of `self`
+ ///
+ /// # Example:
+ /// ```
+ /// # use datafusion_execution::memory_pool::proxy::VecAllocExt;
+ /// let mut vec = Vec::new();
+ /// // Push data into the vec and the accounting will be updated to reflect
+ /// // memory allocation
+ /// vec.push(1);
+ /// assert_eq!(vec.allocated_size(), 16); // space for 4 u32s
+ /// vec.push(1);
+ /// assert_eq!(vec.allocated_size(), 16); // no new allocation needed
+ ///
+ /// // push more data into the vec
+ /// for _ in 0..10 { vec.push(1); }
+ /// assert_eq!(vec.allocated_size(), 64); // space for 64 now
+ /// ```
fn allocated_size(&self) -> usize;
}
@@ -43,17 +88,18 @@ impl<T> VecAllocExt for Vec<T> {
type T = T;
fn push_accounted(&mut self, x: Self::T, accounting: &mut usize) {
- if self.capacity() == self.len() {
- // allocate more
-
- // growth factor: 2, but at least 2 elements
- let bump_elements = (self.capacity() * 2).max(2);
- let bump_size = std::mem::size_of::<u32>() * bump_elements;
- self.reserve(bump_elements);
+ let prev_capacty = self.capacity();
+ self.push(x);
+ let new_capacity = self.capacity();
+ if new_capacity > prev_capacty {
+ // capacity changed, so we allocated more
+ let bump_size = (new_capacity - prev_capacty) *
std::mem::size_of::<T>();
+ // Note multiplication should never overflow because `push` would
+ // have panic'd first, but the checked_add could potentially
+ // overflow since accounting could be tracking additional values,
and
+ // could be greater than what is stored in the Vec
*accounting =
(*accounting).checked_add(bump_size).expect("overflow");
}
-
- self.push(x);
}
fn allocated_size(&self) -> usize {
std::mem::size_of::<T>() * self.capacity()
@@ -69,7 +115,7 @@ pub trait RawTableAllocExt {
/// `accounting` by any newly allocated bytes.
///
/// Returns the bucket where the element was inserted.
- /// Note that allocation counts capacity, not size.
+ /// Note that allocation counts capacity, not size.
///
/// # Example:
/// ```