This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new c096172b76 Track the memory usage of custom allocations so that their
size can be reported via Array::get_buffer_memory_size (#5347)
c096172b76 is described below
commit c096172b769da8003ea7816086df60f38229a891
Author: Jörn Horstmann <[email protected]>
AuthorDate: Wed Jan 31 11:35:57 2024 +0100
Track the memory usage of custom allocations so that their size can be
reported via Array::get_buffer_memory_size (#5347)
---
arrow-array/src/array/mod.rs | 13 +++++++++++++
arrow-buffer/src/alloc/mod.rs | 21 ++++++++++++++++++---
arrow-buffer/src/buffer/immutable.rs | 2 +-
arrow-buffer/src/buffer/scalar.rs | 2 +-
arrow-buffer/src/bytes.rs | 13 +++++++------
5 files changed, 40 insertions(+), 11 deletions(-)
diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs
index f19406c161..1a58598543 100644
--- a/arrow-array/src/array/mod.rs
+++ b/arrow-array/src/array/mod.rs
@@ -272,6 +272,8 @@ pub trait Array: std::fmt::Debug + Send + Sync {
/// Returns the total number of bytes of memory pointed to by this array.
/// The buffers store bytes in the Arrow memory format, and include the
data as well as the validity map.
+ /// Note that this does not always correspond to the exact memory usage of
an array,
+ /// since multiple arrays can share the same buffers or slices thereof.
fn get_buffer_memory_size(&self) -> usize;
/// Returns the total number of bytes of memory occupied physically by
this array.
@@ -934,6 +936,17 @@ mod tests {
);
}
+ #[test]
+ fn test_memory_size_primitive_sliced() {
+ let arr = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
+ let slice1 = arr.slice(0, 64);
+ let slice2 = arr.slice(64, 64);
+
+ // both slices report the full buffer memory usage, even though the
buffers are shared
+ assert_eq!(slice1.get_array_memory_size(),
arr.get_array_memory_size());
+ assert_eq!(slice2.get_array_memory_size(),
arr.get_array_memory_size());
+ }
+
#[test]
fn test_memory_size_primitive_nullable() {
let arr: PrimitiveArray<Int64Type> = (0..128)
diff --git a/arrow-buffer/src/alloc/mod.rs b/arrow-buffer/src/alloc/mod.rs
index a3cb6253f3..d7108d2969 100644
--- a/arrow-buffer/src/alloc/mod.rs
+++ b/arrow-buffer/src/alloc/mod.rs
@@ -38,7 +38,9 @@ pub(crate) enum Deallocation {
Standard(Layout),
/// An allocation from an external source like the FFI interface
/// Deallocation will happen on `Allocation::drop`
- Custom(Arc<dyn Allocation>),
+ /// The size of the allocation is tracked here separately only
+ /// for memory usage reporting via `Array::get_buffer_memory_size`
+ Custom(Arc<dyn Allocation>, usize),
}
impl Debug for Deallocation {
@@ -47,9 +49,22 @@ impl Debug for Deallocation {
Deallocation::Standard(layout) => {
write!(f, "Deallocation::Standard {layout:?}")
}
- Deallocation::Custom(_) => {
- write!(f, "Deallocation::Custom {{ capacity: unknown }}")
+ Deallocation::Custom(_, size) => {
+ write!(f, "Deallocation::Custom {{ capacity: {size} }}")
}
}
}
}
+
+#[cfg(test)]
+mod tests {
+ use crate::alloc::Deallocation;
+
+ #[test]
+ fn test_size_of_deallocation() {
+ assert_eq!(
+ std::mem::size_of::<Deallocation>(),
+ 3 * std::mem::size_of::<usize>()
+ );
+ }
+}
diff --git a/arrow-buffer/src/buffer/immutable.rs
b/arrow-buffer/src/buffer/immutable.rs
index 9db8732f36..f7fc7cffdc 100644
--- a/arrow-buffer/src/buffer/immutable.rs
+++ b/arrow-buffer/src/buffer/immutable.rs
@@ -124,7 +124,7 @@ impl Buffer {
len: usize,
owner: Arc<dyn Allocation>,
) -> Self {
- Buffer::build_with_arguments(ptr, len, Deallocation::Custom(owner))
+ Buffer::build_with_arguments(ptr, len, Deallocation::Custom(owner,
len))
}
/// Auxiliary method to create a new Buffer
diff --git a/arrow-buffer/src/buffer/scalar.rs
b/arrow-buffer/src/buffer/scalar.rs
index f1c2ae7857..3826d74e43 100644
--- a/arrow-buffer/src/buffer/scalar.rs
+++ b/arrow-buffer/src/buffer/scalar.rs
@@ -134,7 +134,7 @@ impl<T: ArrowNativeType> From<Buffer> for ScalarBuffer<T> {
is_aligned,
"Memory pointer is not aligned with the specified scalar type"
),
- Deallocation::Custom(_) =>
+ Deallocation::Custom(_, _) =>
assert!(is_aligned, "Memory pointer from external source (e.g,
FFI) is not aligned with the specified scalar type. Before importing buffer
through FFI, please make sure the allocation is aligned."),
}
diff --git a/arrow-buffer/src/bytes.rs b/arrow-buffer/src/bytes.rs
index 81860b6048..ba61342d8e 100644
--- a/arrow-buffer/src/bytes.rs
+++ b/arrow-buffer/src/bytes.rs
@@ -90,9 +90,9 @@ impl Bytes {
pub fn capacity(&self) -> usize {
match self.deallocation {
Deallocation::Standard(layout) => layout.size(),
- // we cannot determine this in general,
- // and thus we state that this is externally-owned memory
- Deallocation::Custom(_) => 0,
+ // we only know the size of the custom allocation
+ // its underlying capacity might be larger
+ Deallocation::Custom(_, size) => size,
}
}
@@ -116,7 +116,7 @@ impl Drop for Bytes {
_ => unsafe { std::alloc::dealloc(self.ptr.as_ptr(), *layout)
},
},
// The automatic drop implementation will free the memory once the
reference count reaches zero
- Deallocation::Custom(_allocation) => (),
+ Deallocation::Custom(_allocation, _size) => (),
}
}
}
@@ -147,10 +147,11 @@ impl Debug for Bytes {
impl From<bytes::Bytes> for Bytes {
fn from(value: bytes::Bytes) -> Self {
+ let len = value.len();
Self {
- len: value.len(),
+ len,
ptr: NonNull::new(value.as_ptr() as _).unwrap(),
- deallocation: Deallocation::Custom(std::sync::Arc::new(value)),
+ deallocation: Deallocation::Custom(std::sync::Arc::new(value),
len),
}
}
}