This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new ba02ab9b33 feat(memory-tracking): expose API to NullBuffer, ArrayData,
and Array (#8918)
ba02ab9b33 is described below
commit ba02ab9b339480241de32b90a372fd443bf3ab5b
Author: Filippo <[email protected]>
AuthorDate: Wed Mar 11 18:59:51 2026 +0100
feat(memory-tracking): expose API to NullBuffer, ArrayData, and Array
(#8918)
# Which issue does this PR close?
Part of #8137. Follow-up to #7303. Replaces #8040.
# Rationale for this change
#7303 implements the fundamental symbols for tracking memory. This patch
exposes those APIs at a higher level, on `Array` and `ArrayData`.
# What changes are included in this PR?
New `claim` API for `NullBuffer`, `ArrayData`, and `Array`. New `pool`
feature flag for `arrow`, `arrow-array`, and `arrow-data`.
# Are these changes tested?
Added a doctest on the `Array::claim` method.
# Are there any user-facing changes?
Added a new API and a new feature flag for `arrow`, `arrow-array`, and `arrow-data`.
---
arrow-array/Cargo.toml | 2 +
arrow-array/src/array/boolean_array.rs | 8 +++
arrow-array/src/array/byte_array.rs | 9 +++
arrow-array/src/array/byte_view_array.rs | 11 ++++
arrow-array/src/array/dictionary_array.rs | 11 ++++
arrow-array/src/array/fixed_size_binary_array.rs | 8 +++
arrow-array/src/array/fixed_size_list_array.rs | 8 +++
arrow-array/src/array/list_array.rs | 9 +++
arrow-array/src/array/list_view_array.rs | 10 +++
arrow-array/src/array/map_array.rs | 9 +++
arrow-array/src/array/mod.rs | 79 ++++++++++++++++++++++++
arrow-array/src/array/null_array.rs | 5 ++
arrow-array/src/array/primitive_array.rs | 8 +++
arrow-array/src/array/run_array.rs | 11 ++++
arrow-array/src/array/struct_array.rs | 10 +++
arrow-array/src/array/union_array.rs | 11 ++++
arrow-buffer/Cargo.toml | 1 +
arrow-buffer/src/buffer/boolean.rs | 8 +++
arrow-buffer/src/buffer/null.rs | 9 ++-
arrow-buffer/src/buffer/offset.rs | 6 ++
arrow-buffer/src/buffer/run.rs | 6 ++
arrow-buffer/src/buffer/scalar.rs | 8 +++
arrow-data/Cargo.toml | 2 +
arrow-data/src/data.rs | 24 +++++++
arrow/Cargo.toml | 2 +
25 files changed, 274 insertions(+), 1 deletion(-)
diff --git a/arrow-array/Cargo.toml b/arrow-array/Cargo.toml
index a046fea2b0..6be5a6daab 100644
--- a/arrow-array/Cargo.toml
+++ b/arrow-array/Cargo.toml
@@ -58,6 +58,8 @@ all-features = true
async = ["dep:futures"]
ffi = ["arrow-schema/ffi", "arrow-data/ffi"]
force_validate = []
+# Enable memory tracking support
+pool = ["arrow-buffer/pool", "arrow-data/pool"]
[dev-dependencies]
rand = { version = "0.9", default-features = false, features = ["std",
"std_rng", "thread_rng"] }
diff --git a/arrow-array/src/array/boolean_array.rs
b/arrow-array/src/array/boolean_array.rs
index 65e19c80f8..582627b243 100644
--- a/arrow-array/src/array/boolean_array.rs
+++ b/arrow-array/src/array/boolean_array.rs
@@ -346,6 +346,14 @@ unsafe impl Array for BooleanArray {
fn get_array_memory_size(&self) -> usize {
std::mem::size_of::<Self>() + self.get_buffer_memory_size()
}
+
+ #[cfg(feature = "pool")]
+ fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
+ self.values.claim(pool);
+ if let Some(nulls) = &self.nulls {
+ nulls.claim(pool);
+ }
+ }
}
impl ArrayAccessor for &BooleanArray {
diff --git a/arrow-array/src/array/byte_array.rs
b/arrow-array/src/array/byte_array.rs
index a54e9a5fc7..93924ac76b 100644
--- a/arrow-array/src/array/byte_array.rs
+++ b/arrow-array/src/array/byte_array.rs
@@ -525,6 +525,15 @@ unsafe impl<T: ByteArrayType> Array for
GenericByteArray<T> {
fn get_array_memory_size(&self) -> usize {
std::mem::size_of::<Self>() + self.get_buffer_memory_size()
}
+
+ #[cfg(feature = "pool")]
+ fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
+ self.value_offsets.claim(pool);
+ self.value_data.claim(pool);
+ if let Some(nulls) = &self.nulls {
+ nulls.claim(pool);
+ }
+ }
}
impl<'a, T: ByteArrayType> ArrayAccessor for &'a GenericByteArray<T> {
diff --git a/arrow-array/src/array/byte_view_array.rs
b/arrow-array/src/array/byte_view_array.rs
index 0275b628e2..a4a319df64 100644
--- a/arrow-array/src/array/byte_view_array.rs
+++ b/arrow-array/src/array/byte_view_array.rs
@@ -897,6 +897,17 @@ unsafe impl<T: ByteViewType + ?Sized> Array for
GenericByteViewArray<T> {
fn get_array_memory_size(&self) -> usize {
std::mem::size_of::<Self>() + self.get_buffer_memory_size()
}
+
+ #[cfg(feature = "pool")]
+ fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
+ self.views.claim(pool);
+ for buffer in self.buffers.iter() {
+ buffer.claim(pool);
+ }
+ if let Some(nulls) = &self.nulls {
+ nulls.claim(pool);
+ }
+ }
}
impl<'a, T: ByteViewType + ?Sized> ArrayAccessor for &'a
GenericByteViewArray<T> {
diff --git a/arrow-array/src/array/dictionary_array.rs
b/arrow-array/src/array/dictionary_array.rs
index 97e45cc5d6..0c465ec144 100644
--- a/arrow-array/src/array/dictionary_array.rs
+++ b/arrow-array/src/array/dictionary_array.rs
@@ -792,6 +792,12 @@ unsafe impl<T: ArrowDictionaryKeyType> Array for
DictionaryArray<T> {
+ self.keys.get_buffer_memory_size()
+ self.values.get_array_memory_size()
}
+
+ #[cfg(feature = "pool")]
+ fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
+ self.keys.claim(pool);
+ self.values.claim(pool);
+ }
}
impl<T: ArrowDictionaryKeyType> std::fmt::Debug for DictionaryArray<T> {
@@ -911,6 +917,11 @@ unsafe impl<K: ArrowDictionaryKeyType, V: Sync> Array for
TypedDictionaryArray<'
fn get_array_memory_size(&self) -> usize {
self.dictionary.get_array_memory_size()
}
+
+ #[cfg(feature = "pool")]
+ fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
+ self.dictionary.claim(pool);
+ }
}
impl<K, V> IntoIterator for TypedDictionaryArray<'_, K, V>
diff --git a/arrow-array/src/array/fixed_size_binary_array.rs
b/arrow-array/src/array/fixed_size_binary_array.rs
index e3f08c066e..72e6d022a5 100644
--- a/arrow-array/src/array/fixed_size_binary_array.rs
+++ b/arrow-array/src/array/fixed_size_binary_array.rs
@@ -662,6 +662,14 @@ unsafe impl Array for FixedSizeBinaryArray {
fn get_array_memory_size(&self) -> usize {
std::mem::size_of::<Self>() + self.get_buffer_memory_size()
}
+
+ #[cfg(feature = "pool")]
+ fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
+ self.value_data.claim(pool);
+ if let Some(nulls) = &self.nulls {
+ nulls.claim(pool);
+ }
+ }
}
impl<'a> ArrayAccessor for &'a FixedSizeBinaryArray {
diff --git a/arrow-array/src/array/fixed_size_list_array.rs
b/arrow-array/src/array/fixed_size_list_array.rs
index a3db33d61b..55a9fb9aa4 100644
--- a/arrow-array/src/array/fixed_size_list_array.rs
+++ b/arrow-array/src/array/fixed_size_list_array.rs
@@ -528,6 +528,14 @@ unsafe impl Array for FixedSizeListArray {
}
size
}
+
+ #[cfg(feature = "pool")]
+ fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
+ self.values.claim(pool);
+ if let Some(nulls) = &self.nulls {
+ nulls.claim(pool);
+ }
+ }
}
impl super::ListLikeArray for FixedSizeListArray {
diff --git a/arrow-array/src/array/list_array.rs
b/arrow-array/src/array/list_array.rs
index d9613c6809..24f7774f2b 100644
--- a/arrow-array/src/array/list_array.rs
+++ b/arrow-array/src/array/list_array.rs
@@ -620,6 +620,15 @@ unsafe impl<OffsetSize: OffsetSizeTrait> Array for
GenericListArray<OffsetSize>
}
size
}
+
+ #[cfg(feature = "pool")]
+ fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
+ self.value_offsets.claim(pool);
+ self.values.claim(pool);
+ if let Some(nulls) = &self.nulls {
+ nulls.claim(pool);
+ }
+ }
}
impl<OffsetSize: OffsetSizeTrait> super::ListLikeArray for
GenericListArray<OffsetSize> {
diff --git a/arrow-array/src/array/list_view_array.rs
b/arrow-array/src/array/list_view_array.rs
index eda3be11ac..75ff6117ee 100644
--- a/arrow-array/src/array/list_view_array.rs
+++ b/arrow-array/src/array/list_view_array.rs
@@ -486,6 +486,16 @@ unsafe impl<OffsetSize: OffsetSizeTrait> Array for
GenericListViewArray<OffsetSi
}
size
}
+
+ #[cfg(feature = "pool")]
+ fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
+ self.value_offsets.claim(pool);
+ self.value_sizes.claim(pool);
+ self.values.claim(pool);
+ if let Some(nulls) = &self.nulls {
+ nulls.claim(pool);
+ }
+ }
}
impl<OffsetSize: OffsetSizeTrait> super::ListLikeArray for
GenericListViewArray<OffsetSize> {
diff --git a/arrow-array/src/array/map_array.rs
b/arrow-array/src/array/map_array.rs
index 07758d59bb..7a5fe0b468 100644
--- a/arrow-array/src/array/map_array.rs
+++ b/arrow-array/src/array/map_array.rs
@@ -430,6 +430,15 @@ unsafe impl Array for MapArray {
}
size
}
+
+ #[cfg(feature = "pool")]
+ fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
+ self.value_offsets.claim(pool);
+ self.entries.claim(pool);
+ if let Some(nulls) = &self.nulls {
+ nulls.claim(pool);
+ }
+ }
}
impl ArrayAccessor for &MapArray {
diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs
index ca3a02577f..e389b462fb 100644
--- a/arrow-array/src/array/mod.rs
+++ b/arrow-array/src/array/mod.rs
@@ -354,6 +354,75 @@ pub unsafe trait Array: std::fmt::Debug + Send + Sync {
/// This value will always be greater than returned by
`get_buffer_memory_size()` and
/// includes the overhead of the data structures that contain the pointers
to the various buffers.
fn get_array_memory_size(&self) -> usize;
+
+ /// Claim memory used by this array in the provided memory pool.
+ ///
+ /// This recursively claims memory for:
+ /// - All data buffers in this array
+ /// - All child arrays (for nested types like List, Struct, etc.)
+ /// - The null bitmap buffer if present
+ ///
+ /// This method guarantees that the memory pool will count occupied
memory
+ /// exactly once. For example, if this array is derived from operations
like `slice`,
+ /// calling `claim` on it will not change the memory pool's usage if the
underlying buffers
+ /// have already been counted.
+ ///
+ /// # Example
+ /// ```
+ /// # use arrow_array::{Int32Array, Array};
+ /// # use arrow_buffer::TrackingMemoryPool;
+ /// # use arrow_buffer::MemoryPool;
+ ///
+ /// let pool = TrackingMemoryPool::default();
+ ///
+ /// let small_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
+ /// let small_array_size = small_array.get_buffer_memory_size();
+ ///
+ /// // Claim the array's memory in the pool
+ /// small_array.claim(&pool);
+ ///
+ /// // Create and claim slices of `small_array`; should not increase
memory usage
+ /// let slice1 = small_array.slice(0, 2);
+ /// let slice2 = small_array.slice(2, 2);
+ /// slice1.claim(&pool);
+ /// slice2.claim(&pool);
+ ///
+ /// assert_eq!(pool.used(), small_array_size);
+ ///
+ /// // Create a `large_array` which does not derive from the original
`small_array`
+ ///
+ /// let large_array = Int32Array::from((0..1000).collect::<Vec<i32>>());
+ /// let large_array_size = large_array.get_buffer_memory_size();
+ ///
+ /// large_array.claim(&pool);
+ ///
+ /// // Trying to claim more than once is a no-op
+ /// large_array.claim(&pool);
+ /// large_array.claim(&pool);
+ ///
+ /// assert_eq!(pool.used(), small_array_size + large_array_size);
+ ///
+ /// let sum_of_all_sizes = small_array_size + large_array_size +
slice1.get_buffer_memory_size() + slice2.get_buffer_memory_size();
+ ///
+ /// // `get_buffer_memory_size` works independently of the memory pool, so
the sum over all the
+ /// // arrays in scope will always be >= the memory usage reported by the
memory pool.
+ /// assert_ne!(pool.used(), sum_of_all_sizes);
+ ///
+ /// // Until the final claim is dropped the buffer size remains accounted
for
+ /// drop(small_array);
+ /// drop(slice1);
+ ///
+ /// assert_eq!(pool.used(), small_array_size + large_array_size);
+ ///
+ /// // Dropping this finally releases the buffer that was backing
`small_array`
+ /// drop(slice2);
+ ///
+ /// assert_eq!(pool.used(), large_array_size);
+ /// ```
+ #[cfg(feature = "pool")]
+ fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
+ self.to_data().claim(pool)
+ }
}
/// A reference-counted reference to a generic `Array`
@@ -437,6 +506,11 @@ unsafe impl Array for ArrayRef {
fn get_array_memory_size(&self) -> usize {
self.as_ref().get_array_memory_size()
}
+
+ #[cfg(feature = "pool")]
+ fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
+ self.as_ref().claim(pool)
+ }
}
unsafe impl<T: Array> Array for &T {
@@ -507,6 +581,11 @@ unsafe impl<T: Array> Array for &T {
fn get_array_memory_size(&self) -> usize {
T::get_array_memory_size(self)
}
+
+ #[cfg(feature = "pool")]
+ fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
+ T::claim(self, pool)
+ }
}
/// A generic trait for accessing the values of an [`Array`]
diff --git a/arrow-array/src/array/null_array.rs
b/arrow-array/src/array/null_array.rs
index 00b30935d4..05dd114be7 100644
--- a/arrow-array/src/array/null_array.rs
+++ b/arrow-array/src/array/null_array.rs
@@ -133,6 +133,11 @@ unsafe impl Array for NullArray {
fn get_array_memory_size(&self) -> usize {
std::mem::size_of::<Self>()
}
+
+ #[cfg(feature = "pool")]
+ fn claim(&self, _pool: &dyn arrow_buffer::MemoryPool) {
+ // NullArray has no buffers to claim
+ }
}
impl From<ArrayData> for NullArray {
diff --git a/arrow-array/src/array/primitive_array.rs
b/arrow-array/src/array/primitive_array.rs
index d9c8ff66d0..b51f5f5186 100644
--- a/arrow-array/src/array/primitive_array.rs
+++ b/arrow-array/src/array/primitive_array.rs
@@ -1246,6 +1246,14 @@ unsafe impl<T: ArrowPrimitiveType> Array for
PrimitiveArray<T> {
fn get_array_memory_size(&self) -> usize {
std::mem::size_of::<Self>() + self.get_buffer_memory_size()
}
+
+ #[cfg(feature = "pool")]
+ fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
+ self.values.claim(pool);
+ if let Some(nulls) = &self.nulls {
+ nulls.claim(pool);
+ }
+ }
}
impl<T: ArrowPrimitiveType> ArrayAccessor for &PrimitiveArray<T> {
diff --git a/arrow-array/src/array/run_array.rs
b/arrow-array/src/array/run_array.rs
index 4770bad05e..a3cb4565f4 100644
--- a/arrow-array/src/array/run_array.rs
+++ b/arrow-array/src/array/run_array.rs
@@ -375,6 +375,12 @@ unsafe impl<T: RunEndIndexType> Array for RunArray<T> {
+ self.run_ends.inner().inner().capacity()
+ self.values.get_array_memory_size()
}
+
+ #[cfg(feature = "pool")]
+ fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
+ self.run_ends.claim(pool);
+ self.values.claim(pool);
+ }
}
impl<R: RunEndIndexType> std::fmt::Debug for RunArray<R> {
@@ -603,6 +609,11 @@ unsafe impl<R: RunEndIndexType, V: Sync> Array for
TypedRunArray<'_, R, V> {
fn get_array_memory_size(&self) -> usize {
self.run_array.get_array_memory_size()
}
+
+ #[cfg(feature = "pool")]
+ fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
+ self.run_array.claim(pool);
+ }
}
// Array accessor converts the index of logical array to the index of the
physical array
diff --git a/arrow-array/src/array/struct_array.rs
b/arrow-array/src/array/struct_array.rs
index b5f25fff18..da837ba16b 100644
--- a/arrow-array/src/array/struct_array.rs
+++ b/arrow-array/src/array/struct_array.rs
@@ -468,6 +468,16 @@ unsafe impl Array for StructArray {
}
size
}
+
+ #[cfg(feature = "pool")]
+ fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
+ for field in &self.fields {
+ field.claim(pool);
+ }
+ if let Some(nulls) = &self.nulls {
+ nulls.claim(pool);
+ }
+ }
}
impl From<Vec<(FieldRef, ArrayRef)>> for StructArray {
diff --git a/arrow-array/src/array/union_array.rs
b/arrow-array/src/array/union_array.rs
index 03d69a5845..5ba7b947c7 100644
--- a/arrow-array/src/array/union_array.rs
+++ b/arrow-array/src/array/union_array.rs
@@ -946,6 +946,17 @@ unsafe impl Array for UnionArray {
.sum::<usize>()
+ sum
}
+
+ #[cfg(feature = "pool")]
+ fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
+ self.type_ids.claim(pool);
+ if let Some(offsets) = &self.offsets {
+ offsets.claim(pool);
+ }
+ for field in self.fields.iter().flatten() {
+ field.claim(pool);
+ }
+ }
}
impl std::fmt::Debug for UnionArray {
diff --git a/arrow-buffer/Cargo.toml b/arrow-buffer/Cargo.toml
index 02ea49c37c..1400c19863 100644
--- a/arrow-buffer/Cargo.toml
+++ b/arrow-buffer/Cargo.toml
@@ -36,6 +36,7 @@ bench = false
all-features = true
[features]
+# Enable memory tracking support
pool = []
[dependencies]
diff --git a/arrow-buffer/src/buffer/boolean.rs
b/arrow-buffer/src/buffer/boolean.rs
index f9148c7eb2..c1c7529e0a 100644
--- a/arrow-buffer/src/buffer/boolean.rs
+++ b/arrow-buffer/src/buffer/boolean.rs
@@ -489,6 +489,14 @@ impl BooleanBuffer {
self.buffer
}
+ /// Claim memory used by this buffer in the provided memory pool.
+ ///
+ /// See [`Buffer::claim`] for details.
+ #[cfg(feature = "pool")]
+ pub fn claim(&self, pool: &dyn crate::MemoryPool) {
+ self.buffer.claim(pool);
+ }
+
/// Returns an iterator over the bits in this [`BooleanBuffer`]
pub fn iter(&self) -> BitIterator<'_> {
self.into_iter()
diff --git a/arrow-buffer/src/buffer/null.rs b/arrow-buffer/src/buffer/null.rs
index 97034a631e..6046369c62 100644
--- a/arrow-buffer/src/buffer/null.rs
+++ b/arrow-buffer/src/buffer/null.rs
@@ -26,7 +26,7 @@ use crate::{Buffer, MutableBuffer};
/// that it is null.
///
/// # See also
-/// * [`NullBufferBuilder`] for creating `NullBuffer`s
+/// * [`NullBufferBuilder`] for creating `NullBuffer`s
///
/// [Arrow specification]:
https://arrow.apache.org/docs/format/Columnar.html#validity-bitmaps
/// [`NullBufferBuilder`]: crate::NullBufferBuilder
@@ -231,6 +231,13 @@ impl NullBuffer {
let nb = NullBuffer::new(bb);
(nb.null_count() > 0).then_some(nb)
}
+
+ /// Claim memory used by this null buffer in the provided memory pool.
+ #[cfg(feature = "pool")]
+ pub fn claim(&self, pool: &dyn crate::MemoryPool) {
+ // NullBuffer wraps a BooleanBuffer which wraps a Buffer
+ self.buffer.inner().claim(pool);
+ }
}
impl<'a> IntoIterator for &'a NullBuffer {
diff --git a/arrow-buffer/src/buffer/offset.rs
b/arrow-buffer/src/buffer/offset.rs
index 66fa7dd22e..bb34c8b238 100644
--- a/arrow-buffer/src/buffer/offset.rs
+++ b/arrow-buffer/src/buffer/offset.rs
@@ -220,6 +220,12 @@ impl<O: ArrowNativeType> OffsetBuffer<O> {
self.0
}
+ /// Claim memory used by this buffer in the provided memory pool.
+ #[cfg(feature = "pool")]
+ pub fn claim(&self, pool: &dyn crate::MemoryPool) {
+ self.0.claim(pool);
+ }
+
/// Returns a zero-copy slice of this buffer with length `len` and
starting at `offset`
pub fn slice(&self, offset: usize, len: usize) -> Self {
Self(self.0.slice(offset, len.saturating_add(1)))
diff --git a/arrow-buffer/src/buffer/run.rs b/arrow-buffer/src/buffer/run.rs
index 0f4d9234e4..703ae91380 100644
--- a/arrow-buffer/src/buffer/run.rs
+++ b/arrow-buffer/src/buffer/run.rs
@@ -294,6 +294,12 @@ where
self.run_ends
}
+ /// Claim memory used by this buffer in the provided memory pool.
+ #[cfg(feature = "pool")]
+ pub fn claim(&self, pool: &dyn crate::MemoryPool) {
+ self.run_ends.claim(pool);
+ }
+
/// Returns the physical indices corresponding to the provided logical
indices.
///
/// Given a slice of logical indices, this method returns a `Vec`
containing the
diff --git a/arrow-buffer/src/buffer/scalar.rs
b/arrow-buffer/src/buffer/scalar.rs
index 3c5334ca51..f74b93ab89 100644
--- a/arrow-buffer/src/buffer/scalar.rs
+++ b/arrow-buffer/src/buffer/scalar.rs
@@ -126,6 +126,14 @@ impl<T: ArrowNativeType> ScalarBuffer<T> {
self.buffer
}
+ /// Claim memory used by this buffer in the provided memory pool.
+ ///
+ /// See [`Buffer::claim`] for details.
+ #[cfg(feature = "pool")]
+ pub fn claim(&self, pool: &dyn crate::MemoryPool) {
+ self.buffer.claim(pool);
+ }
+
/// Returns true if this [`ScalarBuffer`] is equal to `other`, using
pointer comparisons
/// to determine buffer equality. This is cheaper than `PartialEq::eq` but
may
/// return false when the arrays are logically equal
diff --git a/arrow-data/Cargo.toml b/arrow-data/Cargo.toml
index 9c7a5206b2..9f1b50ed14 100644
--- a/arrow-data/Cargo.toml
+++ b/arrow-data/Cargo.toml
@@ -39,6 +39,8 @@ bench = false
force_validate = []
# Enable ffi support
ffi = ["arrow-schema/ffi"]
+# Enable memory tracking support
+pool = ["arrow-buffer/pool"]
[package.metadata.docs.rs]
all-features = true
diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs
index 21cf4e5b5e..a5a64dfe9f 100644
--- a/arrow-data/src/data.rs
+++ b/arrow-data/src/data.rs
@@ -1659,6 +1659,30 @@ impl ArrayData {
pub fn into_builder(self) -> ArrayDataBuilder {
self.into()
}
+
+ /// Claim memory used by this ArrayData in the provided memory pool.
+ ///
+ /// This claims memory for:
+ /// - All buffers in self.buffers
+ /// - All child ArrayData recursively
+ /// - The null buffer if present
+ #[cfg(feature = "pool")]
+ pub fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
+ // Claim all data buffers
+ for buffer in &self.buffers {
+ buffer.claim(pool);
+ }
+
+ // Claim null buffer if present
+ if let Some(nulls) = &self.nulls {
+ nulls.claim(pool);
+ }
+
+ // Recursively claim child data
+ for child in &self.child_data {
+ child.claim(pool);
+ }
+ }
}
/// Return the expected [`DataTypeLayout`] Arrays of this data
diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index 137d785eee..8e56457ff0 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -82,6 +82,8 @@ force_validate = ["arrow-array/force_validate",
"arrow-data/force_validate"]
ffi = ["arrow-schema/ffi", "arrow-data/ffi", "arrow-array/ffi"]
chrono-tz = ["arrow-array/chrono-tz"]
canonical_extension_types = ["arrow-schema/canonical_extension_types"]
+# Enable memory tracking support
+pool = ["arrow-array/pool"]
[dev-dependencies]
chrono = { workspace = true }