adriangb commented on code in PR #9138: URL: https://github.com/apache/arrow-rs/pull/9138#discussion_r2751327162
########## arrow-memory-size/src/lib.rs: ########## @@ -0,0 +1,688 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Memory size estimation utilities for Apache Arrow +//! +//! This crate provides the [`HeapSize`] trait for calculating heap memory usage +//! of data structures, with implementations for standard library types. +//! +//! For Arrow type implementations, see: +//! - [`arrow-buffer`](https://docs.rs/arrow-buffer) for buffer types +//! - [`arrow-array`](https://docs.rs/arrow-array) for array types +//! +//! # Example +//! +//! ``` +//! use arrow_memory_size::HeapSize; +//! +//! let v: Vec<String> = vec!["hello".to_string(), "world".to_string()]; +//! let heap_bytes = v.heap_size(); +//! let total_bytes = v.total_size(); +//! 
``` + +#![doc( + html_logo_url = "https://raw.githubusercontent.com/apache/arrow-rs/refs/heads/main/docs/source/_static/images/Arrow-logo_hex_black-txt_transparent-bg.svg", + html_favicon_url = "https://raw.githubusercontent.com/apache/arrow-rs/refs/heads/main/docs/source/_static/images/Arrow-logo_hex_black-txt_transparent-bg.svg" +)] +#![cfg_attr(docsrs, feature(doc_cfg))] +#![warn(missing_docs)] + +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; +use std::sync::{Arc, Mutex, RwLock}; + +/// Trait for calculating the heap memory size of a value. +/// +/// This trait provides methods for calculating how much heap memory +/// a data structure has allocated. This is useful for memory tracking, +/// cache management, and debugging memory usage. +/// +/// # Semantics +/// +/// - [`heap_size`](HeapSize::heap_size): Returns only the bytes allocated on the heap +/// by this value, not including the size of the value itself. +/// - [`total_size`](HeapSize::total_size): Returns the total memory footprint including +/// both the stack size of the value and its heap allocations. +/// +/// # Example +/// +/// ``` +/// use arrow_memory_size::HeapSize; +/// +/// let s = String::from("hello"); +/// assert!(s.heap_size() >= 5); // At least 5 bytes for "hello" +/// assert!(s.total_size() >= s.heap_size() + std::mem::size_of::<String>()); +/// ``` +pub trait HeapSize { + /// Return the size of any bytes allocated on the heap by this object, + /// including heap memory in nested structures. + /// + /// Note that the size of the type itself is not included in the result -- + /// instead, that size is added by the caller (e.g. container) or via + /// [`total_size`](HeapSize::total_size). + fn heap_size(&self) -> usize; + + /// Return the total size of this object including heap allocations + /// and the size of the object itself. + fn total_size(&self) -> usize { Review Comment: Here is `total_size()` -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org
