This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new a110004b3 feat: `{Field,DataType}::size` (#3149)
a110004b3 is described below
commit a110004b3d9f30358c22ac917fcad3745ea2460c
Author: Marco Neumann <[email protected]>
AuthorDate: Tue Nov 22 15:58:51 2022 +0000
feat: `{Field,DataType}::size` (#3149)
Add a way to calculate in-memory size of `Field` and `DataType`.
Closes #3147.
---
arrow-schema/src/datatype.rs | 50 ++++++++++++++++++++++++++++++++++++++++++++
arrow-schema/src/field.rs | 15 +++++++++++++
2 files changed, 65 insertions(+)
diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs
index 572d6f67d..b9be4bec7 100644
--- a/arrow-schema/src/datatype.rs
+++ b/arrow-schema/src/datatype.rs
@@ -372,6 +372,56 @@ impl DataType {
_ => self == other,
}
}
+
+ /// Return size of this instance in bytes.
+ ///
+ /// Includes the size of `Self`.
+ pub fn size(&self) -> usize {
+ std::mem::size_of_val(self)
+ + match self {
+ DataType::Null
+ | DataType::Boolean
+ | DataType::Int8
+ | DataType::Int16
+ | DataType::Int32
+ | DataType::Int64
+ | DataType::UInt8
+ | DataType::UInt16
+ | DataType::UInt32
+ | DataType::UInt64
+ | DataType::Float16
+ | DataType::Float32
+ | DataType::Float64
+ | DataType::Date32
+ | DataType::Date64
+ | DataType::Time32(_)
+ | DataType::Time64(_)
+ | DataType::Duration(_)
+ | DataType::Interval(_)
+ | DataType::Binary
+ | DataType::FixedSizeBinary(_)
+ | DataType::LargeBinary
+ | DataType::Utf8
+ | DataType::LargeUtf8
+ | DataType::Decimal128(_, _)
+ | DataType::Decimal256(_, _) => 0,
+ DataType::Timestamp(_, s) => {
+ s.as_ref().map(|s| s.capacity()).unwrap_or_default()
+ }
+ DataType::List(field)
+ | DataType::FixedSizeList(field, _)
+ | DataType::LargeList(field)
+ | DataType::Map(field, _) => field.size(),
+ DataType::Struct(fields) | DataType::Union(fields, _, _) => {
+ fields
+ .iter()
+ .map(|field| field.size() - std::mem::size_of_val(field))
+ .sum::<usize>()
+ + (std::mem::size_of::<Field>() * fields.capacity())
+ }
+ DataType::Dictionary(dt1, dt2) => dt1.size() + dt2.size(),
+ }
+ }
}
#[cfg(test)]
diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs
index 9eed03ed2..5813902dd 100644
--- a/arrow-schema/src/field.rs
+++ b/arrow-schema/src/field.rs
@@ -455,6 +455,21 @@ impl Field {
}
}
}
+
+ /// Return size of this instance in bytes.
+ ///
+ /// Includes the size of `Self`.
+ pub fn size(&self) -> usize {
+ std::mem::size_of_val(self) - std::mem::size_of_val(&self.data_type)
+ + self.data_type.size()
+ + self.name.capacity()
+ + (std::mem::size_of::<(String, String)>() * self.metadata.capacity())
+ + self
+ .metadata
+ .iter()
+ .map(|(k, v)| k.capacity() + v.capacity())
+ .sum::<usize>()
+ }
}
// TODO: improve display with crate https://crates.io/crates/derive_more ?