This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new a110004b3 feat: `{Field,DataType}::size` (#3149)
a110004b3 is described below

commit a110004b3d9f30358c22ac917fcad3745ea2460c
Author: Marco Neumann <[email protected]>
AuthorDate: Tue Nov 22 15:58:51 2022 +0000

    feat: `{Field,DataType}::size` (#3149)
    
    Add a way to calculate in-memory size of `Field` and `DataType`.
    
    Closes #3147.
---
 arrow-schema/src/datatype.rs | 50 ++++++++++++++++++++++++++++++++++++++++++++
 arrow-schema/src/field.rs    | 15 +++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs
index 572d6f67d..b9be4bec7 100644
--- a/arrow-schema/src/datatype.rs
+++ b/arrow-schema/src/datatype.rs
@@ -372,6 +372,56 @@ impl DataType {
             _ => self == other,
         }
     }
+
+    /// Return size of this instance in bytes.
+    ///
+    /// Includes the size of `Self`.
+    pub fn size(&self) -> usize {
+        std::mem::size_of_val(self)
+            + match self {
+                DataType::Null
+                | DataType::Boolean
+                | DataType::Int8
+                | DataType::Int16
+                | DataType::Int32
+                | DataType::Int64
+                | DataType::UInt8
+                | DataType::UInt16
+                | DataType::UInt32
+                | DataType::UInt64
+                | DataType::Float16
+                | DataType::Float32
+                | DataType::Float64
+                | DataType::Date32
+                | DataType::Date64
+                | DataType::Time32(_)
+                | DataType::Time64(_)
+                | DataType::Duration(_)
+                | DataType::Interval(_)
+                | DataType::Binary
+                | DataType::FixedSizeBinary(_)
+                | DataType::LargeBinary
+                | DataType::Utf8
+                | DataType::LargeUtf8
+                | DataType::Decimal128(_, _)
+                | DataType::Decimal256(_, _) => 0,
+                DataType::Timestamp(_, s) => {
+                    s.as_ref().map(|s| s.capacity()).unwrap_or_default()
+                }
+                DataType::List(field)
+                | DataType::FixedSizeList(field, _)
+                | DataType::LargeList(field)
+                | DataType::Map(field, _) => field.size(),
+                DataType::Struct(fields) | DataType::Union(fields, _, _) => {
+                    fields
+                        .iter()
+                        .map(|field| field.size() - std::mem::size_of_val(field))
+                        .sum::<usize>()
+                        + (std::mem::size_of::<Field>() * fields.capacity())
+                }
+                DataType::Dictionary(dt1, dt2) => dt1.size() + dt2.size(),
+            }
+    }
 }
 
 #[cfg(test)]
diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs
index 9eed03ed2..5813902dd 100644
--- a/arrow-schema/src/field.rs
+++ b/arrow-schema/src/field.rs
@@ -455,6 +455,21 @@ impl Field {
             }
         }
     }
+
+    /// Return size of this instance in bytes.
+    ///
+    /// Includes the size of `Self`.
+    pub fn size(&self) -> usize {
+        std::mem::size_of_val(self) - std::mem::size_of_val(&self.data_type)
+            + self.data_type.size()
+            + self.name.capacity()
+            + (std::mem::size_of::<(String, String)>() * self.metadata.capacity())
+            + self
+                .metadata
+                .iter()
+                .map(|(k, v)| k.capacity() + v.capacity())
+                .sum::<usize>()
+    }
 }
 
 // TODO: improve display with crate https://crates.io/crates/derive_more ?

Reply via email to