This is an automated email from the ASF dual-hosted git repository. tustvold pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
     new c26bb8107 Add Fields::contains (#4026)
c26bb8107 is described below

commit c26bb810700149e912b020048ed378e09e5267df
Author: Raphael Taylor-Davies <1781103+tustv...@users.noreply.github.com>
AuthorDate: Thu Apr 6 20:10:32 2023 +0100

    Add Fields::contains (#4026)

    * Add Fields::contains

    * Fix typo
---
 arrow-schema/src/field.rs  | 15 +++------------
 arrow-schema/src/fields.rs | 17 +++++++++++++++++
 arrow-schema/src/schema.rs | 20 ++++++++------------
 3 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs
index 1af157e4d..5edd5be7a 100644
--- a/arrow-schema/src/field.rs
+++ b/arrow-schema/src/field.rs
@@ -522,18 +522,9 @@ impl Field {
         // self need to be nullable or both of them are not nullable
         && (self.nullable || !other.nullable)
         // make sure self.metadata is a superset of other.metadata
-        && match (&self.metadata.is_empty(), &other.metadata.is_empty()) {
-            (_, true) => true,
-            (true, false) => false,
-            (false, false) => {
-                other.metadata().iter().all(|(k, v)| {
-                    match self.metadata().get(k) {
-                        Some(s) => s == v,
-                        None => false
-                    }
-                })
-            }
-        }
+        && other.metadata.iter().all(|(k, v1)| {
+            self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default()
+        })
     }
 
     /// Return size of this instance in bytes.
diff --git a/arrow-schema/src/fields.rs b/arrow-schema/src/fields.rs
index 07dff2aae..b93735328 100644
--- a/arrow-schema/src/fields.rs
+++ b/arrow-schema/src/fields.rs
@@ -66,6 +66,23 @@ impl Fields {
     pub fn find(&self, name: &str) -> Option<(usize, &FieldRef)> {
         self.0.iter().enumerate().find(|(_, b)| b.name() == name)
     }
+
+    /// Check to see if `self` is a superset of `other`
+    ///
+    /// In particular returns true if both have the same number of fields, and [`Field::contains`]
+    /// for each field across self and other
+    ///
+    /// In other words, any record that conforms to `other` should also conform to `self`
+    pub fn contains(&self, other: &Fields) -> bool {
+        if Arc::ptr_eq(&self.0, &other.0) {
+            return true;
+        }
+        self.len() == other.len()
+            && self
+                .iter()
+                .zip(other.iter())
+                .all(|(a, b)| Arc::ptr_eq(a, b) || a.contains(b))
+    }
 }
 
 impl Default for Fields {
diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs
index 2cc892f5a..a00e8a588 100644
--- a/arrow-schema/src/schema.rs
+++ b/arrow-schema/src/schema.rs
@@ -314,22 +314,18 @@ impl Schema {
         Some((idx, field.as_ref()))
     }
 
-    /// Check to see if `self` is a superset of `other` schema. Here are the comparison rules:
+    /// Check to see if `self` is a superset of `other` schema.
     ///
-    /// * `self` and `other` should contain the same number of fields
-    /// * for every field `f` in `other`, the field in `self` with corresponding index should be a
-    /// superset of `f`.
-    /// * self.metadata is a superset of other.metadata
+    /// In particular returns true if `self.metadata` is a superset of `other.metadata`
+    /// and [`Fields::contains`] for `self.fields` and `other.fields`
     ///
-    /// In other words, any record conforms to `other` should also conform to `self`.
+    /// In other words, any record that conforms to `other` should also conform to `self`.
     pub fn contains(&self, other: &Schema) -> bool {
-        self.fields.len() == other.fields.len()
-        && self.fields.iter().zip(other.fields.iter()).all(|(f1, f2)| f1.contains(f2))
         // make sure self.metadata is a superset of other.metadata
-        && other.metadata.iter().all(|(k, v1)| match self.metadata.get(k) {
-            Some(v2) => v1 == v2,
-            _ => false,
-        })
+        self.fields.contains(&other.fields)
+            && other.metadata.iter().all(|(k, v1)| {
+                self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default()
+            })
     }
 }