This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new a35c6c5f4 feat: DataType::contains support nested type (#4042)
a35c6c5f4 is described below

commit a35c6c5f4309787a9a2f523920af2efd9b1682b9
Author: Alex Huang <[email protected]>
AuthorDate: Wed Apr 12 11:59:02 2023 +0200

    feat: DataType::contains support nested type (#4042)
    
    * feat: DataType::contains support nested type
    
    * support recurse
    
    * check typeID for Union
---
 arrow-schema/src/datatype.rs | 26 ++++++++++++++++++
 arrow-schema/src/field.rs    | 64 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs
index 3f684285c..64e8d0e77 100644
--- a/arrow-schema/src/datatype.rs
+++ b/arrow-schema/src/datatype.rs
@@ -517,6 +517,32 @@ impl DataType {
                 }
             }
     }
+
+    /// Check to see if `self` is a superset of `other`
+    ///
+    /// Matching nested types are compared recursively:
+    ///
+    /// * `List` / `LargeList`: the child field of `self` must contain the
+    ///   child field of `other`
+    /// * `FixedSizeList` / `Map`: as above, and the second component of the
+    ///   variant must be equal
+    /// * `Struct`: the fields of `self` must contain the fields of `other`
+    /// * `Union`: the modes must be equal, and every `(type id, field)` pair
+    ///   of `other` must have a pair in `self` with the same type id whose
+    ///   field contains it; `self` may declare additional variants
+    /// * `Dictionary`: both the key type and the value type of `self` must
+    ///   contain their counterpart in `other`
+    ///
+    /// Every other combination, including two different variants, falls back
+    /// to a plain equality check.
+    pub fn contains(&self, other: &DataType) -> bool {
+        match (self, other) {
+            (DataType::List(f1), DataType::List(f2))
+            | (DataType::LargeList(f1), DataType::LargeList(f2)) => f1.contains(f2),
+            (DataType::FixedSizeList(f1, s1), DataType::FixedSizeList(f2, s2)) => {
+                s1 == s2 && f1.contains(f2)
+            }
+            (DataType::Map(f1, s1), DataType::Map(f2, s2)) => s1 == s2 && f1.contains(f2),
+            (DataType::Struct(f1), DataType::Struct(f2)) => f1.contains(f2),
+            (DataType::Union(f1, s1), DataType::Union(f2, s2)) => {
+                // Walk the variants of `other`, not of `self`: a superset has to
+                // cover everything `other` can hold, but may offer more variants.
+                s1 == s2
+                    && f2
+                        .iter()
+                        .all(|f2| f1.iter().any(|f1| f1.0 == f2.0 && f1.1.contains(f2.1)))
+            }
+            (DataType::Dictionary(k1, v1), DataType::Dictionary(k2, v2)) => {
+                k1.contains(k2) && v1.contains(v2)
+            }
+            _ => self == other,
+        }
+    }
 }
 
 /// The maximum precision for [DataType::Decimal128] values
diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs
index 5edd5be7a..f38e1e26a 100644
--- a/arrow-schema/src/field.rs
+++ b/arrow-schema/src/field.rs
@@ -516,7 +516,7 @@ impl Field {
     /// * all other fields are equal
     pub fn contains(&self, other: &Field) -> bool {
         self.name == other.name
-        && self.data_type == other.data_type
+        && self.data_type.contains(&other.data_type)
         && self.dict_id == other.dict_id
         && self.dict_is_ordered == other.dict_is_ordered
         // self need to be nullable or both of them are not nullable
@@ -758,6 +758,68 @@ mod test {
 
         assert!(!field1.contains(&field2));
         assert!(!field2.contains(&field1));
+
+        // UnionFields with different type ID
+        let field1 = Field::new(
+            "field1",
+            DataType::Union(
+                UnionFields::new(
+                    vec![1, 2],
+                    vec![
+                        Field::new("field1", DataType::UInt8, true),
+                        Field::new("field3", DataType::Utf8, false),
+                    ],
+                ),
+                UnionMode::Dense,
+            ),
+            true,
+        );
+        let field2 = Field::new(
+            "field1",
+            DataType::Union(
+                UnionFields::new(
+                    vec![1, 3],
+                    vec![
+                        Field::new("field1", DataType::UInt8, false),
+                        Field::new("field3", DataType::Utf8, false),
+                    ],
+                ),
+                UnionMode::Dense,
+            ),
+            true,
+        );
+        assert!(!field1.contains(&field2));
+
+        // UnionFields with same type ID
+        let field1 = Field::new(
+            "field1",
+            DataType::Union(
+                UnionFields::new(
+                    vec![1, 2],
+                    vec![
+                        Field::new("field1", DataType::UInt8, true),
+                        Field::new("field3", DataType::Utf8, false),
+                    ],
+                ),
+                UnionMode::Dense,
+            ),
+            true,
+        );
+        let field2 = Field::new(
+            "field1",
+            DataType::Union(
+                UnionFields::new(
+                    vec![1, 2],
+                    vec![
+                        Field::new("field1", DataType::UInt8, false),
+                        Field::new("field3", DataType::Utf8, false),
+                    ],
+                ),
+                UnionMode::Dense,
+            ),
+            true,
+        );
+        assert!(field1.contains(&field2));
     }
 
     #[cfg(feature = "serde")]

Reply via email to