This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 1e67364f81 fix: Ignore names of technical inner fields (of List and 
Map types) when comparing datatypes for logical equivalence (#13522)
1e67364f81 is described below

commit 1e67364f81e434e170ee8042422b57071b3a232a
Author: Arttu <[email protected]>
AuthorDate: Mon Nov 25 22:32:47 2024 +0100

    fix: Ignore names of technical inner fields (of List and Map types) when 
comparing datatypes for logical equivalence (#13522)
    
    * fix: Ignore names of technical inner fields (of List and Map types) when 
comparing datatypes for logical equivalence
    
    * apply same logic to datatype_is_semantically_equal
---
 datafusion/common/src/dfschema.rs | 326 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 320 insertions(+), 6 deletions(-)

diff --git a/datafusion/common/src/dfschema.rs 
b/datafusion/common/src/dfschema.rs
index e893cee089..45620c3cac 100644
--- a/datafusion/common/src/dfschema.rs
+++ b/datafusion/common/src/dfschema.rs
@@ -656,9 +656,26 @@ impl DFSchema {
             (othertype, DataType::Dictionary(_, v1)) => v1.as_ref() == 
othertype,
             (DataType::List(f1), DataType::List(f2))
             | (DataType::LargeList(f1), DataType::LargeList(f2))
-            | (DataType::FixedSizeList(f1, _), DataType::FixedSizeList(f2, _))
-            | (DataType::Map(f1, _), DataType::Map(f2, _)) => {
-                Self::field_is_logically_equal(f1, f2)
+            | (DataType::FixedSizeList(f1, _), DataType::FixedSizeList(f2, _)) 
=> {
+                // Don't compare the names of the technical inner field
+                // Usually "item" but that's not mandated
+                Self::datatype_is_logically_equal(f1.data_type(), 
f2.data_type())
+            }
+            (DataType::Map(f1, _), DataType::Map(f2, _)) => {
+                // Don't compare the names of the technical inner fields
+                // Usually "entries", "key", "value" but that's not mandated
+                match (f1.data_type(), f2.data_type()) {
+                    (DataType::Struct(f1_inner), DataType::Struct(f2_inner)) 
=> {
+                        f1_inner.len() == f2_inner.len()
+                            && f1_inner.iter().zip(f2_inner.iter()).all(|(f1, 
f2)| {
+                                Self::datatype_is_logically_equal(
+                                    f1.data_type(),
+                                    f2.data_type(),
+                                )
+                            })
+                    }
+                    _ => panic!("Map type should have an inner struct field"),
+                }
             }
             (DataType::Struct(fields1), DataType::Struct(fields2)) => {
                 let iter1 = fields1.iter();
@@ -695,9 +712,26 @@ impl DFSchema {
             }
             (DataType::List(f1), DataType::List(f2))
             | (DataType::LargeList(f1), DataType::LargeList(f2))
-            | (DataType::FixedSizeList(f1, _), DataType::FixedSizeList(f2, _))
-            | (DataType::Map(f1, _), DataType::Map(f2, _)) => {
-                Self::field_is_semantically_equal(f1, f2)
+            | (DataType::FixedSizeList(f1, _), DataType::FixedSizeList(f2, _)) 
=> {
+                // Don't compare the names of the technical inner field
+                // Usually "item" but that's not mandated
+                Self::datatype_is_semantically_equal(f1.data_type(), 
f2.data_type())
+            }
+            (DataType::Map(f1, _), DataType::Map(f2, _)) => {
+                // Don't compare the names of the technical inner fields
+                // Usually "entries", "key", "value" but that's not mandated
+                match (f1.data_type(), f2.data_type()) {
+                    (DataType::Struct(f1_inner), DataType::Struct(f2_inner)) 
=> {
+                        f1_inner.len() == f2_inner.len()
+                            && f1_inner.iter().zip(f2_inner.iter()).all(|(f1, 
f2)| {
+                                Self::datatype_is_semantically_equal(
+                                    f1.data_type(),
+                                    f2.data_type(),
+                                )
+                            })
+                    }
+                    _ => panic!("Map type should have an inner struct field"),
+                }
             }
             (DataType::Struct(fields1), DataType::Struct(fields2)) => {
                 let iter1 = fields1.iter();
@@ -1332,6 +1366,286 @@ mod tests {
         Ok(())
     }
 
+    #[test]
+    fn test_datatype_is_logically_equal() {
+        assert!(DFSchema::datatype_is_logically_equal(
+            &DataType::Int8,
+            &DataType::Int8
+        ));
+
+        assert!(!DFSchema::datatype_is_logically_equal(
+            &DataType::Int8,
+            &DataType::Int16
+        ));
+
+        // Test lists
+
+        // Succeeds if both have the same element type, disregards names and 
nullability
+        assert!(DFSchema::datatype_is_logically_equal(
+            &DataType::List(Field::new("item", DataType::Int8, true).into()),
+            &DataType::List(Field::new("element", DataType::Int8, 
false).into())
+        ));
+
+        // Fails if element type is different
+        assert!(!DFSchema::datatype_is_logically_equal(
+            &DataType::List(Field::new("item", DataType::Int8, true).into()),
+            &DataType::List(Field::new("item", DataType::Int16, true).into())
+        ));
+
+        // Test maps
+        let map_field = DataType::Map(
+            Field::new(
+                "entries",
+                DataType::Struct(Fields::from(vec![
+                    Field::new("key", DataType::Int8, false),
+                    Field::new("value", DataType::Int8, true),
+                ])),
+                true,
+            )
+            .into(),
+            true,
+        );
+
+        // Succeeds if both maps have the same key and value types, disregards 
names and nullability
+        assert!(DFSchema::datatype_is_logically_equal(
+            &map_field,
+            &DataType::Map(
+                Field::new(
+                    "pairs",
+                    DataType::Struct(Fields::from(vec![
+                        Field::new("one", DataType::Int8, false),
+                        Field::new("two", DataType::Int8, false)
+                    ])),
+                    true
+                )
+                .into(),
+                true
+            )
+        ));
+        // Fails if value type is different
+        assert!(!DFSchema::datatype_is_logically_equal(
+            &map_field,
+            &DataType::Map(
+                Field::new(
+                    "entries",
+                    DataType::Struct(Fields::from(vec![
+                        Field::new("key", DataType::Int8, false),
+                        Field::new("value", DataType::Int16, true)
+                    ])),
+                    true
+                )
+                .into(),
+                true
+            )
+        ));
+
+        // Fails if key type is different
+        assert!(!DFSchema::datatype_is_logically_equal(
+            &map_field,
+            &DataType::Map(
+                Field::new(
+                    "entries",
+                    DataType::Struct(Fields::from(vec![
+                        Field::new("key", DataType::Int16, false),
+                        Field::new("value", DataType::Int8, true)
+                    ])),
+                    true
+                )
+                .into(),
+                true
+            )
+        ));
+
+        // Test structs
+
+        let struct_field = DataType::Struct(Fields::from(vec![
+            Field::new("a", DataType::Int8, true),
+            Field::new("b", DataType::Int8, true),
+        ]));
+
+        // Succeeds if both have same names and datatypes, ignores nullability
+        assert!(DFSchema::datatype_is_logically_equal(
+            &struct_field,
+            &DataType::Struct(Fields::from(vec![
+                Field::new("a", DataType::Int8, false),
+                Field::new("b", DataType::Int8, true),
+            ]))
+        ));
+
+        // Fails if field names are different
+        assert!(!DFSchema::datatype_is_logically_equal(
+            &struct_field,
+            &DataType::Struct(Fields::from(vec![
+                Field::new("x", DataType::Int8, true),
+                Field::new("y", DataType::Int8, true),
+            ]))
+        ));
+
+        // Fails if types are different
+        assert!(!DFSchema::datatype_is_logically_equal(
+            &struct_field,
+            &DataType::Struct(Fields::from(vec![
+                Field::new("a", DataType::Int16, true),
+                Field::new("b", DataType::Int8, true),
+            ]))
+        ));
+
+        // Fails if more or less fields
+        assert!(!DFSchema::datatype_is_logically_equal(
+            &struct_field,
+            &DataType::Struct(Fields::from(vec![Field::new("a", 
DataType::Int8, true),]))
+        ));
+    }
+
+    #[test]
+    fn test_datatype_is_logically_equivalent_to_dictionary() {
+        // Dictionary is logically equal to its value type
+        assert!(DFSchema::datatype_is_logically_equal(
+            &DataType::Utf8,
+            &DataType::Dictionary(Box::new(DataType::Int32), 
Box::new(DataType::Utf8))
+        ));
+    }
+
+    #[test]
+    fn test_datatype_is_semantically_equal() {
+        assert!(DFSchema::datatype_is_semantically_equal(
+            &DataType::Int8,
+            &DataType::Int8
+        ));
+
+        assert!(!DFSchema::datatype_is_semantically_equal(
+            &DataType::Int8,
+            &DataType::Int16
+        ));
+
+        // Test lists
+
+        // Succeeds if both have the same element type, disregards names and 
nullability
+        assert!(DFSchema::datatype_is_semantically_equal(
+            &DataType::List(Field::new("item", DataType::Int8, true).into()),
+            &DataType::List(Field::new("element", DataType::Int8, 
false).into())
+        ));
+
+        // Fails if element type is different
+        assert!(!DFSchema::datatype_is_semantically_equal(
+            &DataType::List(Field::new("item", DataType::Int8, true).into()),
+            &DataType::List(Field::new("item", DataType::Int16, true).into())
+        ));
+
+        // Test maps
+        let map_field = DataType::Map(
+            Field::new(
+                "entries",
+                DataType::Struct(Fields::from(vec![
+                    Field::new("key", DataType::Int8, false),
+                    Field::new("value", DataType::Int8, true),
+                ])),
+                true,
+            )
+            .into(),
+            true,
+        );
+
+        // Succeeds if both maps have the same key and value types, disregards 
names and nullability
+        assert!(DFSchema::datatype_is_semantically_equal(
+            &map_field,
+            &DataType::Map(
+                Field::new(
+                    "pairs",
+                    DataType::Struct(Fields::from(vec![
+                        Field::new("one", DataType::Int8, false),
+                        Field::new("two", DataType::Int8, false)
+                    ])),
+                    true
+                )
+                .into(),
+                true
+            )
+        ));
+        // Fails if value type is different
+        assert!(!DFSchema::datatype_is_semantically_equal(
+            &map_field,
+            &DataType::Map(
+                Field::new(
+                    "entries",
+                    DataType::Struct(Fields::from(vec![
+                        Field::new("key", DataType::Int8, false),
+                        Field::new("value", DataType::Int16, true)
+                    ])),
+                    true
+                )
+                .into(),
+                true
+            )
+        ));
+
+        // Fails if key type is different
+        assert!(!DFSchema::datatype_is_semantically_equal(
+            &map_field,
+            &DataType::Map(
+                Field::new(
+                    "entries",
+                    DataType::Struct(Fields::from(vec![
+                        Field::new("key", DataType::Int16, false),
+                        Field::new("value", DataType::Int8, true)
+                    ])),
+                    true
+                )
+                .into(),
+                true
+            )
+        ));
+
+        // Test structs
+
+        let struct_field = DataType::Struct(Fields::from(vec![
+            Field::new("a", DataType::Int8, true),
+            Field::new("b", DataType::Int8, true),
+        ]));
+
+        // Succeeds if both have same names and datatypes, ignores nullability
+        assert!(DFSchema::datatype_is_logically_equal(
+            &struct_field,
+            &DataType::Struct(Fields::from(vec![
+                Field::new("a", DataType::Int8, false),
+                Field::new("b", DataType::Int8, true),
+            ]))
+        ));
+
+        // Fails if field names are different
+        assert!(!DFSchema::datatype_is_logically_equal(
+            &struct_field,
+            &DataType::Struct(Fields::from(vec![
+                Field::new("x", DataType::Int8, true),
+                Field::new("y", DataType::Int8, true),
+            ]))
+        ));
+
+        // Fails if types are different
+        assert!(!DFSchema::datatype_is_logically_equal(
+            &struct_field,
+            &DataType::Struct(Fields::from(vec![
+                Field::new("a", DataType::Int16, true),
+                Field::new("b", DataType::Int8, true),
+            ]))
+        ));
+
+        // Fails if more or less fields
+        assert!(!DFSchema::datatype_is_logically_equal(
+            &struct_field,
+            &DataType::Struct(Fields::from(vec![Field::new("a", 
DataType::Int8, true),]))
+        ));
+    }
+
+    #[test]
+    fn test_datatype_is_not_semantically_equivalent_to_dictionary() {
+        // Dictionary is not semantically equal to its value type
+        assert!(!DFSchema::datatype_is_semantically_equal(
+            &DataType::Utf8,
+            &DataType::Dictionary(Box::new(DataType::Int32), 
Box::new(DataType::Utf8))
+        ));
+    }
+
     fn test_schema_2() -> Schema {
         Schema::new(vec![
             Field::new("c100", DataType::Boolean, true),


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to