This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 2d17bf09d06 feat: recursively merge Field when datatype is DataType::List and DataType::LargeList (#5852)
2d17bf09d06 is described below

commit 2d17bf09d068d2b36792361498af2aa31f541a39
Author: Mrinal Paliwal <[email protected]>
AuthorDate: Thu Jun 13 18:42:45 2024 +0530

    feat: recursively merge Field when datatype is DataType::List and DataType::LargeList (#5852)
---
 arrow-schema/src/field.rs | 113 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 111 insertions(+), 2 deletions(-)

diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs
index b84a2568ed8..a84a6ada334 100644
--- a/arrow-schema/src/field.rs
+++ b/arrow-schema/src/field.rs
@@ -483,6 +483,30 @@ impl Field {
                     ));
                 }
             },
+            DataType::List(field) => match &from.data_type {
+                DataType::List(from_field) => {
+                    let mut f = (**field).clone();
+                    f.try_merge(from_field)?;
+                    (*field) = Arc::new(f);
+                },
+                _ => {
+                    return Err(ArrowError::SchemaError(
+                        format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::List",
+                            self.name, from.data_type)
+                ))}
+            },
+            DataType::LargeList(field) => match &from.data_type {
+                DataType::LargeList(from_field) => {
+                    let mut f = (**field).clone();
+                    f.try_merge(from_field)?;
+                    (*field) = Arc::new(f);
+                },
+                _ => {
+                    return Err(ArrowError::SchemaError(
+                        format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList",
+                            self.name, from.data_type)
+                ))}
+            },
             DataType::Null => {
                 self.nullable = true;
                 self.data_type = from.data_type.clone();
@@ -509,9 +533,7 @@ impl Field {
             | DataType::LargeBinary
             | DataType::BinaryView
             | DataType::Interval(_)
-            | DataType::LargeList(_)
             | DataType::LargeListView(_)
-            | DataType::List(_)
             | DataType::ListView(_)
             | DataType::Map(_, _)
             | DataType::Dictionary(_, _)
@@ -623,6 +645,93 @@ mod test {
         assert_eq!(Field::new("c2", DataType::Utf8, true), field2);
     }
 
+    #[test]
+    fn test_merge_with_nested_null() {
+        let mut struct1 = Field::new(
+            "s1",
+            DataType::Struct(Fields::from(vec![Field::new(
+                "inner",
+                DataType::Float32,
+                false,
+            )])),
+            false,
+        );
+
+        let struct2 = Field::new(
+            "s2",
+            DataType::Struct(Fields::from(vec![Field::new(
+                "inner",
+                DataType::Null,
+                false,
+            )])),
+            true,
+        );
+
+        struct1
+            .try_merge(&struct2)
+            .expect("should widen inner field's type to nullable float");
+        assert_eq!(
+            Field::new(
+                "s1",
+                DataType::Struct(Fields::from(vec![Field::new(
+                    "inner",
+                    DataType::Float32,
+                    true,
+                )])),
+                true,
+            ),
+            struct1
+        );
+
+        let mut list1 = Field::new(
+            "l1",
+            DataType::List(Field::new("inner", DataType::Float32, false).into()),
+            false,
+        );
+
+        let list2 = Field::new(
+            "l2",
+            DataType::List(Field::new("inner", DataType::Null, false).into()),
+            true,
+        );
+
+        list1
+            .try_merge(&list2)
+            .expect("should widen inner field's type to nullable float");
+        assert_eq!(
+            Field::new(
+                "l1",
+                DataType::List(Field::new("inner", DataType::Float32, true).into()),
+                true,
+            ),
+            list1
+        );
+
+        let mut large_list1 = Field::new(
+            "ll1",
+            DataType::LargeList(Field::new("inner", DataType::Float32, false).into()),
+            false,
+        );
+
+        let large_list2 = Field::new(
+            "ll2",
+            DataType::LargeList(Field::new("inner", DataType::Null, false).into()),
+            true,
+        );
+
+        large_list1
+            .try_merge(&large_list2)
+            .expect("should widen inner field's type to nullable float");
+        assert_eq!(
+            Field::new(
+                "ll1",
+                DataType::LargeList(Field::new("inner", DataType::Float32, true).into()),
+                true,
+            ),
+            large_list1
+        );
+    }
+
     #[test]
     fn test_fields_with_dict_id() {
         let dict1 = Field::new_dict(

Reply via email to