This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 2d17bf09d06 feat: recursively merge Field when datatype is
DataType::List and DataType::LargeList (#5852)
2d17bf09d06 is described below
commit 2d17bf09d068d2b36792361498af2aa31f541a39
Author: Mrinal Paliwal <[email protected]>
AuthorDate: Thu Jun 13 18:42:45 2024 +0530
feat: recursively merge Field when datatype is DataType::List and
DataType::LargeList (#5852)
---
arrow-schema/src/field.rs | 113 +++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 111 insertions(+), 2 deletions(-)
diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs
index b84a2568ed8..a84a6ada334 100644
--- a/arrow-schema/src/field.rs
+++ b/arrow-schema/src/field.rs
@@ -483,6 +483,30 @@ impl Field {
));
}
},
+ DataType::List(field) => match &from.data_type {
+ DataType::List(from_field) => {
+ let mut f = (**field).clone();
+ f.try_merge(from_field)?;
+ (*field) = Arc::new(f);
+ },
+ _ => {
+ return Err(ArrowError::SchemaError(
+ format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::List",
+ self.name, from.data_type)
+ ))}
+ },
+ DataType::LargeList(field) => match &from.data_type {
+ DataType::LargeList(from_field) => {
+ let mut f = (**field).clone();
+ f.try_merge(from_field)?;
+ (*field) = Arc::new(f);
+ },
+ _ => {
+ return Err(ArrowError::SchemaError(
+ format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList",
+ self.name, from.data_type)
+ ))}
+ },
DataType::Null => {
self.nullable = true;
self.data_type = from.data_type.clone();
@@ -509,9 +533,7 @@ impl Field {
| DataType::LargeBinary
| DataType::BinaryView
| DataType::Interval(_)
- | DataType::LargeList(_)
| DataType::LargeListView(_)
- | DataType::List(_)
| DataType::ListView(_)
| DataType::Map(_, _)
| DataType::Dictionary(_, _)
@@ -623,6 +645,93 @@ mod test {
assert_eq!(Field::new("c2", DataType::Utf8, true), field2);
}
+ #[test]
+ fn test_merge_with_nested_null() {
+ let mut struct1 = Field::new(
+ "s1",
+ DataType::Struct(Fields::from(vec![Field::new(
+ "inner",
+ DataType::Float32,
+ false,
+ )])),
+ false,
+ );
+
+ let struct2 = Field::new(
+ "s2",
+ DataType::Struct(Fields::from(vec![Field::new(
+ "inner",
+ DataType::Null,
+ false,
+ )])),
+ true,
+ );
+
+ struct1
+ .try_merge(&struct2)
+ .expect("should widen inner field's type to nullable float");
+ assert_eq!(
+ Field::new(
+ "s1",
+ DataType::Struct(Fields::from(vec![Field::new(
+ "inner",
+ DataType::Float32,
+ true,
+ )])),
+ true,
+ ),
+ struct1
+ );
+
+ let mut list1 = Field::new(
+ "l1",
+ DataType::List(Field::new("inner", DataType::Float32, false).into()),
+ false,
+ );
+
+ let list2 = Field::new(
+ "l2",
+ DataType::List(Field::new("inner", DataType::Null, false).into()),
+ true,
+ );
+
+ list1
+ .try_merge(&list2)
+ .expect("should widen inner field's type to nullable float");
+ assert_eq!(
+ Field::new(
+ "l1",
+ DataType::List(Field::new("inner", DataType::Float32, true).into()),
+ true,
+ ),
+ list1
+ );
+
+ let mut large_list1 = Field::new(
+ "ll1",
+ DataType::LargeList(Field::new("inner", DataType::Float32, false).into()),
+ false,
+ );
+
+ let large_list2 = Field::new(
+ "ll2",
+ DataType::LargeList(Field::new("inner", DataType::Null, false).into()),
+ true,
+ );
+
+ large_list1
+ .try_merge(&large_list2)
+ .expect("should widen inner field's type to nullable float");
+ assert_eq!(
+ Field::new(
+ "ll1",
+ DataType::LargeList(Field::new("inner", DataType::Float32, true).into()),
+ true,
+ ),
+ large_list1
+ );
+ }
+
#[test]
fn test_fields_with_dict_id() {
let dict1 = Field::new_dict(