andygrove commented on code in PR #4251:
URL: https://github.com/apache/arrow-datafusion/pull/4251#discussion_r1025371215


##########
datafusion/core/src/datasource/mod.rs:
##########
@@ -175,3 +180,65 @@ fn get_col_stats(
         })
         .collect()
 }
+
+/// Specialized copy of Schema::try_merge that supports merging fields that 
have different,
+/// but compatible, data types
+pub(crate) fn try_merge_schemas(
+    schemas: impl IntoIterator<Item = Schema>,
+) -> Result<Schema> {
+    let mut metadata = HashMap::new();
+    let mut fields: Vec<Field> = vec![];
+    for schema in schemas {
+        for (key, value) in &schema.metadata {
+            if let Some(old_val) = metadata.get(key) {
+                if old_val != value {
+                    return 
Err(DataFusionError::ArrowError(ArrowError::SchemaError(
+                        format!(
+                            "Fail to merge schema due to conflicting metadata. 
\
+                                     Key '{}' has different values '{}' and 
'{}'",
+                            key, old_val, value
+                        ),
+                    )));
+                }
+            }
+            metadata.insert(key.to_owned(), value.to_owned());
+        }
+        for field in &schema.fields {
+            if let Some((i, merge_field)) =
+                fields.iter().find_position(|f| f.name() == field.name())
+            {
+                if merge_field.data_type() != field.data_type() {
+                    if let Some(new_type) =
+                        get_wider_type(merge_field.data_type(), 
field.data_type())

Review Comment:
   I do wonder whether we should make this behavior configurable, so that users 
can choose between strict schema matching and implicit type coercion :thinking: 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to