andygrove commented on code in PR #4251:
URL: https://github.com/apache/arrow-datafusion/pull/4251#discussion_r1025382824
##########
datafusion/core/src/datasource/mod.rs:
##########
@@ -175,3 +180,65 @@ fn get_col_stats(
})
.collect()
}
+
+/// Specialized copy of Schema::try_merge that supports merging fields that have different,
+/// but compatible, data types
+pub(crate) fn try_merge_schemas(
+ schemas: impl IntoIterator<Item = Schema>,
+) -> Result<Schema> {
+ let mut metadata = HashMap::new();
+ let mut fields: Vec<Field> = vec![];
+ for schema in schemas {
+ for (key, value) in &schema.metadata {
+ if let Some(old_val) = metadata.get(key) {
+ if old_val != value {
+ return
Err(DataFusionError::ArrowError(ArrowError::SchemaError(
+ format!(
+ "Fail to merge schema due to conflicting metadata.
\
+ Key '{}' has different values '{}' and
'{}'",
+ key, old_val, value
+ ),
+ )));
+ }
+ }
+ metadata.insert(key.to_owned(), value.to_owned());
+ }
+ for field in &schema.fields {
+ if let Some((i, merge_field)) =
+ fields.iter().find_position(|f| f.name() == field.name())
+ {
+ if merge_field.data_type() != field.data_type() {
+ if let Some(new_type) =
+ get_wider_type(merge_field.data_type(),
field.data_type())
Review Comment:
I agree. It is always best to let the user make the choice. I will make this
configurable as part of this PR.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]