This is an automated email from the ASF dual-hosted git repository. alamb pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push: new 2f51d5fdc1 Document schema merging. (#17249) 2f51d5fdc1 is described below commit 2f51d5fdc1d5aa33d241969c3f9bb1c6079f9421 Author: wiedld <wie...@users.noreply.github.com> AuthorDate: Fri Aug 22 12:14:22 2025 -0700 Document schema merging. (#17249) * chore: add docs explaining FieldMetadata::merge_options * chore: document DFSchema::merge, which is used in logical plan construction & modification (e.g. LP optimizers) * chore: merge_schema utils method * chore: clarify wording --- datafusion/common/src/dfschema.rs | 14 ++++++++++++ datafusion/expr/src/expr.rs | 45 ++++++++++++++++++++++++++++++++++++++- datafusion/expr/src/utils.rs | 3 +++ 3 files changed, 61 insertions(+), 1 deletion(-) diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index daf4e19ce0..d3dda28882 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -297,6 +297,20 @@ impl DFSchema { /// Modify this schema by appending the fields from the supplied schema, ignoring any /// duplicate fields. + /// + /// ## Merge Precedence + /// + /// **Schema-level metadata**: Metadata from both schemas is merged. + /// If both schemas have the same metadata key, the value from the `other_schema` parameter takes precedence. + /// + /// **Field-level merging**: Only non-duplicate fields are added. This means that the + /// `self` fields will always take precedence over the `other_schema` fields. + /// Duplicate field detection is based on: + /// - For qualified fields: both qualifier and field name must match + /// - For unqualified fields: only field name needs to match + /// + /// Take note how the precedence for fields & metadata merging differs; + /// merging prefers fields from `self` but prefers metadata from `other_schema`. pub fn merge(&mut self, other_schema: &DFSchema) { if other_schema.inner.fields.is_empty() { return; diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 9e2ac794de..2324ae79c0 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -469,7 +469,50 @@ impl FieldMetadata { } /// Merges two optional `FieldMetadata` instances, overwriting any existing - /// keys in `m` with keys from `n` if present + /// keys in `m` with keys from `n` if present. + /// + /// This function is commonly used in alias operations, particularly for literals + /// with metadata. When creating an alias expression, the metadata from the original + /// expression (such as a literal) is combined with any metadata specified on the alias. + /// + /// # Arguments + /// + /// * `m` - The first metadata (typically from the original expression like a literal) + /// * `n` - The second metadata (typically from the alias definition) + /// + /// # Merge Strategy + /// + /// - If both metadata instances exist, they are merged with `n` taking precedence + /// - Keys from `n` will overwrite keys from `m` if they have the same name + /// - If only one metadata instance exists, it is returned unchanged + /// - If neither exists, `None` is returned + /// + /// # Example usage + /// ```rust + /// use datafusion_expr::expr::FieldMetadata; + /// use std::collections::BTreeMap; + /// + /// // Create metadata for a literal expression + /// let literal_metadata = Some(FieldMetadata::from(BTreeMap::from([ + /// ("source".to_string(), "constant".to_string()), + /// ("type".to_string(), "int".to_string()), + /// ]))); + /// + /// // Create metadata for an alias + /// let alias_metadata = Some(FieldMetadata::from(BTreeMap::from([ + /// ("description".to_string(), "answer".to_string()), + /// ("source".to_string(), "user".to_string()), // This will override literal's "source" + /// ]))); + /// + /// // Merge the metadata + /// let merged = FieldMetadata::merge_options( + /// literal_metadata.as_ref(), + /// alias_metadata.as_ref(), + /// ); + /// + /// // Result contains: {"source": "user", "type": "int", "description": "answer"} + /// assert!(merged.is_some()); + /// ``` pub fn merge_options( m: Option<&FieldMetadata>, n: Option<&FieldMetadata>, diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 7a612b6fe6..2e364d0d2b 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -1225,6 +1225,9 @@ pub fn only_or_err<T>(slice: &[T]) -> Result<&T> { } /// merge inputs schema into a single schema. +/// +/// This function merges schemas from multiple logical plan inputs using [`DFSchema::merge`]. +/// Refer to that documentation for details on precedence and metadata handling. pub fn merge_schema(inputs: &[&LogicalPlan]) -> DFSchema { if inputs.len() == 1 { inputs[0].schema().as_ref().clone() --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org For additional commands, e-mail: commits-h...@datafusion.apache.org