This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 2f51d5fdc1 Document schema merging. (#17249)
2f51d5fdc1 is described below

commit 2f51d5fdc1d5aa33d241969c3f9bb1c6079f9421
Author: wiedld <wie...@users.noreply.github.com>
AuthorDate: Fri Aug 22 12:14:22 2025 -0700

    Document schema merging. (#17249)
    
    * chore: add docs explaining FieldMetadata::merge_options
    
    * chore: document DFSchema::merge, which is used in logical plan construction & modification (e.g. LP optimizers)
    
    * chore: merge_schema utils method
    
    * chore: clarify wording
---
 datafusion/common/src/dfschema.rs | 14 ++++++++++++
 datafusion/expr/src/expr.rs       | 45 ++++++++++++++++++++++++++++++++++++++-
 datafusion/expr/src/utils.rs      |  3 +++
 3 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs
index daf4e19ce0..d3dda28882 100644
--- a/datafusion/common/src/dfschema.rs
+++ b/datafusion/common/src/dfschema.rs
@@ -297,6 +297,20 @@ impl DFSchema {
 
     /// Modify this schema by appending the fields from the supplied schema, ignoring any
     /// duplicate fields.
+    ///
+    /// ## Merge Precedence
+    ///
+    /// **Schema-level metadata**: Metadata from both schemas is merged.
+    /// If both schemas have the same metadata key, the value from the `other_schema` parameter takes precedence.
+    ///
+    /// **Field-level merging**: Only non-duplicate fields are added. This means that the
+    /// `self` fields will always take precedence over the `other_schema` fields.
+    /// Duplicate field detection is based on:
+    /// - For qualified fields: both qualifier and field name must match
+    /// - For unqualified fields: only field name needs to match
+    ///
+    /// Take note how the precedence for fields & metadata merging differs;
+    /// merging prefers fields from `self` but prefers metadata from `other_schema`.
     pub fn merge(&mut self, other_schema: &DFSchema) {
         if other_schema.inner.fields.is_empty() {
             return;
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index 9e2ac794de..2324ae79c0 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -469,7 +469,50 @@ impl FieldMetadata {
     }
 
     /// Merges two optional `FieldMetadata` instances, overwriting any existing
-    /// keys in `m` with keys from `n` if present
+    /// keys in `m` with keys from `n` if present.
+    ///
+    /// This function is commonly used in alias operations, particularly for literals
+    /// with metadata. When creating an alias expression, the metadata from the original
+    /// expression (such as a literal) is combined with any metadata specified on the alias.
+    ///
+    /// # Arguments
+    ///
+    /// * `m` - The first metadata (typically from the original expression like a literal)
+    /// * `n` - The second metadata (typically from the alias definition)
+    ///
+    /// # Merge Strategy
+    ///
+    /// - If both metadata instances exist, they are merged with `n` taking precedence
+    /// - Keys from `n` will overwrite keys from `m` if they have the same name
+    /// - If only one metadata instance exists, it is returned unchanged
+    /// - If neither exists, `None` is returned
+    ///
+    /// # Example usage
+    /// ```rust
+    /// use datafusion_expr::expr::FieldMetadata;
+    /// use std::collections::BTreeMap;
+    ///
+    /// // Create metadata for a literal expression
+    /// let literal_metadata = Some(FieldMetadata::from(BTreeMap::from([
+    ///     ("source".to_string(), "constant".to_string()),
+    ///     ("type".to_string(), "int".to_string()),
+    /// ])));
+    ///
+    /// // Create metadata for an alias
+    /// let alias_metadata = Some(FieldMetadata::from(BTreeMap::from([
+    ///     ("description".to_string(), "answer".to_string()),
+    ///     ("source".to_string(), "user".to_string()), // This will override literal's "source"
+    /// ])));
+    ///
+    /// // Merge the metadata
+    /// let merged = FieldMetadata::merge_options(
+    ///     literal_metadata.as_ref(),
+    ///     alias_metadata.as_ref(),
+    /// );
+    ///
+    /// // Result contains: {"source": "user", "type": "int", "description": "answer"}
+    /// assert!(merged.is_some());
+    /// ```
     pub fn merge_options(
         m: Option<&FieldMetadata>,
         n: Option<&FieldMetadata>,
diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs
index 7a612b6fe6..2e364d0d2b 100644
--- a/datafusion/expr/src/utils.rs
+++ b/datafusion/expr/src/utils.rs
@@ -1225,6 +1225,9 @@ pub fn only_or_err<T>(slice: &[T]) -> Result<&T> {
 }
 
 /// merge inputs schema into a single schema.
+///
+/// This function merges schemas from multiple logical plan inputs using [`DFSchema::merge`].
+/// Refer to that documentation for details on precedence and metadata handling.
 pub fn merge_schema(inputs: &[&LogicalPlan]) -> DFSchema {
     if inputs.len() == 1 {
         inputs[0].schema().as_ref().clone()


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to