This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 0c3bb78e24 Unify Metadata Handing: use `FieldMetadata` in 
`Expr::Alias` and `ExprSchemable` (#16320)
0c3bb78e24 is described below

commit 0c3bb78e24722e2a4f19cdc9a76f1c498949f5be
Author: Andrew Lamb <and...@nerdnetworks.org>
AuthorDate: Tue Jun 17 12:03:49 2025 -0400

    Unify Metadata Handing: use `FieldMetadata` in `Expr::Alias` and 
`ExprSchemable` (#16320)
---
 datafusion/core/tests/dataframe/mod.rs             |  3 +-
 .../user_defined/user_defined_scalar_functions.rs  |  3 +-
 datafusion/expr/src/expr.rs                        | 57 ++++++++++++++++------
 datafusion/expr/src/expr_schema.rs                 | 31 ++++++------
 datafusion/physical-expr/src/planner.rs            |  1 -
 datafusion/proto/src/logical_plan/to_proto.rs      |  5 +-
 docs/source/library-user-guide/upgrading.md        | 29 +++++++++++
 7 files changed, 92 insertions(+), 37 deletions(-)

diff --git a/datafusion/core/tests/dataframe/mod.rs 
b/datafusion/core/tests/dataframe/mod.rs
index 3c4f8018a1..8d60dbea3d 100644
--- a/datafusion/core/tests/dataframe/mod.rs
+++ b/datafusion/core/tests/dataframe/mod.rs
@@ -70,7 +70,7 @@ use datafusion_common::{
 use datafusion_common_runtime::SpawnedTask;
 use datafusion_execution::config::SessionConfig;
 use datafusion_execution::runtime_env::RuntimeEnv;
-use datafusion_expr::expr::{GroupingSet, Sort, WindowFunction};
+use datafusion_expr::expr::{FieldMetadata, GroupingSet, Sort, WindowFunction};
 use datafusion_expr::var_provider::{VarProvider, VarType};
 use datafusion_expr::{
     cast, col, create_udf, exists, in_subquery, lit, out_ref_col, placeholder,
@@ -5676,6 +5676,7 @@ async fn test_alias() -> Result<()> {
 async fn test_alias_with_metadata() -> Result<()> {
     let mut metadata = HashMap::new();
     metadata.insert(String::from("k"), String::from("v"));
+    let metadata = FieldMetadata::from(metadata);
     let df = create_test_table("test")
         .await?
         .select(vec![col("a").alias_with_metadata("b", Some(metadata))])?
diff --git 
a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs 
b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
index a4e278e511..90e49e504c 100644
--- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
+++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
@@ -1533,10 +1533,11 @@ async fn test_metadata_based_udf_with_literal() -> 
Result<()> {
         [("modify_values".to_string(), "double_output".to_string())]
             .into_iter()
             .collect();
+    let input_metadata = FieldMetadata::from(input_metadata);
     let df = ctx.sql("select 0;").await?.select(vec![
         lit(5u64).alias_with_metadata("lit_with_doubling", 
Some(input_metadata.clone())),
         lit(5u64).alias("lit_no_doubling"),
-        lit_with_metadata(5u64, Some(FieldMetadata::from(input_metadata)))
+        lit_with_metadata(5u64, Some(input_metadata))
             .alias("lit_with_double_no_alias_metadata"),
     ])?;
 
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index f41f09fc0a..97f83305dc 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -517,6 +517,9 @@ impl FieldMetadata {
 
     /// Adds metadata from `other` into `self`, overwriting any existing keys.
     pub fn extend(&mut self, other: Self) {
+        if other.is_empty() {
+            return;
+        }
         let other = Arc::unwrap_or_clone(other.into_inner());
         Arc::make_mut(&mut self.inner).extend(other);
     }
@@ -531,18 +534,21 @@ impl FieldMetadata {
         self.inner.len()
     }
 
+    /// Convert this `FieldMetadata` into a `HashMap<String, String>`
+    pub fn to_hashmap(&self) -> std::collections::HashMap<String, String> {
+        self.inner
+            .iter()
+            .map(|(k, v)| (k.to_string(), v.to_string()))
+            .collect()
+    }
+
     /// Updates the metadata on the Field with this metadata, if it is not 
empty.
     pub fn add_to_field(&self, field: Field) -> Field {
         if self.inner.is_empty() {
             return field;
         }
 
-        field.with_metadata(
-            self.inner
-                .iter()
-                .map(|(k, v)| (k.clone(), v.clone()))
-                .collect(),
-        )
+        field.with_metadata(self.to_hashmap())
     }
 }
 
@@ -575,6 +581,24 @@ impl From<&std::collections::HashMap<String, String>> for 
FieldMetadata {
     }
 }
 
+/// From hashbrown map
+impl From<HashMap<String, String>> for FieldMetadata {
+    fn from(map: HashMap<String, String>) -> Self {
+        let inner = map.into_iter().collect();
+        Self::new(inner)
+    }
+}
+
+impl From<&HashMap<String, String>> for FieldMetadata {
+    fn from(map: &HashMap<String, String>) -> Self {
+        let inner = map
+            .into_iter()
+            .map(|(k, v)| (k.to_string(), v.to_string()))
+            .collect();
+        Self::new(inner)
+    }
+}
+
 /// UNNEST expression.
 #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
 pub struct Unnest {
@@ -601,7 +625,7 @@ pub struct Alias {
     pub expr: Box<Expr>,
     pub relation: Option<TableReference>,
     pub name: String,
-    pub metadata: Option<std::collections::HashMap<String, String>>,
+    pub metadata: Option<FieldMetadata>,
 }
 
 impl Hash for Alias {
@@ -641,10 +665,7 @@ impl Alias {
         }
     }
 
-    pub fn with_metadata(
-        mut self,
-        metadata: Option<std::collections::HashMap<String, String>>,
-    ) -> Self {
+    pub fn with_metadata(mut self, metadata: Option<FieldMetadata>) -> Self {
         self.metadata = metadata;
         self
     }
@@ -1591,15 +1612,17 @@ impl Expr {
     /// # Example
     /// ```
     /// # use datafusion_expr::col;
-    /// use std::collections::HashMap;
+    /// # use std::collections::HashMap;
+    /// # use datafusion_expr::expr::FieldMetadata;
     /// let metadata = HashMap::from([("key".to_string(), 
"value".to_string())]);
+    /// let metadata = FieldMetadata::from(metadata);
     /// let expr = col("foo").alias_with_metadata("bar", Some(metadata));
     /// ```
     ///
     pub fn alias_with_metadata(
         self,
         name: impl Into<String>,
-        metadata: Option<std::collections::HashMap<String, String>>,
+        metadata: Option<FieldMetadata>,
     ) -> Expr {
         Expr::Alias(Alias::new(self, None::<&str>, 
name.into()).with_metadata(metadata))
     }
@@ -1621,8 +1644,10 @@ impl Expr {
     /// # Example
     /// ```
     /// # use datafusion_expr::col;
-    /// use std::collections::HashMap;
+    /// # use std::collections::HashMap;
+    /// # use datafusion_expr::expr::FieldMetadata;
     /// let metadata = HashMap::from([("key".to_string(), 
"value".to_string())]);
+    /// let metadata = FieldMetadata::from(metadata);
     /// let expr = col("foo").alias_qualified_with_metadata(Some("tbl"), 
"bar", Some(metadata));
     /// ```
     ///
@@ -1630,7 +1655,7 @@ impl Expr {
         self,
         relation: Option<impl Into<TableReference>>,
         name: impl Into<String>,
-        metadata: Option<std::collections::HashMap<String, String>>,
+        metadata: Option<FieldMetadata>,
     ) -> Expr {
         Expr::Alias(Alias::new(self, relation, 
name.into()).with_metadata(metadata))
     }
@@ -3819,7 +3844,7 @@ mod test {
         // If this test fails when you change `Expr`, please try
         // `Box`ing the fields to make `Expr` smaller
         // See https://github.com/apache/datafusion/issues/16199 for details
-        assert_eq!(size_of::<Expr>(), 144);
+        assert_eq!(size_of::<Expr>(), 128);
         assert_eq!(size_of::<ScalarValue>(), 64);
         assert_eq!(size_of::<DataType>(), 24); // 3 ptrs
         assert_eq!(size_of::<Vec<Expr>>(), 24);
diff --git a/datafusion/expr/src/expr_schema.rs 
b/datafusion/expr/src/expr_schema.rs
index 5ff4873033..8ca479bb6f 100644
--- a/datafusion/expr/src/expr_schema.rs
+++ b/datafusion/expr/src/expr_schema.rs
@@ -17,8 +17,8 @@
 
 use super::{Between, Expr, Like};
 use crate::expr::{
-    AggregateFunction, AggregateFunctionParams, Alias, BinaryExpr, Cast, 
InList,
-    InSubquery, Placeholder, ScalarFunction, TryCast, Unnest, WindowFunction,
+    AggregateFunction, AggregateFunctionParams, Alias, BinaryExpr, Cast, 
FieldMetadata,
+    InList, InSubquery, Placeholder, ScalarFunction, TryCast, Unnest, 
WindowFunction,
     WindowFunctionParams,
 };
 use crate::type_coercion::functions::{
@@ -34,7 +34,6 @@ use datafusion_common::{
 };
 use datafusion_expr_common::type_coercion::binary::BinaryTypeCoercer;
 use datafusion_functions_window_common::field::WindowUDFFieldArgs;
-use std::collections::HashMap;
 use std::sync::Arc;
 
 /// Trait to allow expr to typable with respect to a schema
@@ -46,7 +45,7 @@ pub trait ExprSchemable {
     fn nullable(&self, input_schema: &dyn ExprSchema) -> Result<bool>;
 
     /// Given a schema, return the expr's optional metadata
-    fn metadata(&self, schema: &dyn ExprSchema) -> Result<HashMap<String, 
String>>;
+    fn metadata(&self, schema: &dyn ExprSchema) -> Result<FieldMetadata>;
 
     /// Convert to a field with respect to a schema
     fn to_field(
@@ -346,9 +345,9 @@ impl ExprSchemable for Expr {
         }
     }
 
-    fn metadata(&self, schema: &dyn ExprSchema) -> Result<HashMap<String, 
String>> {
+    fn metadata(&self, schema: &dyn ExprSchema) -> Result<FieldMetadata> {
         self.to_field(schema)
-            .map(|(_, field)| field.metadata().clone())
+            .map(|(_, field)| FieldMetadata::from(field.metadata()))
     }
 
     /// Returns the datatype and nullability of the expression based on 
[ExprSchema].
@@ -405,12 +404,10 @@ impl ExprSchemable for Expr {
 
                 let mut combined_metadata = expr.metadata(schema)?;
                 if let Some(metadata) = metadata {
-                    if !metadata.is_empty() {
-                        combined_metadata.extend(metadata.clone());
-                    }
+                    combined_metadata.extend(metadata.clone());
                 }
 
-                Ok(Arc::new(field.with_metadata(combined_metadata)))
+                Ok(Arc::new(combined_metadata.add_to_field(field)))
             }
             Expr::Negative(expr) => expr.to_field(schema).map(|(_, f)| f),
             Expr::Column(c) => schema.field_from_column(c).map(|f| 
Arc::new(f.clone())),
@@ -736,7 +733,7 @@ mod tests {
     use super::*;
     use crate::{col, lit};
 
-    use datafusion_common::{internal_err, DFSchema, ScalarValue};
+    use datafusion_common::{internal_err, DFSchema, HashMap, ScalarValue};
 
     macro_rules! test_is_expr_nullable {
         ($EXPR_TYPE:ident) => {{
@@ -842,6 +839,7 @@ mod tests {
     fn test_expr_metadata() {
         let mut meta = HashMap::new();
         meta.insert("bar".to_string(), "buzz".to_string());
+        let meta = FieldMetadata::from(meta);
         let expr = col("foo");
         let schema = MockExprSchema::new()
             .with_data_type(DataType::Int32)
@@ -860,14 +858,13 @@ mod tests {
         );
 
         let schema = DFSchema::from_unqualified_fields(
-            vec![Field::new("foo", DataType::Int32, 
true).with_metadata(meta.clone())]
-                .into(),
-            HashMap::new(),
+            vec![meta.add_to_field(Field::new("foo", DataType::Int32, 
true))].into(),
+            std::collections::HashMap::new(),
         )
         .unwrap();
 
         // verify to_field method populates metadata
-        assert_eq!(&meta, expr.to_field(&schema).unwrap().1.metadata());
+        assert_eq!(meta, expr.metadata(&schema).unwrap());
     }
 
     #[derive(Debug)]
@@ -899,8 +896,8 @@ mod tests {
             self
         }
 
-        fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
-            self.field = self.field.with_metadata(metadata);
+        fn with_metadata(mut self, metadata: FieldMetadata) -> Self {
+            self.field = metadata.add_to_field(self.field);
             self
         }
     }
diff --git a/datafusion/physical-expr/src/planner.rs 
b/datafusion/physical-expr/src/planner.rs
index 8859013289..fbc19b1202 100644
--- a/datafusion/physical-expr/src/planner.rs
+++ b/datafusion/physical-expr/src/planner.rs
@@ -115,7 +115,6 @@ pub fn create_physical_expr(
     match e {
         Expr::Alias(Alias { expr, metadata, .. }) => {
             if let Expr::Literal(v, prior_metadata) = expr.as_ref() {
-                let metadata = metadata.as_ref().map(|m| 
FieldMetadata::from(m.clone()));
                 let new_metadata = FieldMetadata::merge_options(
                     prior_metadata.as_ref(),
                     metadata.as_ref(),
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs 
b/datafusion/proto/src/logical_plan/to_proto.rs
index 814cf52904..f7f303fefe 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -211,7 +211,10 @@ pub fn serialize_expr(
                     .map(|r| vec![r.into()])
                     .unwrap_or(vec![]),
                 alias: name.to_owned(),
-                metadata: metadata.to_owned().unwrap_or(HashMap::new()),
+                metadata: metadata
+                    .as_ref()
+                    .map(|m| m.to_hashmap())
+                    .unwrap_or(HashMap::new()),
             });
             protobuf::LogicalExprNode {
                 expr_type: Some(ExprType::Alias(alias)),
diff --git a/docs/source/library-user-guide/upgrading.md 
b/docs/source/library-user-guide/upgrading.md
index 4fea730463..646753aca4 100644
--- a/docs/source/library-user-guide/upgrading.md
+++ b/docs/source/library-user-guide/upgrading.md
@@ -19,6 +19,35 @@
 
 # Upgrade Guides
 
+## DataFusion `49.0.0`
+
+### Metadata is now represented by `FieldMetadata`
+
+Metadata from the Arrow `Field` is now stored using the `FieldMetadata`
+structure. In prior versions it was stored as both a `HashMap<String, String>`
+and a `BTreeMap<String, String>`. `FieldMetadata` is a easier to work with and
+is more efficient.
+
+To create `FieldMetadata` from a `Field`:
+
+```rust
+# /* comment to avoid running
+ let metadata = FieldMetadata::from(&field);
+# */
+```
+
+To add metadata to a `Field`, use the `add_to_field` method:
+
+```rust
+# /* comment to avoid running
+let updated_field = metadata.add_to_field(field);
+# */
+```
+
+See [#16317] for details.
+
+[#16317]: https://github.com/apache/datafusion/pull/16317
+
 ## DataFusion `48.0.0`
 
 ### `Expr::Literal` has optional metadata


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to