This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch branch-42
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/branch-42 by this push:
     new 81b93e5d59 Backport "Provide field and schema metadata missing on cross joins, and union with null fields" (#12729) (#12974)
81b93e5d59 is described below

commit 81b93e5d59863921afcefe56904fb6bedb1755b1
Author: Matthew Turner <[email protected]>
AuthorDate: Wed Oct 16 16:59:11 2024 -0400

    Backport "Provide field and schema metadata missing on cross joins, and union with null fields" (#12729) (#12974)
    
    * Patch for PR 12729
    
    * Test before drop
---
 datafusion/core/src/datasource/file_format/mod.rs |  6 +++--
 datafusion/physical-plan/src/joins/cross_join.rs  | 13 ++++++---
 datafusion/physical-plan/src/union.rs             | 11 +++++++-
 datafusion/sqllogictest/src/test_context.rs       |  8 +++++-
 datafusion/sqllogictest/test_files/metadata.slt   | 32 ++++++++++++++++++++++-
 5 files changed, 62 insertions(+), 8 deletions(-)

diff --git a/datafusion/core/src/datasource/file_format/mod.rs 
b/datafusion/core/src/datasource/file_format/mod.rs
index 1dcf480cf4..327026a2a4 100644
--- a/datafusion/core/src/datasource/file_format/mod.rs
+++ b/datafusion/core/src/datasource/file_format/mod.rs
@@ -257,11 +257,13 @@ pub(crate) fn coerce_file_schema_to_view_type(
             |field| match (table_fields.get(field.name()), field.data_type()) {
                 (Some(DataType::Utf8View), DataType::Utf8)
                 | (Some(DataType::Utf8View), DataType::LargeUtf8) => Arc::new(
-                    Field::new(field.name(), DataType::Utf8View, 
field.is_nullable()),
+                    Field::new(field.name(), DataType::Utf8View, 
field.is_nullable())
+                        .with_metadata(field.metadata().to_owned()),
                 ),
                 (Some(DataType::BinaryView), DataType::Binary)
                 | (Some(DataType::BinaryView), DataType::LargeBinary) => 
Arc::new(
-                    Field::new(field.name(), DataType::BinaryView, 
field.is_nullable()),
+                    Field::new(field.name(), DataType::BinaryView, 
field.is_nullable())
+                        .with_metadata(field.metadata().to_owned()),
                 ),
                 _ => field.clone(),
             },
diff --git a/datafusion/physical-plan/src/joins/cross_join.rs 
b/datafusion/physical-plan/src/joins/cross_join.rs
index 11153556f2..a70645f3d6 100644
--- a/datafusion/physical-plan/src/joins/cross_join.rs
+++ b/datafusion/physical-plan/src/joins/cross_join.rs
@@ -69,15 +69,22 @@ impl CrossJoinExec {
     /// Create a new [CrossJoinExec].
     pub fn new(left: Arc<dyn ExecutionPlan>, right: Arc<dyn ExecutionPlan>) -> 
Self {
         // left then right
-        let all_columns: Fields = {
+        let (all_columns, metadata) = {
             let left_schema = left.schema();
             let right_schema = right.schema();
             let left_fields = left_schema.fields().iter();
             let right_fields = right_schema.fields().iter();
-            left_fields.chain(right_fields).cloned().collect()
+
+            let mut metadata = left_schema.metadata().clone();
+            metadata.extend(right_schema.metadata().clone());
+
+            (
+                left_fields.chain(right_fields).cloned().collect::<Fields>(),
+                metadata,
+            )
         };
 
-        let schema = Arc::new(Schema::new(all_columns));
+        let schema = 
Arc::new(Schema::new(all_columns).with_metadata(metadata));
         let cache = Self::compute_properties(&left, &right, 
Arc::clone(&schema));
         CrossJoinExec {
             left,
diff --git a/datafusion/physical-plan/src/union.rs 
b/datafusion/physical-plan/src/union.rs
index 78b2568605..1cf22060b6 100644
--- a/datafusion/physical-plan/src/union.rs
+++ b/datafusion/physical-plan/src/union.rs
@@ -474,7 +474,16 @@ fn union_schema(inputs: &[Arc<dyn ExecutionPlan>]) -> 
SchemaRef {
                 .iter()
                 .filter_map(|input| {
                     if input.schema().fields().len() > i {
-                        Some(input.schema().field(i).clone())
+                        let field = input.schema().field(i).clone();
+                        let right_hand_metdata = inputs
+                            .get(1)
+                            .map(|right_input| {
+                                
right_input.schema().field(i).metadata().clone()
+                            })
+                            .unwrap_or_default();
+                        let mut metadata = field.metadata().clone();
+                        metadata.extend(right_hand_metdata);
+                        Some(field.with_metadata(metadata))
                     } else {
                         None
                     }
diff --git a/datafusion/sqllogictest/src/test_context.rs 
b/datafusion/sqllogictest/src/test_context.rs
index ef2fa863e6..4ee929ce01 100644
--- a/datafusion/sqllogictest/src/test_context.rs
+++ b/datafusion/sqllogictest/src/test_context.rs
@@ -313,8 +313,13 @@ pub async fn register_metadata_tables(ctx: 
&SessionContext) {
         String::from("metadata_key"),
         String::from("the name field"),
     )]));
+    let l_name =
+        Field::new("l_name", DataType::Utf8, 
true).with_metadata(HashMap::from([(
+            String::from("metadata_key"),
+            String::from("the l_name field"),
+        )]));
 
-    let schema = Schema::new(vec![id, name]).with_metadata(HashMap::from([(
+    let schema = Schema::new(vec![id, name, 
l_name]).with_metadata(HashMap::from([(
         String::from("metadata_key"),
         String::from("the entire schema"),
     )]));
@@ -324,6 +329,7 @@ pub async fn register_metadata_tables(ctx: &SessionContext) 
{
         vec![
             Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])) as _,
             Arc::new(StringArray::from(vec![None, Some("bar"), Some("baz")])) 
as _,
+            Arc::new(StringArray::from(vec![None, Some("l_bar"), 
Some("l_baz")])) as _,
         ],
     )
     .unwrap();
diff --git a/datafusion/sqllogictest/test_files/metadata.slt 
b/datafusion/sqllogictest/test_files/metadata.slt
index 3b2b219244..d6e3ad0c20 100644
--- a/datafusion/sqllogictest/test_files/metadata.slt
+++ b/datafusion/sqllogictest/test_files/metadata.slt
@@ -25,7 +25,7 @@
 ## with metadata in SQL.
 
 query IT
-select * from table_with_metadata;
+select id, name from table_with_metadata;
 ----
 1 NULL
 NULL bar
@@ -58,5 +58,35 @@ WHERE "data"."id" = "samples"."id";
 1
 3
 
+
+# Regression test: missing schema metadata, when aggregate on cross join
+query I
+SELECT count("data"."id")
+FROM
+  (
+   SELECT "id" FROM "table_with_metadata"
+  ) as "data",
+  (
+    SELECT "id" FROM "table_with_metadata"
+  ) as "samples";
+----
+6
+
+# Regression test: missing field metadata, from the NULL field on the left side of the union
+query ITT
+(SELECT id, NULL::string as name, l_name FROM "table_with_metadata")
+  UNION
+(SELECT id, name, NULL::string as l_name FROM "table_with_metadata")
+ORDER BY id, name, l_name;
+----
+1 NULL NULL
+3 baz NULL
+3 NULL l_baz
+NULL bar NULL
+NULL NULL l_bar
+
 statement ok
 drop table table_with_metadata;
+
+
+


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to