This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 58483fbbbe Fix bug in field level metadata matching code (#8286)
58483fbbbe is described below

commit 58483fbbbe732cca070209c82ae7e5cfd031f6ae
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Nov 21 05:57:35 2023 -0500

    Fix bug in field level metadata matching code (#8286)
    
    * Fix bug in field level metadata matching code
    
    * improve comment
    
    * single map
---
 datafusion/physical-plan/src/projection.rs      | 16 +++----
 datafusion/sqllogictest/src/test_context.rs     | 44 ++++++++++++++----
 datafusion/sqllogictest/test_files/metadata.slt | 62 +++++++++++++++++++++++++
 3 files changed, 104 insertions(+), 18 deletions(-)

diff --git a/datafusion/physical-plan/src/projection.rs 
b/datafusion/physical-plan/src/projection.rs
index b8e2d0e425..dfb860bc8c 100644
--- a/datafusion/physical-plan/src/projection.rs
+++ b/datafusion/physical-plan/src/projection.rs
@@ -257,16 +257,12 @@ fn get_field_metadata(
     e: &Arc<dyn PhysicalExpr>,
     input_schema: &Schema,
 ) -> Option<HashMap<String, String>> {
-    let name = if let Some(column) = e.as_any().downcast_ref::<Column>() {
-        column.name()
-    } else {
-        return None;
-    };
-
-    input_schema
-        .field_with_name(name)
-        .ok()
-        .map(|f| f.metadata().clone())
+    // Look up field by index in schema (not NAME as there can be more than one
+    // column with the same name)
+    e.as_any()
+        .downcast_ref::<Column>()
+        .map(|column| input_schema.field(column.index()).metadata())
+        .cloned()
 }
 
 fn stats_projection(
diff --git a/datafusion/sqllogictest/src/test_context.rs 
b/datafusion/sqllogictest/src/test_context.rs
index b2314f34f3..f5ab8f71aa 100644
--- a/datafusion/sqllogictest/src/test_context.rs
+++ b/datafusion/sqllogictest/src/test_context.rs
@@ -35,6 +35,7 @@ use datafusion::{
 };
 use datafusion_common::DataFusionError;
 use log::info;
+use std::collections::HashMap;
 use std::fs::File;
 use std::io::Write;
 use std::path::Path;
@@ -57,8 +58,8 @@ impl TestContext {
         }
     }
 
-    /// Create a SessionContext, configured for the specific test, if
-    /// possible.
+    /// Create a SessionContext, configured for the specific sqllogictest
+    /// test(.slt file) , if possible.
     ///
     /// If `None` is returned (e.g. because some needed feature is not
     /// enabled), the file should be skipped
@@ -67,7 +68,7 @@ impl TestContext {
             // hardcode target partitions so plans are deterministic
             .with_target_partitions(4);
 
-        let test_ctx = 
TestContext::new(SessionContext::new_with_config(config));
+        let mut test_ctx = 
TestContext::new(SessionContext::new_with_config(config));
 
         let file_name = relative_path.file_name().unwrap().to_str().unwrap();
         match file_name {
@@ -86,10 +87,8 @@ impl TestContext {
             "avro.slt" => {
                 #[cfg(feature = "avro")]
                 {
-                    let mut test_ctx = test_ctx;
                     info!("Registering avro tables");
                     register_avro_tables(&mut test_ctx).await;
-                    return Some(test_ctx);
                 }
                 #[cfg(not(feature = "avro"))]
                 {
@@ -99,10 +98,11 @@ impl TestContext {
             }
             "joins.slt" => {
                 info!("Registering partition table tables");
-
-                let mut test_ctx = test_ctx;
                 register_partition_table(&mut test_ctx).await;
-                return Some(test_ctx);
+            }
+            "metadata.slt" => {
+                info!("Registering metadata table tables");
+                register_metadata_tables(test_ctx.session_ctx()).await;
             }
             _ => {
                 info!("Using default SessionContext");
@@ -299,3 +299,31 @@ fn table_with_many_types() -> Arc<dyn TableProvider> {
     let provider = MemTable::try_new(Arc::new(schema), 
vec![vec![batch]]).unwrap();
     Arc::new(provider)
 }
+
+/// Registers a table_with_metadata that contains both field level and Table 
level metadata
+pub async fn register_metadata_tables(ctx: &SessionContext) {
+    let id = Field::new("id", DataType::Int32, 
true).with_metadata(HashMap::from([(
+        String::from("metadata_key"),
+        String::from("the id field"),
+    )]));
+    let name = Field::new("name", DataType::Utf8, 
true).with_metadata(HashMap::from([(
+        String::from("metadata_key"),
+        String::from("the name field"),
+    )]));
+
+    let schema = Schema::new(vec![id, name]).with_metadata(HashMap::from([(
+        String::from("metadata_key"),
+        String::from("the entire schema"),
+    )]));
+
+    let batch = RecordBatch::try_new(
+        Arc::new(schema),
+        vec![
+            Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])) as _,
+            Arc::new(StringArray::from(vec![None, Some("bar"), Some("baz")])) 
as _,
+        ],
+    )
+    .unwrap();
+
+    ctx.register_batch("table_with_metadata", batch).unwrap();
+}
diff --git a/datafusion/sqllogictest/test_files/metadata.slt 
b/datafusion/sqllogictest/test_files/metadata.slt
new file mode 100644
index 0000000000..3b2b219244
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/metadata.slt
@@ -0,0 +1,62 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+
+#   http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+##########
+## Tests for tables that has both metadata on each field as well as metadata on
+## the schema itself.
+##########
+
+## Note that table_with_metadata is defined using Rust code
+## in the test harness as there is no way to define schema
+## with metadata in SQL.
+
+query IT
+select * from table_with_metadata;
+----
+1 NULL
+NULL bar
+3 baz
+
+query I rowsort
+SELECT (
+  SELECT id FROM table_with_metadata
+  ) UNION (
+  SELECT id FROM table_with_metadata
+  );
+----
+1
+3
+NULL
+
+query I rowsort
+SELECT "data"."id"
+FROM
+  (
+    (SELECT "id" FROM "table_with_metadata")
+      UNION
+    (SELECT "id" FROM "table_with_metadata")
+  ) as "data",
+  (
+    SELECT "id" FROM "table_with_metadata"
+  ) as "samples"
+WHERE "data"."id" = "samples"."id";
+----
+1
+3
+
+statement ok
+drop table table_with_metadata;

Reply via email to