This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 58483fbbbe Fix bug in field level metadata matching code (#8286)
58483fbbbe is described below
commit 58483fbbbe732cca070209c82ae7e5cfd031f6ae
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Nov 21 05:57:35 2023 -0500
Fix bug in field level metadata matching code (#8286)
* Fix bug in field level metadata matching code
* improve comment
* single map
---
datafusion/physical-plan/src/projection.rs | 16 +++----
datafusion/sqllogictest/src/test_context.rs | 44 ++++++++++++++----
datafusion/sqllogictest/test_files/metadata.slt | 62 +++++++++++++++++++++++++
3 files changed, 104 insertions(+), 18 deletions(-)
diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs
index b8e2d0e425..dfb860bc8c 100644
--- a/datafusion/physical-plan/src/projection.rs
+++ b/datafusion/physical-plan/src/projection.rs
@@ -257,16 +257,12 @@ fn get_field_metadata(
e: &Arc<dyn PhysicalExpr>,
input_schema: &Schema,
) -> Option<HashMap<String, String>> {
- let name = if let Some(column) = e.as_any().downcast_ref::<Column>() {
- column.name()
- } else {
- return None;
- };
-
- input_schema
- .field_with_name(name)
- .ok()
- .map(|f| f.metadata().clone())
+ // Look up field by index in schema (not NAME as there can be more than one
+ // column with the same name)
+ e.as_any()
+ .downcast_ref::<Column>()
+ .map(|column| input_schema.field(column.index()).metadata())
+ .cloned()
}
fn stats_projection(
diff --git a/datafusion/sqllogictest/src/test_context.rs b/datafusion/sqllogictest/src/test_context.rs
index b2314f34f3..f5ab8f71aa 100644
--- a/datafusion/sqllogictest/src/test_context.rs
+++ b/datafusion/sqllogictest/src/test_context.rs
@@ -35,6 +35,7 @@ use datafusion::{
};
use datafusion_common::DataFusionError;
use log::info;
+use std::collections::HashMap;
use std::fs::File;
use std::io::Write;
use std::path::Path;
@@ -57,8 +58,8 @@ impl TestContext {
}
}
- /// Create a SessionContext, configured for the specific test, if
- /// possible.
+ /// Create a SessionContext, configured for the specific sqllogictest
+ /// test (.slt file), if possible.
///
/// If `None` is returned (e.g. because some needed feature is not
/// enabled), the file should be skipped
@@ -67,7 +68,7 @@ impl TestContext {
// hardcode target partitions so plans are deterministic
.with_target_partitions(4);
- let test_ctx = TestContext::new(SessionContext::new_with_config(config));
+ let mut test_ctx = TestContext::new(SessionContext::new_with_config(config));
let file_name = relative_path.file_name().unwrap().to_str().unwrap();
match file_name {
@@ -86,10 +87,8 @@ impl TestContext {
"avro.slt" => {
#[cfg(feature = "avro")]
{
- let mut test_ctx = test_ctx;
info!("Registering avro tables");
register_avro_tables(&mut test_ctx).await;
- return Some(test_ctx);
}
#[cfg(not(feature = "avro"))]
{
@@ -99,10 +98,11 @@ impl TestContext {
}
"joins.slt" => {
info!("Registering partition table tables");
-
- let mut test_ctx = test_ctx;
register_partition_table(&mut test_ctx).await;
- return Some(test_ctx);
+ }
+ "metadata.slt" => {
+ info!("Registering metadata table tables");
+ register_metadata_tables(test_ctx.session_ctx()).await;
}
_ => {
info!("Using default SessionContext");
@@ -299,3 +299,31 @@ fn table_with_many_types() -> Arc<dyn TableProvider> {
let provider = MemTable::try_new(Arc::new(schema), vec![vec![batch]]).unwrap();
Arc::new(provider)
}
+
+/// Registers a table_with_metadata that contains both field level and Table level metadata
+pub async fn register_metadata_tables(ctx: &SessionContext) {
+ let id = Field::new("id", DataType::Int32, true).with_metadata(HashMap::from([(
+ String::from("metadata_key"),
+ String::from("the id field"),
+ )]));
+ let name = Field::new("name", DataType::Utf8, true).with_metadata(HashMap::from([(
+ String::from("metadata_key"),
+ String::from("the name field"),
+ )]));
+
+ let schema = Schema::new(vec![id, name]).with_metadata(HashMap::from([(
+ String::from("metadata_key"),
+ String::from("the entire schema"),
+ )]));
+
+ let batch = RecordBatch::try_new(
+ Arc::new(schema),
+ vec![
+ Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])) as _,
+ Arc::new(StringArray::from(vec![None, Some("bar"), Some("baz")])) as _,
+ ],
+ )
+ .unwrap();
+
+ ctx.register_batch("table_with_metadata", batch).unwrap();
+}
diff --git a/datafusion/sqllogictest/test_files/metadata.slt b/datafusion/sqllogictest/test_files/metadata.slt
new file mode 100644
index 0000000000..3b2b219244
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/metadata.slt
@@ -0,0 +1,62 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+##########
+## Tests for tables that have both metadata on each field as well as metadata on
+## the schema itself.
+##########
+
+## Note that table_with_metadata is defined using Rust code
+## in the test harness as there is no way to define schema
+## with metadata in SQL.
+
+query IT
+select * from table_with_metadata;
+----
+1 NULL
+NULL bar
+3 baz
+
+query I rowsort
+SELECT (
+ SELECT id FROM table_with_metadata
+ ) UNION (
+ SELECT id FROM table_with_metadata
+ );
+----
+1
+3
+NULL
+
+query I rowsort
+SELECT "data"."id"
+FROM
+ (
+ (SELECT "id" FROM "table_with_metadata")
+ UNION
+ (SELECT "id" FROM "table_with_metadata")
+ ) as "data",
+ (
+ SELECT "id" FROM "table_with_metadata"
+ ) as "samples"
+WHERE "data"."id" = "samples"."id";
+----
+1
+3
+
+statement ok
+drop table table_with_metadata;