This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch branch-42
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/branch-42 by this push:
new 81b93e5d59 Backport "Provide field and schema metadata missing on
cross joins, and union with null fields" (#12729) (#12974)
81b93e5d59 is described below
commit 81b93e5d59863921afcefe56904fb6bedb1755b1
Author: Matthew Turner <[email protected]>
AuthorDate: Wed Oct 16 16:59:11 2024 -0400
Backport "Provide field and schema metadata missing on cross joins, and
union with null fields" (#12729) (#12974)
* Patch for PR 12729
* Test before drop
---
datafusion/core/src/datasource/file_format/mod.rs | 6 +++--
datafusion/physical-plan/src/joins/cross_join.rs | 13 ++++++---
datafusion/physical-plan/src/union.rs | 11 +++++++-
datafusion/sqllogictest/src/test_context.rs | 8 +++++-
datafusion/sqllogictest/test_files/metadata.slt | 32 ++++++++++++++++++++++-
5 files changed, 62 insertions(+), 8 deletions(-)
diff --git a/datafusion/core/src/datasource/file_format/mod.rs
b/datafusion/core/src/datasource/file_format/mod.rs
index 1dcf480cf4..327026a2a4 100644
--- a/datafusion/core/src/datasource/file_format/mod.rs
+++ b/datafusion/core/src/datasource/file_format/mod.rs
@@ -257,11 +257,13 @@ pub(crate) fn coerce_file_schema_to_view_type(
|field| match (table_fields.get(field.name()), field.data_type()) {
(Some(DataType::Utf8View), DataType::Utf8)
| (Some(DataType::Utf8View), DataType::LargeUtf8) => Arc::new(
- Field::new(field.name(), DataType::Utf8View,
field.is_nullable()),
+ Field::new(field.name(), DataType::Utf8View,
field.is_nullable())
+ .with_metadata(field.metadata().to_owned()),
),
(Some(DataType::BinaryView), DataType::Binary)
| (Some(DataType::BinaryView), DataType::LargeBinary) =>
Arc::new(
- Field::new(field.name(), DataType::BinaryView,
field.is_nullable()),
+ Field::new(field.name(), DataType::BinaryView,
field.is_nullable())
+ .with_metadata(field.metadata().to_owned()),
),
_ => field.clone(),
},
diff --git a/datafusion/physical-plan/src/joins/cross_join.rs
b/datafusion/physical-plan/src/joins/cross_join.rs
index 11153556f2..a70645f3d6 100644
--- a/datafusion/physical-plan/src/joins/cross_join.rs
+++ b/datafusion/physical-plan/src/joins/cross_join.rs
@@ -69,15 +69,22 @@ impl CrossJoinExec {
/// Create a new [CrossJoinExec].
pub fn new(left: Arc<dyn ExecutionPlan>, right: Arc<dyn ExecutionPlan>) ->
Self {
// left then right
- let all_columns: Fields = {
+ let (all_columns, metadata) = {
let left_schema = left.schema();
let right_schema = right.schema();
let left_fields = left_schema.fields().iter();
let right_fields = right_schema.fields().iter();
- left_fields.chain(right_fields).cloned().collect()
+
+ let mut metadata = left_schema.metadata().clone();
+ metadata.extend(right_schema.metadata().clone());
+
+ (
+ left_fields.chain(right_fields).cloned().collect::<Fields>(),
+ metadata,
+ )
};
- let schema = Arc::new(Schema::new(all_columns));
+ let schema =
Arc::new(Schema::new(all_columns).with_metadata(metadata));
let cache = Self::compute_properties(&left, &right,
Arc::clone(&schema));
CrossJoinExec {
left,
diff --git a/datafusion/physical-plan/src/union.rs
b/datafusion/physical-plan/src/union.rs
index 78b2568605..1cf22060b6 100644
--- a/datafusion/physical-plan/src/union.rs
+++ b/datafusion/physical-plan/src/union.rs
@@ -474,7 +474,16 @@ fn union_schema(inputs: &[Arc<dyn ExecutionPlan>]) ->
SchemaRef {
.iter()
.filter_map(|input| {
if input.schema().fields().len() > i {
- Some(input.schema().field(i).clone())
+ let field = input.schema().field(i).clone();
+ let right_hand_metdata = inputs
+ .get(1)
+ .map(|right_input| {
+
right_input.schema().field(i).metadata().clone()
+ })
+ .unwrap_or_default();
+ let mut metadata = field.metadata().clone();
+ metadata.extend(right_hand_metdata);
+ Some(field.with_metadata(metadata))
} else {
None
}
diff --git a/datafusion/sqllogictest/src/test_context.rs
b/datafusion/sqllogictest/src/test_context.rs
index ef2fa863e6..4ee929ce01 100644
--- a/datafusion/sqllogictest/src/test_context.rs
+++ b/datafusion/sqllogictest/src/test_context.rs
@@ -313,8 +313,13 @@ pub async fn register_metadata_tables(ctx:
&SessionContext) {
String::from("metadata_key"),
String::from("the name field"),
)]));
+ let l_name =
+ Field::new("l_name", DataType::Utf8,
true).with_metadata(HashMap::from([(
+ String::from("metadata_key"),
+ String::from("the l_name field"),
+ )]));
- let schema = Schema::new(vec![id, name]).with_metadata(HashMap::from([(
+ let schema = Schema::new(vec![id, name,
l_name]).with_metadata(HashMap::from([(
String::from("metadata_key"),
String::from("the entire schema"),
)]));
@@ -324,6 +329,7 @@ pub async fn register_metadata_tables(ctx: &SessionContext)
{
vec![
Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])) as _,
Arc::new(StringArray::from(vec![None, Some("bar"), Some("baz")]))
as _,
+ Arc::new(StringArray::from(vec![None, Some("l_bar"),
Some("l_baz")])) as _,
],
)
.unwrap();
diff --git a/datafusion/sqllogictest/test_files/metadata.slt
b/datafusion/sqllogictest/test_files/metadata.slt
index 3b2b219244..d6e3ad0c20 100644
--- a/datafusion/sqllogictest/test_files/metadata.slt
+++ b/datafusion/sqllogictest/test_files/metadata.slt
@@ -25,7 +25,7 @@
## with metadata in SQL.
query IT
-select * from table_with_metadata;
+select id, name from table_with_metadata;
----
1 NULL
NULL bar
@@ -58,5 +58,35 @@ WHERE "data"."id" = "samples"."id";
1
3
+
+# Regression test: missing schema metadata, when aggregate on cross join
+query I
+SELECT count("data"."id")
+FROM
+ (
+ SELECT "id" FROM "table_with_metadata"
+ ) as "data",
+ (
+ SELECT "id" FROM "table_with_metadata"
+ ) as "samples";
+----
+6
+
+# Regression test: missing field metadata, from the NULL field on the left side of the union
+query ITT
+(SELECT id, NULL::string as name, l_name FROM "table_with_metadata")
+ UNION
+(SELECT id, name, NULL::string as l_name FROM "table_with_metadata")
+ORDER BY id, name, l_name;
+----
+1 NULL NULL
+3 baz NULL
+3 NULL l_baz
+NULL bar NULL
+NULL NULL l_bar
+
statement ok
drop table table_with_metadata;
+
+
+
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]