This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch branch-52
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/branch-52 by this push:
new 822f3fa5ed [branch-52] fix: Ensure columns are casted to the correct
names with Unions (#20146) (#20879)
822f3fa5ed is described below
commit 822f3fa5ed551d746f31b7fec65fde84c207db11
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu Mar 12 06:57:02 2026 -0400
[branch-52] fix: Ensure columns are casted to the correct names with Unions
(#20146) (#20879)
- Part of https://github.com/apache/datafusion/issues/20855
- Closes https://github.com/apache/datafusion/issues/20123 on branch-52
This PR:
- Backports https://github.com/apache/datafusion/pull/20146 from
@nuno-faria to the branch-52 line
---------
Co-authored-by: Nuno Faria <[email protected]>
---
datafusion/expr/src/expr_rewriter/mod.rs | 13 +-
.../optimizer/tests/optimizer_integration.rs | 6 +-
datafusion/substrait/tests/cases/logical_plans.rs | 24 +++
.../duplicate_name_in_union.substrait.json | 171 +++++++++++++++++++++
4 files changed, 208 insertions(+), 6 deletions(-)
diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs
index a0faca76e9..32a88ab8cf 100644
--- a/datafusion/expr/src/expr_rewriter/mod.rs
+++ b/datafusion/expr/src/expr_rewriter/mod.rs
@@ -261,9 +261,16 @@ fn coerce_exprs_for_schema(
#[expect(deprecated)]
Expr::Wildcard { .. } => Ok(expr),
_ => {
- // maintain the original name when casting
- let name = dst_schema.field(idx).name();
- Ok(expr.cast_to(new_type, src_schema)?.alias(name))
+ match expr {
+ // maintain the original name when casting a column, to avoid the
+ // tablename being added to it when not explicitly set by the query
+ // (see: https://github.com/apache/datafusion/issues/18818)
+ Expr::Column(ref column) => {
+ let name = column.name().to_owned();
+ Ok(expr.cast_to(new_type, src_schema)?.alias(name))
+ }
+ _ => Ok(expr.cast_to(new_type, src_schema)?),
+ }
}
}
} else {
diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs
index 36a6df54dd..fd4991c244 100644
--- a/datafusion/optimizer/tests/optimizer_integration.rs
+++ b/datafusion/optimizer/tests/optimizer_integration.rs
@@ -543,7 +543,7 @@ fn recursive_cte_projection_pushdown() -> Result<()> {
RecursiveQuery: is_distinct=false
Projection: test.col_int32 AS id
TableScan: test projection=[col_int32]
- Projection: CAST(CAST(nodes.id AS Int64) + Int64(1) AS Int32) AS id
+ Projection: CAST(CAST(nodes.id AS Int64) + Int64(1) AS Int32)
Filter: nodes.id < Int32(3)
TableScan: nodes projection=[id]
"
@@ -567,7 +567,7 @@ fn recursive_cte_with_aliased_self_reference() -> Result<()> {
RecursiveQuery: is_distinct=false
Projection: test.col_int32 AS id
TableScan: test projection=[col_int32]
- Projection: CAST(CAST(child.id AS Int64) + Int64(1) AS Int32) AS id
+ Projection: CAST(CAST(child.id AS Int64) + Int64(1) AS Int32)
SubqueryAlias: child
Filter: nodes.id < Int32(3)
TableScan: nodes projection=[id]
@@ -630,7 +630,7 @@ fn recursive_cte_projection_pushdown_baseline() -> Result<()> {
Projection: test.col_int32 AS n
Filter: test.col_int32 = Int32(5)
TableScan: test projection=[col_int32]
- Projection: CAST(CAST(countdown.n AS Int64) - Int64(1) AS Int32) AS n
+ Projection: CAST(CAST(countdown.n AS Int64) - Int64(1) AS Int32)
Filter: countdown.n > Int32(1)
TableScan: countdown projection=[n]
"
diff --git a/datafusion/substrait/tests/cases/logical_plans.rs b/datafusion/substrait/tests/cases/logical_plans.rs
index 41f08c579f..e3e45193e7 100644
--- a/datafusion/substrait/tests/cases/logical_plans.rs
+++ b/datafusion/substrait/tests/cases/logical_plans.rs
@@ -220,6 +220,30 @@ mod tests {
// Trigger execution to ensure plan validity
DataFrame::new(ctx.state(), plan).show().await?;
+ Ok(())
+ }
+ #[tokio::test]
+ async fn duplicate_name_in_union() -> Result<()> {
+ let proto_plan =
+ read_json("tests/testdata/test_plans/duplicate_name_in_union.substrait.json");
+ let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?;
+ let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?;
+
+ assert_snapshot!(
+ plan,
+ @r"
+ Projection: foo AS col1, bar AS col2
+ Union
+ Projection: foo, bar
+ Values: (Int64(100), Int64(200))
+ Projection: x, foo
+ Values: (Int32(300), Int64(400))
+ "
+ );
+
+ // Trigger execution to ensure plan validity
+ DataFrame::new(ctx.state(), plan).show().await?;
+
Ok(())
}
}
diff --git a/datafusion/substrait/tests/testdata/test_plans/duplicate_name_in_union.substrait.json b/datafusion/substrait/tests/testdata/test_plans/duplicate_name_in_union.substrait.json
new file mode 100644
index 0000000000..1da2ff6131
--- /dev/null
+++ b/datafusion/substrait/tests/testdata/test_plans/duplicate_name_in_union.substrait.json
@@ -0,0 +1,171 @@
+{
+ "version": {
+ "minorNumber": 54,
+ "producer": "datafusion-test"
+ },
+ "relations": [
+ {
+ "root": {
+ "input": {
+ "set": {
+ "common": {
+ "direct": {}
+ },
+ "inputs": [
+ {
+ "project": {
+ "common": {
+ "emit": {
+ "outputMapping": [2, 3]
+ }
+ },
+ "input": {
+ "read": {
+ "common": {
+ "direct": {}
+ },
+ "baseSchema": {
+ "names": ["foo", "bar"],
+ "struct": {
+ "types": [
+ {
+ "i64": {
+ "nullability": "NULLABILITY_REQUIRED"
+ }
+ },
+ {
+ "i64": {
+ "nullability": "NULLABILITY_REQUIRED"
+ }
+ }
+ ],
+ "nullability": "NULLABILITY_REQUIRED"
+ }
+ },
+ "virtualTable": {
+ "expressions": [
+ {
+ "fields": [
+ {
+ "literal": {
+ "i64": "100"
+ }
+ },
+ {
+ "literal": {
+ "i64": "200"
+ }
+ }
+ ]
+ }
+ ]
+ }
+ }
+ },
+ "expressions": [
+ {
+ "selection": {
+ "directReference": {
+ "structField": {
+ "field": 0
+ }
+ },
+ "rootReference": {}
+ }
+ },
+ {
+ "selection": {
+ "directReference": {
+ "structField": {
+ "field": 1
+ }
+ },
+ "rootReference": {}
+ }
+ }
+ ]
+ }
+ },
+ {
+ "project": {
+ "common": {
+ "emit": {
+ "outputMapping": [2, 3]
+ }
+ },
+ "input": {
+ "read": {
+ "common": {
+ "direct": {}
+ },
+ "baseSchema": {
+ "names": ["x", "foo"],
+ "struct": {
+ "types": [
+ {
+ "i32": {
+ "nullability": "NULLABILITY_REQUIRED"
+ }
+ },
+ {
+ "i64": {
+ "nullability": "NULLABILITY_REQUIRED"
+ }
+ }
+ ],
+ "nullability": "NULLABILITY_REQUIRED"
+ }
+ },
+ "virtualTable": {
+ "expressions": [
+ {
+ "fields": [
+ {
+ "literal": {
+ "i32": 300
+ }
+ },
+ {
+ "literal": {
+ "i64": "400"
+ }
+ }
+ ]
+ }
+ ]
+ }
+ }
+ },
+ "expressions": [
+ {
+ "selection": {
+ "directReference": {
+ "structField": {
+ "field": 0
+ }
+ },
+ "rootReference": {}
+ }
+ },
+ {
+ "selection": {
+ "directReference": {
+ "structField": {
+ "field": 1
+ }
+ },
+ "rootReference": {}
+ }
+ }
+ ]
+ }
+ }
+ ],
+ "op": "SET_OP_UNION_ALL"
+ }
+ },
+ "names": ["col1", "col2"]
+ }
+ }
+ ]
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]