This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new eb08846c71 Fix ambiguous reference when aliasing in combination with
`ORDER BY` (#8425)
eb08846c71 is described below
commit eb08846c71c3435515b7dba496cb0cbe7f968995
Author: Asura7969 <[email protected]>
AuthorDate: Thu Dec 7 05:17:25 2023 +0800
Fix ambiguous reference when aliasing in combination with `ORDER BY` (#8425)
* Minor: Improve the document format of JoinHashMap
* ambiguous reference
* ignore aliases
* fix
* fix
* add test
* add test
* add test
* add test
---
datafusion/optimizer/tests/optimizer_integration.rs | 9 ++++-----
datafusion/sql/src/select.rs | 7 ++++++-
datafusion/sql/tests/sql_integration.rs | 17 ++++++++++++++---
datafusion/sqllogictest/test_files/select.slt | 15 +++++++++++++++
4 files changed, 39 insertions(+), 9 deletions(-)
diff --git a/datafusion/optimizer/tests/optimizer_integration.rs
b/datafusion/optimizer/tests/optimizer_integration.rs
index e593b07361..4172881c0a 100644
--- a/datafusion/optimizer/tests/optimizer_integration.rs
+++ b/datafusion/optimizer/tests/optimizer_integration.rs
@@ -324,11 +324,10 @@ fn push_down_filter_groupby_expr_contains_alias() {
fn test_same_name_but_not_ambiguous() {
let sql = "SELECT t1.col_int32 AS col_int32 FROM test t1 intersect SELECT
col_int32 FROM test t2";
let plan = test_sql(sql).unwrap();
- let expected = "LeftSemi Join: col_int32 = t2.col_int32\
- \n Aggregate: groupBy=[[col_int32]], aggr=[[]]\
- \n Projection: t1.col_int32 AS col_int32\
- \n SubqueryAlias: t1\
- \n TableScan: test projection=[col_int32]\
+ let expected = "LeftSemi Join: t1.col_int32 = t2.col_int32\
+ \n Aggregate: groupBy=[[t1.col_int32]], aggr=[[]]\
+ \n SubqueryAlias: t1\
+ \n TableScan: test projection=[col_int32]\
\n SubqueryAlias: t2\
\n TableScan: test projection=[col_int32]";
assert_eq!(expected, format!("{plan:?}"));
diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs
index c546ca7552..15f720d756 100644
--- a/datafusion/sql/src/select.rs
+++ b/datafusion/sql/src/select.rs
@@ -384,7 +384,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
&[&[plan.schema()]],
&plan.using_columns()?,
)?;
- let expr = col.alias(self.normalizer.normalize(alias));
+ let name = self.normalizer.normalize(alias);
+ // avoiding adding an alias if the column name is the same.
+ let expr = match &col {
+ Expr::Column(column) if column.name.eq(&name) => col,
+ _ => col.alias(name),
+ };
Ok(vec![expr])
}
SelectItem::Wildcard(options) => {
diff --git a/datafusion/sql/tests/sql_integration.rs
b/datafusion/sql/tests/sql_integration.rs
index 944a383ee4..48ba501453 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -3546,13 +3546,24 @@ fn test_select_unsupported_syntax_errors(#[case] sql:
&str, #[case] error: &str)
fn select_order_by_with_cast() {
let sql =
"SELECT first_name AS first_name FROM (SELECT first_name AS first_name
FROM person) ORDER BY CAST(first_name as INT)";
- let expected = "Sort: CAST(first_name AS first_name AS Int32) ASC NULLS
LAST\
- \n Projection: first_name AS first_name\
- \n Projection: person.first_name AS first_name\
+ let expected = "Sort: CAST(person.first_name AS Int32) ASC NULLS LAST\
+ \n Projection: person.first_name\
+ \n Projection: person.first_name\
\n TableScan: person";
quick_test(sql, expected);
}
+#[test]
+fn test_avoid_add_alias() {
+ // avoiding adding an alias if the column name is the same.
+ // plan1 = plan2
+ let sql = "select person.id as id from person order by person.id";
+ let plan1 = logical_plan(sql).unwrap();
+ let sql = "select id from person order by id";
+ let plan2 = logical_plan(sql).unwrap();
+ assert_eq!(format!("{plan1:?}"), format!("{plan2:?}"));
+}
+
#[test]
fn test_duplicated_left_join_key_inner_join() {
// person.id * 2 happen twice in left side.
diff --git a/datafusion/sqllogictest/test_files/select.slt
b/datafusion/sqllogictest/test_files/select.slt
index bb81c5a9a1..3f3befd85a 100644
--- a/datafusion/sqllogictest/test_files/select.slt
+++ b/datafusion/sqllogictest/test_files/select.slt
@@ -868,6 +868,21 @@ statement error DataFusion error: Error during planning:
EXCLUDE or EXCEPT conta
SELECT * EXCLUDE(d, b, c, a, a, b, c, d)
FROM table1
+# avoiding adding an alias if the column name is the same
+query TT
+EXPLAIN select a as a FROM table1 order by a
+----
+logical_plan
+Sort: table1.a ASC NULLS LAST
+--TableScan: table1 projection=[a]
+physical_plan
+SortExec: expr=[a@0 ASC NULLS LAST]
+--MemoryExec: partitions=1, partition_sizes=[1]
+
+# ambiguous column references in on join
+query error DataFusion error: Schema error: Ambiguous reference to unqualified
field a
+EXPLAIN select a as a FROM table1 t1 CROSS JOIN table1 t2 order by a
+
# run below query in multi partitions
statement ok
set datafusion.execution.target_partitions = 2;