This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 209c26670 add ambiguous check for projection (#4260)
209c26670 is described below
commit 209c26670da4ee52cf13e4302250069a686fdaa2
Author: AssHero <[email protected]>
AuthorDate: Tue Nov 22 20:12:09 2022 +0800
add ambiguous check for projection (#4260)
---
datafusion/optimizer/tests/integration-test.rs | 2 +-
datafusion/sql/src/planner.rs | 108 +++++++++++++++++++++++--
2 files changed, 104 insertions(+), 6 deletions(-)
diff --git a/datafusion/optimizer/tests/integration-test.rs
b/datafusion/optimizer/tests/integration-test.rs
index c983f8198..b2932963e 100644
--- a/datafusion/optimizer/tests/integration-test.rs
+++ b/datafusion/optimizer/tests/integration-test.rs
@@ -260,7 +260,7 @@ fn timestamp_nano_ts_utc_predicates() {
#[test]
fn propagate_empty_relation() {
- let sql = "SELECT col_int32 FROM test JOIN ( SELECT col_int32 FROM test
WHERE false ) AS ta1 ON test.col_int32 = ta1.col_int32;";
+ let sql = "SELECT test.col_int32 FROM test JOIN ( SELECT col_int32 FROM
test WHERE false ) AS ta1 ON test.col_int32 = ta1.col_int32;";
let plan = test_sql(sql).unwrap();
// when children exist EmptyRelation, it will bottom-up propagate.
let expected = "EmptyRelation";
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index a6f43618f..8e3e6d911 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -46,6 +46,8 @@ use datafusion_expr::expr::{Between, BinaryExpr, Case, Cast,
GroupingSet, Like};
use datafusion_expr::expr_rewriter::normalize_col;
use datafusion_expr::expr_rewriter::normalize_col_with_schemas;
use datafusion_expr::logical_plan::builder::project_with_alias;
+use datafusion_expr::logical_plan::Join as HashJoin;
+use datafusion_expr::logical_plan::JoinConstraint as HashJoinConstraint;
use datafusion_expr::logical_plan::{
Analyze, CreateCatalog, CreateCatalogSchema,
CreateExternalTable as PlanCreateExternalTable, CreateMemoryTable,
CreateView,
@@ -1134,6 +1136,39 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
// process `from` clause
let plans = self.plan_from_tables(select.from, ctes,
outer_query_schema)?;
let empty_from = matches!(plans.first(),
Some(LogicalPlan::EmptyRelation(_)));
+ // build from schema for unqualifier column ambiguous check
+ // we should get only one field for this unqualifier column from
schema.
+ let from_schema = {
+ let mut fields = vec![];
+ let mut metadata = std::collections::HashMap::new();
+ for plan in &plans {
+ if let LogicalPlan::Join(HashJoin {
+ join_constraint: HashJoinConstraint::Using,
+ on,
+ left,
+ ..
+ }) = plan
+ {
+ // For query: select id from t1 join t2 using(id), this is
legal.
+ // We should dedup the fields for cols in using clause.
+ let mut plan_fields = plan.schema().fields().clone();
+ for join_cols in on.iter() {
+ let left_field =
left.schema().field_from_column(&join_cols.0)?;
+ plan_fields.retain(|field| {
+ field.unqualified_column().name
+ != left_field.unqualified_column().name
+ });
+ plan_fields.push(left_field.clone());
+ }
+ fields.extend_from_slice(&plan_fields);
+ metadata.extend(plan.schema().metadata().clone());
+ } else {
+ fields.extend_from_slice(plan.schema().fields());
+ metadata.extend(plan.schema().metadata().clone());
+ }
+ }
+ DFSchema::new_with_metadata(fields, metadata)?
+ };
// process `where` clause
let plan =
@@ -1146,6 +1181,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
empty_from,
outer_query_schema,
ctes,
+ &from_schema,
)?;
// having and group by clause may reference aliases defined in select
projection
@@ -1307,11 +1343,19 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
empty_from: bool,
outer_query_schema: Option<&DFSchema>,
ctes: &mut HashMap<String, LogicalPlan>,
+ from_schema: &DFSchema,
) -> Result<Vec<Expr>> {
projection
.into_iter()
.map(|expr| {
- self.sql_select_to_rex(expr, plan, empty_from,
outer_query_schema, ctes)
+ self.sql_select_to_rex(
+ expr,
+ plan,
+ empty_from,
+ outer_query_schema,
+ ctes,
+ from_schema,
+ )
})
.flat_map(|result| match result {
Ok(vec) => vec.into_iter().map(Ok).collect(),
@@ -1583,6 +1627,33 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
})
}
+ /// ambiguous check for unqualifier column
+ fn column_reference_ambiguous_check(
+ &self,
+ schema: &DFSchema,
+ exprs: &[Expr],
+ ) -> Result<()> {
+ find_column_exprs(exprs)
+ .iter()
+ .try_for_each(|col| match col {
+ Expr::Column(col) => match &col.relation {
+ None => {
+ // should get only one field in from_schema.
+ if
schema.fields_with_unqualified_name(&col.name).len() != 1 {
+ Err(DataFusionError::Internal(format!(
+ "column reference {} is ambiguous",
+ col.name
+ )))
+ } else {
+ Ok(())
+ }
+ }
+ _ => Ok(()),
+ },
+ _ => Ok(()),
+ })
+ }
+
/// Generate a relational expression from a select SQL expression
fn sql_select_to_rex(
&self,
@@ -1591,6 +1662,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
empty_from: bool,
outer_query_schema: Option<&DFSchema>,
ctes: &mut HashMap<String, LogicalPlan>,
+ from_schema: &DFSchema,
) -> Result<Vec<Expr>> {
let input_schema = match outer_query_schema {
Some(x) => {
@@ -1604,13 +1676,16 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
match sql {
SelectItem::UnnamedExpr(expr) => {
let expr = self.sql_to_rex(expr, &input_schema, ctes)?;
+ self.column_reference_ambiguous_check(from_schema,
&[expr.clone()])?;
Ok(vec![normalize_col(expr, plan)?])
}
SelectItem::ExprWithAlias { expr, alias } => {
- let expr = Alias(
- Box::new(self.sql_to_rex(expr, &input_schema, ctes)?),
- normalize_ident(&alias),
- );
+ let select_expr = self.sql_to_rex(expr, &input_schema, ctes)?;
+ self.column_reference_ambiguous_check(
+ from_schema,
+ &[select_expr.clone()],
+ )?;
+ let expr = Alias(Box::new(select_expr),
normalize_ident(&alias));
Ok(vec![normalize_col(expr, plan)?])
}
SelectItem::Wildcard => {
@@ -3568,6 +3643,29 @@ mod tests {
);
}
+ #[test]
+ fn select_with_ambiguous_column() {
+ let sql = "SELECT id FROM person a, person b";
+ let err = logical_plan(sql).expect_err("query should have failed");
+ assert_eq!(
+ "Internal(\"column reference id is ambiguous\")",
+ format!("{:?}", err)
+ );
+ }
+
+ #[test]
+ fn join_with_ambiguous_column() {
+ // This is legal.
+ let sql = "SELECT id FROM person a join person b using(id)";
+ let expected = "Projection: a.id\
+ \n Inner Join: Using a.id = b.id\
+ \n SubqueryAlias: a\
+ \n TableScan: person\
+ \n SubqueryAlias: b\
+ \n TableScan: person";
+ quick_test(sql, expected);
+ }
+
#[test]
fn select_with_having() {
let sql = "SELECT id, age