This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new a265509b2 Ambiguity check for where selection (#5153)
a265509b2 is described below
commit a265509b272084e758b9ec21f2133856f56e617d
Author: Jeffrey <[email protected]>
AuthorDate: Tue Feb 7 08:14:34 2023 +1100
Ambiguity check for where selection (#5153)
* Ambiguity check for where selection
* Fix error types
---
datafusion/sql/src/select.rs | 60 +++++++++++++++++++++++---------
datafusion/sql/tests/integration_test.rs | 12 ++++++-
2 files changed, 55 insertions(+), 17 deletions(-)
diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs
index 96d9be870..c63177478 100644
--- a/datafusion/sql/src/select.rs
+++ b/datafusion/sql/src/select.rs
@@ -20,7 +20,7 @@ use crate::utils::{
check_columns_satisfy_exprs, extract_aliases, normalize_ident, rebase_expr,
resolve_aliases_to_exprs, resolve_columns, resolve_positions_to_exprs,
};
-use datafusion_common::{DFSchema, DFSchemaRef, DataFusionError, Result};
+use datafusion_common::{Column, DFSchema, DFSchemaRef, DataFusionError,
Result};
use datafusion_expr::expr_rewriter::{normalize_col,
normalize_col_with_schemas};
use datafusion_expr::logical_plan::builder::project;
use datafusion_expr::logical_plan::Join as HashJoin;
@@ -73,6 +73,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
plan,
outer_query_schema,
planner_context,
+ &from_schema,
)?;
// process the SELECT expressions, with wildcards expanded.
@@ -237,6 +238,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
plan: LogicalPlan,
outer_query_schema: Option<&DFSchema>,
planner_context: &mut PlannerContext,
+ from_schema: &DFSchema,
) -> Result<LogicalPlan> {
match selection {
Some(predicate_expr) => {
@@ -253,6 +255,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
let filter_expr =
self.sql_to_expr(predicate_expr, &join_schema,
planner_context)?;
+ self.column_reference_ambiguous_check(
+ &[filter_expr.clone()],
+ from_schema,
+ outer_query_schema,
+ )?;
let mut using_columns = HashSet::new();
expr_to_columns(&filter_expr, &mut using_columns)?;
let filter_expr = normalize_col_with_schemas(
@@ -337,15 +344,20 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
match sql {
SelectItem::UnnamedExpr(expr) => {
let expr = self.sql_to_expr(expr, plan.schema(),
planner_context)?;
- self.column_reference_ambiguous_check(from_schema,
&[expr.clone()])?;
+ self.column_reference_ambiguous_check(
+ &[expr.clone()],
+ from_schema,
+ None,
+ )?;
Ok(vec![normalize_col(expr, plan)?])
}
SelectItem::ExprWithAlias { expr, alias } => {
let select_expr =
self.sql_to_expr(expr, plan.schema(), planner_context)?;
self.column_reference_ambiguous_check(
- from_schema,
&[select_expr.clone()],
+ from_schema,
+ None,
)?;
let expr = Alias(Box::new(select_expr),
normalize_ident(alias));
Ok(vec![normalize_col(expr, plan)?])
@@ -374,26 +386,42 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
/// ambiguous check for unqualifier column
fn column_reference_ambiguous_check(
&self,
- schema: &DFSchema,
exprs: &[Expr],
+ schema: &DFSchema,
+ fallback_schema: Option<&DFSchema>,
) -> Result<()> {
find_column_exprs(exprs)
.iter()
.try_for_each(|col| match col {
- Expr::Column(col) => match &col.relation {
- None => {
- // should get only one field in from_schema.
- if
schema.fields_with_unqualified_name(&col.name).len() != 1 {
- Err(DataFusionError::Internal(format!(
- "column reference {} is ambiguous",
- col.name
- )))
- } else {
- Ok(())
+ Expr::Column(Column {
+ name,
+ relation: None,
+ ..
+ }) => {
+ match (
+ schema.fields_with_unqualified_name(name).len(),
+ fallback_schema,
+ ) {
+ // in case is from outer query if this is inner
subquery
+ (0, Some(fallback_schema)) => {
+ match
fallback_schema.fields_with_unqualified_name(name).len()
+ {
+ 0 => Err(DataFusionError::Plan(format!(
+ "column reference {name} is unknown",
+ ))),
+ 1 => Ok(()),
+ _ => Err(DataFusionError::Plan(format!(
+ "column reference {name} is ambiguous",
+ ))),
+ }
}
+ (1, _) => Ok(()),
+ // should get only one field in from_schema.
+ _ => Err(DataFusionError::Plan(format!(
+ "column reference {name} is ambiguous",
+ ))),
}
- _ => Ok(()),
- },
+ }
_ => Ok(()),
})
}
diff --git a/datafusion/sql/tests/integration_test.rs
b/datafusion/sql/tests/integration_test.rs
index 991dbe89e..761bbf345 100644
--- a/datafusion/sql/tests/integration_test.rs
+++ b/datafusion/sql/tests/integration_test.rs
@@ -515,7 +515,7 @@ fn select_with_ambiguous_column() {
let sql = "SELECT id FROM person a, person b";
let err = logical_plan(sql).expect_err("query should have failed");
assert_eq!(
- "Internal(\"column reference id is ambiguous\")",
+ "Plan(\"column reference id is ambiguous\")",
format!("{err:?}")
);
}
@@ -533,6 +533,16 @@ fn join_with_ambiguous_column() {
quick_test(sql, expected);
}
+#[test]
+fn where_selection_with_ambiguous_column() {
+ let sql = "SELECT * FROM person a, person b WHERE id = id + 1";
+ let err = logical_plan(sql).expect_err("query should have failed");
+ assert_eq!(
+ "Plan(\"column reference id is ambiguous\")",
+ format!("{err:?}")
+ );
+}
+
#[test]
fn natural_join() {
let sql = "SELECT * FROM lineitem a NATURAL JOIN lineitem b";