This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 3dc1773403 minor: inconsistent group by position planning (#10679)
3dc1773403 is described below
commit 3dc1773403532752db01da5c545b2bdab415e489
Author: Eduard Karacharov <[email protected]>
AuthorDate: Tue May 28 03:20:31 2024 +0300
minor: inconsistent group by position planning (#10679)
* fix: inconsistent group by position planning
* Update datafusion/sql/src/utils.rs
Co-authored-by: Andrew Lamb <[email protected]>
* improved error message
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/sql/src/select.rs | 7 +--
datafusion/sql/src/utils.rs | 75 ++++++++++++++++++-------
datafusion/sql/tests/sql_integration.rs | 12 ++--
datafusion/sqllogictest/test_files/group_by.slt | 5 ++
4 files changed, 68 insertions(+), 31 deletions(-)
diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs
index d2cd1bcf3a..e8e016bf09 100644
--- a/datafusion/sql/src/select.rs
+++ b/datafusion/sql/src/select.rs
@@ -131,7 +131,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
//
// SELECT c1, MAX(c2) AS m FROM t GROUP BY c1 HAVING MAX(c2)
> 10;
//
- let having_expr = resolve_aliases_to_exprs(&having_expr,
&alias_map)?;
+ let having_expr = resolve_aliases_to_exprs(having_expr,
&alias_map)?;
normalize_col(having_expr, &projected_plan)
})
.transpose()?;
@@ -163,10 +163,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
alias_map.remove(f.name());
}
let group_by_expr =
- resolve_aliases_to_exprs(&group_by_expr, &alias_map)?;
+ resolve_aliases_to_exprs(group_by_expr, &alias_map)?;
let group_by_expr =
- resolve_positions_to_exprs(&group_by_expr,
&select_exprs)
- .unwrap_or(group_by_expr);
+ resolve_positions_to_exprs(group_by_expr,
&select_exprs)?;
let group_by_expr = normalize_col(group_by_expr,
&projected_plan)?;
self.validate_schema_satisfies_exprs(
base_plan.schema(),
diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs
index 35fd3b48f0..51bacb5f70 100644
--- a/datafusion/sql/src/utils.rs
+++ b/datafusion/sql/src/utils.rs
@@ -149,47 +149,51 @@ pub(crate) fn extract_aliases(exprs: &[Expr]) ->
HashMap<String, Expr> {
}
/// Given an expression that's literal int encoding position, lookup the
corresponding expression
-/// in the select_exprs list, if the index is within the bounds and it is
indeed a position literal;
-/// Otherwise, return None
+/// in the select_exprs list, if the index is within the bounds and it is
indeed a position literal,
+/// otherwise, returns planning error.
+/// If input expression is not an int literal, returns expression as-is.
pub(crate) fn resolve_positions_to_exprs(
- expr: &Expr,
+ expr: Expr,
select_exprs: &[Expr],
-) -> Option<Expr> {
+) -> Result<Expr> {
match expr {
// sql_expr_to_logical_expr maps number to i64
//
https://github.com/apache/datafusion/blob/8d175c759e17190980f270b5894348dc4cff9bbf/datafusion/src/sql/planner.rs#L882-L887
Expr::Literal(ScalarValue::Int64(Some(position)))
- if position > &0_i64 && position <= &(select_exprs.len() as i64) =>
+ if position > 0_i64 && position <= select_exprs.len() as i64 =>
{
let index = (position - 1) as usize;
let select_expr = &select_exprs[index];
- Some(match select_expr {
+ Ok(match select_expr {
Expr::Alias(Alias { expr, .. }) => *expr.clone(),
_ => select_expr.clone(),
})
}
- _ => None,
+ Expr::Literal(ScalarValue::Int64(Some(position))) => plan_err!(
+ "Cannot find column with position {} in SELECT clause. Valid
columns: 1 to {}",
+ position, select_exprs.len()
+ ),
+ _ => Ok(expr),
}
}
/// Rebuilds an `Expr` with columns that refer to aliases replaced by the
/// alias' underlying `Expr`.
pub(crate) fn resolve_aliases_to_exprs(
- expr: &Expr,
+ expr: Expr,
aliases: &HashMap<String, Expr>,
) -> Result<Expr> {
- expr.clone()
- .transform_up(|nested_expr| match nested_expr {
- Expr::Column(c) if c.relation.is_none() => {
- if let Some(aliased_expr) = aliases.get(&c.name) {
- Ok(Transformed::yes(aliased_expr.clone()))
- } else {
- Ok(Transformed::no(Expr::Column(c)))
- }
+ expr.transform_up(|nested_expr| match nested_expr {
+ Expr::Column(c) if c.relation.is_none() => {
+ if let Some(aliased_expr) = aliases.get(&c.name) {
+ Ok(Transformed::yes(aliased_expr.clone()))
+ } else {
+ Ok(Transformed::no(Expr::Column(c)))
}
- _ => Ok(Transformed::no(nested_expr)),
- })
- .data()
+ }
+ _ => Ok(Transformed::no(nested_expr)),
+ })
+ .data()
}
/// given a slice of window expressions sharing the same sort key, find their
common partition
@@ -346,9 +350,9 @@ mod tests {
use arrow::datatypes::{DataType as ArrowDataType, Field, Schema};
use arrow_schema::Fields;
use datafusion_common::{DFSchema, Result};
- use datafusion_expr::{col, lit, unnest, EmptyRelation, LogicalPlan};
+ use datafusion_expr::{col, count, lit, unnest, EmptyRelation, LogicalPlan};
- use crate::utils::recursive_transform_unnest;
+ use crate::utils::{recursive_transform_unnest, resolve_positions_to_exprs};
#[test]
fn test_recursive_transform_unnest() -> Result<()> {
@@ -437,4 +441,33 @@ mod tests {
Ok(())
}
+
+ #[test]
+ fn test_resolve_positions_to_exprs() -> Result<()> {
+ let select_exprs = vec![col("c1"), col("c2"), count(lit(1))];
+
+ // Assert 1 resolved as first column in select list
+ let resolved = resolve_positions_to_exprs(lit(1i64), &select_exprs)?;
+ assert_eq!(resolved, col("c1"));
+
+ // Assert error if index out of select clause bounds
+ let resolved = resolve_positions_to_exprs(lit(-1i64), &select_exprs);
+ assert!(resolved.is_err_and(|e| e.message().contains(
+ "Cannot find column with position -1 in SELECT clause. Valid
columns: 1 to 3"
+ )));
+
+ let resolved = resolve_positions_to_exprs(lit(5i64), &select_exprs);
+ assert!(resolved.is_err_and(|e| e.message().contains(
+ "Cannot find column with position 5 in SELECT clause. Valid
columns: 1 to 3"
+ )));
+
+ // Assert expression returned as-is
+ let resolved = resolve_positions_to_exprs(lit("text"), &select_exprs)?;
+ assert_eq!(resolved, lit("text"));
+
+ let resolved = resolve_positions_to_exprs(col("fake"), &select_exprs)?;
+ assert_eq!(resolved, col("fake"));
+
+ Ok(())
+ }
}
diff --git a/datafusion/sql/tests/sql_integration.rs
b/datafusion/sql/tests/sql_integration.rs
index 0d84d9b2ab..e32a01dbc3 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -1484,16 +1484,16 @@ fn
select_simple_aggregate_with_groupby_position_out_of_range() {
let sql = "SELECT state, MIN(age) FROM person GROUP BY 0";
let err = logical_plan(sql).expect_err("query should have failed");
assert_eq!(
- "Error during planning: Projection references non-aggregate values:
Expression person.state could not be resolved from available columns: Int64(0),
MIN(person.age)",
- err.strip_backtrace()
- );
+ "Error during planning: Cannot find column with position 0 in SELECT
clause. Valid columns: 1 to 2",
+ err.strip_backtrace()
+ );
let sql2 = "SELECT state, MIN(age) FROM person GROUP BY 5";
let err2 = logical_plan(sql2).expect_err("query should have failed");
assert_eq!(
- "Error during planning: Projection references non-aggregate values:
Expression person.state could not be resolved from available columns: Int64(5),
MIN(person.age)",
- err2.strip_backtrace()
- );
+ "Error during planning: Cannot find column with position 5 in SELECT
clause. Valid columns: 1 to 2",
+ err2.strip_backtrace()
+ );
}
#[test]
diff --git a/datafusion/sqllogictest/test_files/group_by.slt
b/datafusion/sqllogictest/test_files/group_by.slt
index 43bbf6bed6..7f4106a27c 100644
--- a/datafusion/sqllogictest/test_files/group_by.slt
+++ b/datafusion/sqllogictest/test_files/group_by.slt
@@ -5059,3 +5059,8 @@ query II
SELECT a + 1 AS d, a + 1 + b AS c FROM (SELECT 1 AS a, 2 AS b) GROUP BY a + 1,
a + 1 + b;
----
2 4
+
+statement error DataFusion error: Error during planning: Cannot find column
with position 4 in SELECT clause. Valid columns: 1 to 3
+SELECT a, b, COUNT(1)
+FROM multiple_ordered_table
+GROUP BY 1, 2, 4, 5, 6;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]