This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 3dc1773403 minor: inconsistent group by position planning (#10679)
3dc1773403 is described below

commit 3dc1773403532752db01da5c545b2bdab415e489
Author: Eduard Karacharov <[email protected]>
AuthorDate: Tue May 28 03:20:31 2024 +0300

    minor: inconsistent group by position planning (#10679)
    
    * fix: inconsistent group by position planning
    
    * Update datafusion/sql/src/utils.rs
    
    Co-authored-by: Andrew Lamb <[email protected]>
    
    * improved error message
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/sql/src/select.rs                    |  7 +--
 datafusion/sql/src/utils.rs                     | 75 ++++++++++++++++++-------
 datafusion/sql/tests/sql_integration.rs         | 12 ++--
 datafusion/sqllogictest/test_files/group_by.slt |  5 ++
 4 files changed, 68 insertions(+), 31 deletions(-)

diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs
index d2cd1bcf3a..e8e016bf09 100644
--- a/datafusion/sql/src/select.rs
+++ b/datafusion/sql/src/select.rs
@@ -131,7 +131,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 //
                 //   SELECT c1, MAX(c2) AS m FROM t GROUP BY c1 HAVING MAX(c2) 
> 10;
                 //
-                let having_expr = resolve_aliases_to_exprs(&having_expr, 
&alias_map)?;
+                let having_expr = resolve_aliases_to_exprs(having_expr, 
&alias_map)?;
                 normalize_col(having_expr, &projected_plan)
             })
             .transpose()?;
@@ -163,10 +163,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                         alias_map.remove(f.name());
                     }
                     let group_by_expr =
-                        resolve_aliases_to_exprs(&group_by_expr, &alias_map)?;
+                        resolve_aliases_to_exprs(group_by_expr, &alias_map)?;
                     let group_by_expr =
-                        resolve_positions_to_exprs(&group_by_expr, 
&select_exprs)
-                            .unwrap_or(group_by_expr);
+                        resolve_positions_to_exprs(group_by_expr, 
&select_exprs)?;
                     let group_by_expr = normalize_col(group_by_expr, 
&projected_plan)?;
                     self.validate_schema_satisfies_exprs(
                         base_plan.schema(),
diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs
index 35fd3b48f0..51bacb5f70 100644
--- a/datafusion/sql/src/utils.rs
+++ b/datafusion/sql/src/utils.rs
@@ -149,47 +149,51 @@ pub(crate) fn extract_aliases(exprs: &[Expr]) -> 
HashMap<String, Expr> {
 }
 
 /// Given an expression that's literal int encoding position, lookup the 
corresponding expression
-/// in the select_exprs list, if the index is within the bounds and it is 
indeed a position literal;
-/// Otherwise, return None
+/// in the select_exprs list, if the index is within the bounds and it is 
indeed a position literal,
+/// otherwise, returns planning error.
+/// If input expression is not an int literal, returns expression as-is.
 pub(crate) fn resolve_positions_to_exprs(
-    expr: &Expr,
+    expr: Expr,
     select_exprs: &[Expr],
-) -> Option<Expr> {
+) -> Result<Expr> {
     match expr {
         // sql_expr_to_logical_expr maps number to i64
         // 
https://github.com/apache/datafusion/blob/8d175c759e17190980f270b5894348dc4cff9bbf/datafusion/src/sql/planner.rs#L882-L887
         Expr::Literal(ScalarValue::Int64(Some(position)))
-            if position > &0_i64 && position <= &(select_exprs.len() as i64) =>
+            if position > 0_i64 && position <= select_exprs.len() as i64 =>
         {
             let index = (position - 1) as usize;
             let select_expr = &select_exprs[index];
-            Some(match select_expr {
+            Ok(match select_expr {
                 Expr::Alias(Alias { expr, .. }) => *expr.clone(),
                 _ => select_expr.clone(),
             })
         }
-        _ => None,
+        Expr::Literal(ScalarValue::Int64(Some(position))) => plan_err!(
+            "Cannot find column with position {} in SELECT clause. Valid 
columns: 1 to {}",
+            position, select_exprs.len()
+        ),
+        _ => Ok(expr),
     }
 }
 
 /// Rebuilds an `Expr` with columns that refer to aliases replaced by the
 /// alias' underlying `Expr`.
 pub(crate) fn resolve_aliases_to_exprs(
-    expr: &Expr,
+    expr: Expr,
     aliases: &HashMap<String, Expr>,
 ) -> Result<Expr> {
-    expr.clone()
-        .transform_up(|nested_expr| match nested_expr {
-            Expr::Column(c) if c.relation.is_none() => {
-                if let Some(aliased_expr) = aliases.get(&c.name) {
-                    Ok(Transformed::yes(aliased_expr.clone()))
-                } else {
-                    Ok(Transformed::no(Expr::Column(c)))
-                }
+    expr.transform_up(|nested_expr| match nested_expr {
+        Expr::Column(c) if c.relation.is_none() => {
+            if let Some(aliased_expr) = aliases.get(&c.name) {
+                Ok(Transformed::yes(aliased_expr.clone()))
+            } else {
+                Ok(Transformed::no(Expr::Column(c)))
             }
-            _ => Ok(Transformed::no(nested_expr)),
-        })
-        .data()
+        }
+        _ => Ok(Transformed::no(nested_expr)),
+    })
+    .data()
 }
 
 /// given a slice of window expressions sharing the same sort key, find their 
common partition
@@ -346,9 +350,9 @@ mod tests {
     use arrow::datatypes::{DataType as ArrowDataType, Field, Schema};
     use arrow_schema::Fields;
     use datafusion_common::{DFSchema, Result};
-    use datafusion_expr::{col, lit, unnest, EmptyRelation, LogicalPlan};
+    use datafusion_expr::{col, count, lit, unnest, EmptyRelation, LogicalPlan};
 
-    use crate::utils::recursive_transform_unnest;
+    use crate::utils::{recursive_transform_unnest, resolve_positions_to_exprs};
 
     #[test]
     fn test_recursive_transform_unnest() -> Result<()> {
@@ -437,4 +441,33 @@ mod tests {
 
         Ok(())
     }
+
+    #[test]
+    fn test_resolve_positions_to_exprs() -> Result<()> {
+        let select_exprs = vec![col("c1"), col("c2"), count(lit(1))];
+
+        // Assert 1 resolved as first column in select list
+        let resolved = resolve_positions_to_exprs(lit(1i64), &select_exprs)?;
+        assert_eq!(resolved, col("c1"));
+
+        // Assert error if index out of select clause bounds
+        let resolved = resolve_positions_to_exprs(lit(-1i64), &select_exprs);
+        assert!(resolved.is_err_and(|e| e.message().contains(
+            "Cannot find column with position -1 in SELECT clause. Valid 
columns: 1 to 3"
+        )));
+
+        let resolved = resolve_positions_to_exprs(lit(5i64), &select_exprs);
+        assert!(resolved.is_err_and(|e| e.message().contains(
+            "Cannot find column with position 5 in SELECT clause. Valid 
columns: 1 to 3"
+        )));
+
+        // Assert expression returned as-is
+        let resolved = resolve_positions_to_exprs(lit("text"), &select_exprs)?;
+        assert_eq!(resolved, lit("text"));
+
+        let resolved = resolve_positions_to_exprs(col("fake"), &select_exprs)?;
+        assert_eq!(resolved, col("fake"));
+
+        Ok(())
+    }
 }
diff --git a/datafusion/sql/tests/sql_integration.rs 
b/datafusion/sql/tests/sql_integration.rs
index 0d84d9b2ab..e32a01dbc3 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -1484,16 +1484,16 @@ fn 
select_simple_aggregate_with_groupby_position_out_of_range() {
     let sql = "SELECT state, MIN(age) FROM person GROUP BY 0";
     let err = logical_plan(sql).expect_err("query should have failed");
     assert_eq!(
-        "Error during planning: Projection references non-aggregate values: 
Expression person.state could not be resolved from available columns: Int64(0), 
MIN(person.age)",
-            err.strip_backtrace()
-        );
+        "Error during planning: Cannot find column with position 0 in SELECT 
clause. Valid columns: 1 to 2",
+        err.strip_backtrace()
+    );
 
     let sql2 = "SELECT state, MIN(age) FROM person GROUP BY 5";
     let err2 = logical_plan(sql2).expect_err("query should have failed");
     assert_eq!(
-        "Error during planning: Projection references non-aggregate values: 
Expression person.state could not be resolved from available columns: Int64(5), 
MIN(person.age)",
-            err2.strip_backtrace()
-        );
+        "Error during planning: Cannot find column with position 5 in SELECT 
clause. Valid columns: 1 to 2",
+        err2.strip_backtrace()
+    );
 }
 
 #[test]
diff --git a/datafusion/sqllogictest/test_files/group_by.slt 
b/datafusion/sqllogictest/test_files/group_by.slt
index 43bbf6bed6..7f4106a27c 100644
--- a/datafusion/sqllogictest/test_files/group_by.slt
+++ b/datafusion/sqllogictest/test_files/group_by.slt
@@ -5059,3 +5059,8 @@ query II
 SELECT a + 1 AS d, a + 1 + b AS c FROM (SELECT 1 AS a, 2 AS b) GROUP BY a + 1, 
a + 1 + b;
 ----
 2 4
+
+statement error DataFusion error: Error during planning: Cannot find column 
with position 4 in SELECT clause. Valid columns: 1 to 3
+SELECT a, b, COUNT(1)
+FROM multiple_ordered_table
+GROUP BY 1, 2, 4, 5, 6;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to